summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-08 08:11:53 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-08 08:12:02 +0000
commit8d36f5966675e23bee7026ba37ae0647fbf47300 (patch)
treedf4227bbb3b07cb70df87237bcff03c8efd7822d /tests
parentReleasing debian version 22.2.0-1. (diff)
downloadsqlglot-8d36f5966675e23bee7026ba37ae0647fbf47300.tar.xz
sqlglot-8d36f5966675e23bee7026ba37ae0647fbf47300.zip
Merging upstream version 23.7.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests')
-rw-r--r--tests/dataframe/integration/test_session.py7
-rw-r--r--tests/dataframe/unit/test_column.py4
-rw-r--r--tests/dataframe/unit/test_functions.py22
-rw-r--r--tests/dataframe/unit/test_session.py6
-rw-r--r--tests/dialects/test_athena.py20
-rw-r--r--tests/dialects/test_bigquery.py83
-rw-r--r--tests/dialects/test_clickhouse.py28
-rw-r--r--tests/dialects/test_dialect.py117
-rw-r--r--tests/dialects/test_drill.py63
-rw-r--r--tests/dialects/test_duckdb.py90
-rw-r--r--tests/dialects/test_hive.py27
-rw-r--r--tests/dialects/test_mysql.py67
-rw-r--r--tests/dialects/test_oracle.py68
-rw-r--r--tests/dialects/test_postgres.py147
-rw-r--r--tests/dialects/test_presto.py30
-rw-r--r--tests/dialects/test_prql.py17
-rw-r--r--tests/dialects/test_redshift.py38
-rw-r--r--tests/dialects/test_snowflake.py175
-rw-r--r--tests/dialects/test_spark.py37
-rw-r--r--tests/dialects/test_sqlite.py132
-rw-r--r--tests/dialects/test_teradata.py5
-rw-r--r--tests/dialects/test_tsql.py29
-rw-r--r--tests/fixtures/identity.sql10
-rw-r--r--tests/fixtures/optimizer/canonicalize.sql11
-rw-r--r--tests/fixtures/optimizer/merge_subqueries.sql18
-rw-r--r--tests/fixtures/optimizer/optimizer.sql19
-rw-r--r--tests/fixtures/optimizer/pushdown_projections.sql6
-rw-r--r--tests/fixtures/optimizer/qualify_columns.sql14
-rw-r--r--tests/fixtures/optimizer/qualify_columns_ddl.sql15
-rw-r--r--tests/fixtures/optimizer/qualify_tables.sql1
-rw-r--r--tests/fixtures/optimizer/simplify.sql99
-rw-r--r--tests/fixtures/optimizer/tpc-ds/call_center.csv.gzbin425 -> 427 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gzbin460883 -> 393991 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gzbin158215 -> 167258 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gzbin1814673 -> 639482 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/customer.csv.gzbin107573 -> 109529 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/customer_address.csv.gzbin28719 -> 28542 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gzbin126715 -> 33320 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/date_dim.csv.gzbin1575448 -> 208646 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gzbin23544 -> 23432 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/income_band.csv.gzbin191 -> 194 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/inventory.csv.gzbin202661 -> 43193 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/item.csv.gzbin31336 -> 31259 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/promotion.csv.gzbin501 -> 501 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/reason.csv.gzbin83 -> 87 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gzbin633 -> 623 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/store.csv.gzbin397 -> 398 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/store_returns.csv.gzbin255650 -> 266354 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/store_sales.csv.gzbin2436694 -> 421923 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/time_dim.csv.gzbin680588 -> 77216 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/tpc-ds.sql306
-rw-r--r--tests/fixtures/optimizer/tpc-ds/warehouse.csv.gzbin221 -> 224 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/web_page.csv.gzbin212 -> 215 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/web_returns.csv.gzbin67833 -> 71921 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/web_sales.csv.gzbin867887 -> 615903 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-ds/web_site.csv.gzbin406 -> 409 bytes
-rw-r--r--tests/fixtures/optimizer/tpc-h/tpc-h.sql18
-rw-r--r--tests/fixtures/pretty.sql11
-rw-r--r--tests/test_executor.py23
-rw-r--r--tests/test_expressions.py47
-rw-r--r--tests/test_lineage.py78
-rw-r--r--tests/test_optimizer.py88
-rw-r--r--tests/test_tokens.py12
-rw-r--r--tests/test_transpile.py68
64 files changed, 1606 insertions, 450 deletions
diff --git a/tests/dataframe/integration/test_session.py b/tests/dataframe/integration/test_session.py
index ec50034..3bb3e20 100644
--- a/tests/dataframe/integration/test_session.py
+++ b/tests/dataframe/integration/test_session.py
@@ -34,3 +34,10 @@ class TestSessionFunc(DataFrameValidator):
.agg(SF.countDistinct(SF.col("employee_id")))
)
self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True)
+
+ def test_nameless_column(self):
+ query = "SELECT MAX(age) FROM employee"
+ df = self.spark.sql(query)
+ dfs = self.sqlglot.sql(query)
+ # Spark will alias the column to `max(age)` while sqlglot will alias to `_col_0` so their schemas will differ
+ self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True)
diff --git a/tests/dataframe/unit/test_column.py b/tests/dataframe/unit/test_column.py
index 7a12808..833005b 100644
--- a/tests/dataframe/unit/test_column.py
+++ b/tests/dataframe/unit/test_column.py
@@ -150,8 +150,8 @@ class TestDataframeColumn(unittest.TestCase):
F.col("cola").between(datetime.date(2022, 1, 1), datetime.date(2022, 3, 1)).sql(),
)
self.assertEqual(
- "cola BETWEEN CAST('2022-01-01T01:01:01+00:00' AS TIMESTAMP) "
- "AND CAST('2022-03-01T01:01:01+00:00' AS TIMESTAMP)",
+ "cola BETWEEN CAST('2022-01-01 01:01:01+00:00' AS TIMESTAMP) "
+ "AND CAST('2022-03-01 01:01:01+00:00' AS TIMESTAMP)",
F.col("cola")
.between(datetime.datetime(2022, 1, 1, 1, 1, 1), datetime.datetime(2022, 3, 1, 1, 1, 1))
.sql(),
diff --git a/tests/dataframe/unit/test_functions.py b/tests/dataframe/unit/test_functions.py
index e40d50d..884cded 100644
--- a/tests/dataframe/unit/test_functions.py
+++ b/tests/dataframe/unit/test_functions.py
@@ -29,7 +29,7 @@ class TestFunctions(unittest.TestCase):
test_date = SF.lit(datetime.date(2022, 1, 1))
self.assertEqual("CAST('2022-01-01' AS DATE)", test_date.sql())
test_datetime = SF.lit(datetime.datetime(2022, 1, 1, 1, 1, 1))
- self.assertEqual("CAST('2022-01-01T01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql())
+ self.assertEqual("CAST('2022-01-01 01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql())
test_dict = SF.lit({"cola": 1, "colb": "test"})
self.assertEqual("STRUCT(1 AS cola, 'test' AS colb)", test_dict.sql())
@@ -51,7 +51,7 @@ class TestFunctions(unittest.TestCase):
test_date = SF.col(datetime.date(2022, 1, 1))
self.assertEqual("CAST('2022-01-01' AS DATE)", test_date.sql())
test_datetime = SF.col(datetime.datetime(2022, 1, 1, 1, 1, 1))
- self.assertEqual("CAST('2022-01-01T01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql())
+ self.assertEqual("CAST('2022-01-01 01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql())
test_dict = SF.col({"cola": 1, "colb": "test"})
self.assertEqual("STRUCT(1 AS cola, 'test' AS colb)", test_dict.sql())
@@ -250,9 +250,9 @@ class TestFunctions(unittest.TestCase):
def test_log10(self):
col_str = SF.log10("cola")
- self.assertEqual("LOG10(cola)", col_str.sql())
+ self.assertEqual("LOG(10, cola)", col_str.sql())
col = SF.log10(SF.col("cola"))
- self.assertEqual("LOG10(cola)", col.sql())
+ self.assertEqual("LOG(10, cola)", col.sql())
def test_log1p(self):
col_str = SF.log1p("cola")
@@ -262,9 +262,9 @@ class TestFunctions(unittest.TestCase):
def test_log2(self):
col_str = SF.log2("cola")
- self.assertEqual("LOG2(cola)", col_str.sql())
+ self.assertEqual("LOG(2, cola)", col_str.sql())
col = SF.log2(SF.col("cola"))
- self.assertEqual("LOG2(cola)", col.sql())
+ self.assertEqual("LOG(2, cola)", col.sql())
def test_rint(self):
col_str = SF.rint("cola")
@@ -1156,17 +1156,17 @@ class TestFunctions(unittest.TestCase):
def test_regexp_extract(self):
col_str = SF.regexp_extract("cola", r"(\d+)-(\d+)", 1)
- self.assertEqual("REGEXP_EXTRACT(cola, '(\\d+)-(\\d+)', 1)", col_str.sql())
+ self.assertEqual("REGEXP_EXTRACT(cola, '(\\\\d+)-(\\\\d+)', 1)", col_str.sql())
col = SF.regexp_extract(SF.col("cola"), r"(\d+)-(\d+)", 1)
- self.assertEqual("REGEXP_EXTRACT(cola, '(\\d+)-(\\d+)', 1)", col.sql())
+ self.assertEqual("REGEXP_EXTRACT(cola, '(\\\\d+)-(\\\\d+)', 1)", col.sql())
col_no_idx = SF.regexp_extract(SF.col("cola"), r"(\d+)-(\d+)")
- self.assertEqual("REGEXP_EXTRACT(cola, '(\\d+)-(\\d+)')", col_no_idx.sql())
+ self.assertEqual("REGEXP_EXTRACT(cola, '(\\\\d+)-(\\\\d+)')", col_no_idx.sql())
def test_regexp_replace(self):
col_str = SF.regexp_replace("cola", r"(\d+)", "--")
- self.assertEqual("REGEXP_REPLACE(cola, '(\\d+)', '--')", col_str.sql())
+ self.assertEqual("REGEXP_REPLACE(cola, '(\\\\d+)', '--')", col_str.sql())
col = SF.regexp_replace(SF.col("cola"), r"(\d+)", "--")
- self.assertEqual("REGEXP_REPLACE(cola, '(\\d+)', '--')", col.sql())
+ self.assertEqual("REGEXP_REPLACE(cola, '(\\\\d+)', '--')", col.sql())
def test_initcap(self):
col_str = SF.initcap("cola")
diff --git a/tests/dataframe/unit/test_session.py b/tests/dataframe/unit/test_session.py
index e2ebae4..848c603 100644
--- a/tests/dataframe/unit/test_session.py
+++ b/tests/dataframe/unit/test_session.py
@@ -79,7 +79,7 @@ class TestDataframeSession(DataFrameSQLValidator):
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
df = self.spark.sql(query).groupBy(F.col("cola")).agg(F.sum("colb"))
self.assertEqual(
- "WITH t38189 AS (SELECT cola, colb FROM table), t42330 AS (SELECT cola, colb FROM t38189) SELECT cola, SUM(colb) FROM t42330 GROUP BY cola",
+ "WITH t26614 AS (SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`), t23454 AS (SELECT cola, colb FROM t26614) SELECT cola, SUM(colb) FROM t23454 GROUP BY cola",
df.sql(pretty=False, optimize=False)[0],
)
@@ -87,14 +87,14 @@ class TestDataframeSession(DataFrameSQLValidator):
query = "CREATE TABLE new_table AS WITH t1 AS (SELECT cola, colb FROM table) SELECT cola, colb, FROM t1"
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
df = self.spark.sql(query)
- expected = "CREATE TABLE new_table AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
+ expected = "CREATE TABLE `new_table` AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
self.compare_sql(df, expected)
def test_sql_insert(self):
query = "WITH t1 AS (SELECT cola, colb FROM table) INSERT INTO new_table SELECT cola, colb FROM t1"
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
df = self.spark.sql(query)
- expected = "INSERT INTO new_table SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
+ expected = "INSERT INTO `new_table` SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
self.compare_sql(df, expected)
def test_session_create_builder_patterns(self):
diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py
new file mode 100644
index 0000000..3288ada
--- /dev/null
+++ b/tests/dialects/test_athena.py
@@ -0,0 +1,20 @@
+from tests.dialects.test_dialect import Validator
+
+
+class TestAthena(Validator):
+ dialect = "athena"
+ maxDiff = None
+
+ def test_athena(self):
+ self.validate_identity(
+ """USING EXTERNAL FUNCTION some_function(input VARBINARY)
+ RETURNS VARCHAR
+ LAMBDA 'some-name'
+ SELECT
+ some_function(1)""",
+ check_command_warning=True,
+ )
+
+ self.validate_identity(
+ "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
+ )
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index 0d94d19..300d492 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -51,6 +51,8 @@ class TestBigQuery(Validator):
self.assertEqual(table.name, "_y")
self.validate_identity("SELECT * FROM x-0.y")
+ self.assertEqual(exp.to_table("`a.b`.`c.d`", dialect="bigquery").sql(), '"a"."b"."c"."d"')
+ self.assertEqual(exp.to_table("`x`.`y.z`", dialect="bigquery").sql(), '"x"."y"."z"')
self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql(), '"x"."y"."z"')
self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql("bigquery"), "`x.y.z`")
self.assertEqual(exp.to_table("`x`.`y`", dialect="bigquery").sql("bigquery"), "`x`.`y`")
@@ -58,6 +60,8 @@ class TestBigQuery(Validator):
select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`")
self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF")
+ self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text")
+ self.validate_identity("SELECT jsondoc['some_key']")
self.validate_identity("SELECT `p.d.UdF`(data).* FROM `p.d.t`")
self.validate_identity("SELECT * FROM `my-project.my-dataset.my-table`")
self.validate_identity("CREATE OR REPLACE TABLE `a.b.c` CLONE `a.b.d`")
@@ -178,6 +182,13 @@ class TestBigQuery(Validator):
"""SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')"""
)
self.validate_identity(
+ "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id",
+ )
+ self.validate_identity(
+ "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
+ "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c",
+ )
+ self.validate_identity(
"SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)",
"SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)",
)
@@ -186,10 +197,6 @@ class TestBigQuery(Validator):
"SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)",
)
self.validate_identity(
- "create or replace view test (tenant_id OPTIONS(description='Test description on table creation')) select 1 as tenant_id, 1 as customer_id;",
- "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id",
- )
- self.validate_identity(
"SELECT SPLIT(foo)",
"SELECT SPLIT(foo, ',')",
)
@@ -659,6 +666,13 @@ class TestBigQuery(Validator):
},
)
self.validate_all(
+ "SELECT CAST(STRUCT(1) AS STRUCT<INT64>)",
+ write={
+ "bigquery": "SELECT CAST(STRUCT(1) AS STRUCT<INT64>)",
+ "snowflake": "SELECT CAST(OBJECT_CONSTRUCT('_0', 1) AS OBJECT)",
+ },
+ )
+ self.validate_all(
"cast(x as date format 'MM/DD/YYYY')",
write={
"bigquery": "PARSE_DATE('%m/%d/%Y', x)",
@@ -724,10 +738,10 @@ class TestBigQuery(Validator):
self.validate_all(
"'\\\\'",
write={
- "bigquery": r"'\\'",
- "duckdb": r"'\\'",
- "presto": r"'\\'",
- "hive": r"'\\'",
+ "bigquery": "'\\\\'",
+ "duckdb": "'\\'",
+ "presto": "'\\'",
+ "hive": "'\\\\'",
},
)
self.validate_all(
@@ -1004,20 +1018,28 @@ class TestBigQuery(Validator):
},
)
self.validate_all(
- "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])",
+ "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) AS tab",
read={
- "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])",
+ "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) as tab",
"snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
"spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)",
},
)
self.validate_all(
- "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])",
+ "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1",
read={
- "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])",
+ "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1",
+ "postgres": "SELECT * FROM (VALUES (1)) AS t1",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2",
+ read={
+ "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2",
"postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)",
},
)
+
self.validate_all(
"SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table",
write={
@@ -1050,28 +1072,43 @@ class TestBigQuery(Validator):
)
self.validate_all(
"""SELECT
- `u`.`harness_user_email` AS `harness_user_email`,
- `d`.`harness_user_id` AS `harness_user_id`,
- `harness_account_id` AS `harness_account_id`
-FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`harness_cluster_details`) AS `d`, UNNEST(`d`.`harness_account_ids`) AS `harness_account_id`
+ `u`.`user_email` AS `user_email`,
+ `d`.`user_id` AS `user_id`,
+ `account_id` AS `account_id`
+FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`cluster_details`) AS `d`, UNNEST(`d`.`account_ids`) AS `account_id`
WHERE
- NOT `harness_account_id` IS NULL""",
+ NOT `account_id` IS NULL""",
read={
"": """
SELECT
- "u"."harness_user_email" AS "harness_user_email",
- "_q_0"."d"."harness_user_id" AS "harness_user_id",
- "_q_1"."harness_account_id" AS "harness_account_id"
+ "u"."user_email" AS "user_email",
+ "_q_0"."d"."user_id" AS "user_id",
+ "_q_1"."account_id" AS "account_id"
FROM
"analytics_staging"."stg_mongodb__users" AS "u",
- UNNEST("u"."harness_cluster_details") AS "_q_0"("d"),
- UNNEST("_q_0"."d"."harness_account_ids") AS "_q_1"("harness_account_id")
+ UNNEST("u"."cluster_details") AS "_q_0"("d"),
+ UNNEST("_q_0"."d"."account_ids") AS "_q_1"("account_id")
WHERE
- NOT "_q_1"."harness_account_id" IS NULL
+ NOT "_q_1"."account_id" IS NULL
"""
},
pretty=True,
)
+ self.validate_all(
+ "SELECT MOD(x, 10)",
+ read={"postgres": "SELECT x % 10"},
+ write={
+ "bigquery": "SELECT MOD(x, 10)",
+ "postgres": "SELECT x % 10",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST(x AS DATETIME)",
+ write={
+ "": "SELECT CAST(x AS TIMESTAMP)",
+ "bigquery": "SELECT CAST(x AS DATETIME)",
+ },
+ )
def test_errors(self):
with self.assertRaises(TokenError):
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index edf3da1..c5f9847 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -1,5 +1,6 @@
from sqlglot import exp, parse_one
from tests.dialects.test_dialect import Validator
+from sqlglot.errors import ErrorLevel
class TestClickhouse(Validator):
@@ -153,7 +154,9 @@ class TestClickhouse(Validator):
self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster")
self.validate_identity("TRUNCATE DATABASE db")
self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster")
-
+ self.validate_identity(
+ "CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)"
+ )
self.validate_all(
"SELECT arrayJoin([1,2,3])",
write={
@@ -390,6 +393,22 @@ class TestClickhouse(Validator):
)
self.validate_identity("SYSTEM STOP MERGES foo.bar", check_command_warning=True)
+ self.validate_identity(
+ "INSERT INTO FUNCTION s3('url', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table"
+ )
+ self.validate_identity(
+ "INSERT INTO FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')"
+ )
+ self.validate_identity(
+ """INSERT INTO TABLE FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""",
+ """INSERT INTO FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""",
+ )
+
+ self.validate_identity("SELECT 1 FORMAT TabSeparated")
+ self.validate_identity("SELECT * FROM t FORMAT TabSeparated")
+ self.validate_identity("SELECT FORMAT")
+ self.validate_identity("1 AS FORMAT").assert_is(exp.Alias)
+
def test_cte(self):
self.validate_identity("WITH 'x' AS foo SELECT foo")
self.validate_identity("WITH ['c'] AS field_names SELECT field_names")
@@ -401,6 +420,13 @@ class TestClickhouse(Validator):
self.assertIsInstance(query.args["with"].expressions[0].this, exp.Subquery)
self.assertEqual(query.args["with"].expressions[0].alias, "y")
+ query = "WITH 1 AS var SELECT var"
+ for error_level in [ErrorLevel.IGNORE, ErrorLevel.RAISE, ErrorLevel.IMMEDIATE]:
+ self.assertEqual(
+ self.parse_one(query, error_level=error_level).sql(dialect=self.dialect),
+ query,
+ )
+
def test_ternary(self):
self.validate_all("x ? 1 : 2", write={"clickhouse": "CASE WHEN x THEN 1 ELSE 2 END"})
self.validate_all(
diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py
index 5faed51..76ab94b 100644
--- a/tests/dialects/test_dialect.py
+++ b/tests/dialects/test_dialect.py
@@ -17,8 +17,8 @@ from sqlglot.parser import logger as parser_logger
class Validator(unittest.TestCase):
dialect = None
- def parse_one(self, sql):
- return parse_one(sql, read=self.dialect)
+ def parse_one(self, sql, **kwargs):
+ return parse_one(sql, read=self.dialect, **kwargs)
def validate_identity(self, sql, write_sql=None, pretty=False, check_command_warning=False):
if check_command_warning:
@@ -611,7 +611,7 @@ class TestDialect(Validator):
write={
"duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%m-%d'))",
"hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')",
- "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%m-%d'))",
+ "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(CAST('2020-01-01' AS VARCHAR), 'yyyy-MM-dd')))",
"starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')",
"doris": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')",
},
@@ -700,7 +700,7 @@ class TestDialect(Validator):
"hive": "TO_DATE(x)",
"postgres": "CAST(x AS DATE)",
"presto": "CAST(CAST(x AS TIMESTAMP) AS DATE)",
- "snowflake": "CAST(x AS DATE)",
+ "snowflake": "TO_DATE(x)",
"doris": "TO_DATE(x)",
"mysql": "DATE(x)",
},
@@ -961,6 +961,7 @@ class TestDialect(Validator):
"presto": "CAST(x AS DATE)",
"spark": "CAST(x AS DATE)",
"sqlite": "x",
+ "tsql": "CAST(x AS DATE)",
},
)
self.validate_all(
@@ -1509,7 +1510,7 @@ class TestDialect(Validator):
"POSITION(needle, haystack, pos)",
write={
"drill": "STRPOS(SUBSTR(haystack, pos), needle) + pos - 1",
- "presto": "STRPOS(haystack, needle, pos)",
+ "presto": "STRPOS(SUBSTR(haystack, pos), needle) + pos - 1",
"spark": "LOCATE(needle, haystack, pos)",
"clickhouse": "position(haystack, needle, pos)",
"snowflake": "POSITION(needle, haystack, pos)",
@@ -1719,6 +1720,11 @@ class TestDialect(Validator):
with self.subTest(f"{expression.__class__.__name__} {dialect} -> {expected}"):
self.assertEqual(expected, expression.sql(dialect=dialect))
+ self.assertEqual(
+ parse_one("CAST(x AS DECIMAL) / y", read="mysql").sql(dialect="postgres"),
+ "CAST(x AS DECIMAL) / NULLIF(y, 0)",
+ )
+
def test_limit(self):
self.validate_all(
"SELECT * FROM data LIMIT 10, 20",
@@ -2054,6 +2060,44 @@ SELECT
)
def test_logarithm(self):
+ for base in (2, 10):
+ with self.subTest(f"Transpiling LOG base {base}"):
+ self.validate_all(
+ f"LOG({base}, a)",
+ read={
+ "": f"LOG{base}(a)",
+ "bigquery": f"LOG{base}(a)",
+ "clickhouse": f"LOG{base}(a)",
+ "databricks": f"LOG{base}(a)",
+ "duckdb": f"LOG{base}(a)",
+ "mysql": f"LOG{base}(a)",
+ "postgres": f"LOG{base}(a)",
+ "presto": f"LOG{base}(a)",
+ "spark": f"LOG{base}(a)",
+ "sqlite": f"LOG{base}(a)",
+ "trino": f"LOG{base}(a)",
+ "tsql": f"LOG{base}(a)",
+ },
+ write={
+ "bigquery": f"LOG(a, {base})",
+ "clickhouse": f"LOG{base}(a)",
+ "duckdb": f"LOG({base}, a)",
+ "mysql": f"LOG({base}, a)",
+ "oracle": f"LOG({base}, a)",
+ "postgres": f"LOG({base}, a)",
+ "presto": f"LOG{base}(a)",
+ "redshift": f"LOG({base}, a)",
+ "snowflake": f"LOG({base}, a)",
+ "spark2": f"LOG({base}, a)",
+ "spark": f"LOG({base}, a)",
+ "sqlite": f"LOG({base}, a)",
+ "starrocks": f"LOG({base}, a)",
+ "tableau": f"LOG(a, {base})",
+ "trino": f"LOG({base}, a)",
+ "tsql": f"LOG(a, {base})",
+ },
+ )
+
self.validate_all(
"LOG(x)",
read={
@@ -2082,6 +2126,7 @@ SELECT
"bigquery": "LOG(n, b)",
"databricks": "LOG(b, n)",
"drill": "LOG(b, n)",
+ "duckdb": "LOG(b, n)",
"hive": "LOG(b, n)",
"mysql": "LOG(b, n)",
"oracle": "LOG(b, n)",
@@ -2089,8 +2134,13 @@ SELECT
"snowflake": "LOG(b, n)",
"spark": "LOG(b, n)",
"sqlite": "LOG(b, n)",
+ "trino": "LOG(b, n)",
"tsql": "LOG(n, b)",
},
+ write={
+ "clickhouse": UnsupportedError,
+ "presto": UnsupportedError,
+ },
)
def test_count_if(self):
@@ -2190,7 +2240,28 @@ SELECT
"WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq",
write={
"duckdb": "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq",
- "tsql": "WITH t1(x) AS (SELECT 1), t2(y) AS (SELECT 2) SELECT * FROM (SELECT y AS y FROM t2) AS subq",
+ "tsql": "WITH t2(y) AS (SELECT 2), t1(x) AS (SELECT 1) SELECT * FROM (SELECT y AS y FROM t2) AS subq",
+ },
+ )
+ self.validate_all(
+ """
+WITH c AS (
+ WITH b AS (
+ WITH a1 AS (
+ SELECT 1
+ ), a2 AS (
+ SELECT 2
+ )
+ SELECT * FROM a1, a2
+ )
+ SELECT *
+ FROM b
+)
+SELECT *
+FROM c""",
+ write={
+ "duckdb": "WITH c AS (WITH b AS (WITH a1 AS (SELECT 1), a2 AS (SELECT 2) SELECT * FROM a1, a2) SELECT * FROM b) SELECT * FROM c",
+ "hive": "WITH a1 AS (SELECT 1), a2 AS (SELECT 2), b AS (SELECT * FROM a1, a2), c AS (SELECT * FROM b) SELECT * FROM c",
},
)
@@ -2312,3 +2383,37 @@ SELECT
self.validate_identity("TRUNCATE TABLE db.schema.test")
self.validate_identity("TRUNCATE TABLE IF EXISTS db.schema.test")
self.validate_identity("TRUNCATE TABLE t1, t2, t3")
+
+ def test_create_sequence(self):
+ self.validate_identity("CREATE SEQUENCE seq")
+ self.validate_identity(
+ "CREATE TEMPORARY SEQUENCE seq AS SMALLINT START WITH 3 INCREMENT BY 2 MINVALUE 1 MAXVALUE 10 CACHE 1 NO CYCLE OWNED BY table.col"
+ )
+ self.validate_identity(
+ "CREATE SEQUENCE seq START WITH 1 NO MINVALUE NO MAXVALUE CYCLE NO CACHE"
+ )
+ self.validate_identity("CREATE OR REPLACE TEMPORARY SEQUENCE seq INCREMENT BY 1 NO CYCLE")
+ self.validate_identity(
+ "CREATE OR REPLACE SEQUENCE IF NOT EXISTS seq COMMENT='test comment' ORDER"
+ )
+ self.validate_identity(
+ "CREATE SEQUENCE schema.seq SHARING=METADATA NOORDER NOKEEP SCALE EXTEND SHARD EXTEND SESSION"
+ )
+ self.validate_identity(
+ "CREATE SEQUENCE schema.seq SHARING=DATA ORDER KEEP NOSCALE NOSHARD GLOBAL"
+ )
+ self.validate_identity(
+ "CREATE SEQUENCE schema.seq SHARING=DATA NOCACHE NOCYCLE SCALE NOEXTEND"
+ )
+ self.validate_identity(
+ """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE OWNED BY NONE""",
+ """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE""",
+ )
+ self.validate_identity(
+ """CREATE TEMPORARY SEQUENCE seq START 1""",
+ """CREATE TEMPORARY SEQUENCE seq START WITH 1""",
+ )
+ self.validate_identity(
+ """CREATE TEMPORARY SEQUENCE seq START WITH = 1 INCREMENT BY = 2""",
+ """CREATE TEMPORARY SEQUENCE seq START WITH 1 INCREMENT BY 2""",
+ )
diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py
index 41c02fb..634c247 100644
--- a/tests/dialects/test_drill.py
+++ b/tests/dialects/test_drill.py
@@ -5,68 +5,17 @@ class TestDrill(Validator):
dialect = "drill"
def test_drill(self):
- self.validate_all(
- "DATE_FORMAT(a, 'yyyy')",
- write={"drill": "TO_CHAR(a, 'yyyy')"},
+ self.validate_identity(
+ "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))"
+ )
+ self.validate_identity(
+ "SELECT * FROM (SELECT * FROM t) PIVOT(avg(c1) AS ac1 FOR c2 IN ('V' AS v))",
)
- def test_string_literals(self):
self.validate_all(
"SELECT '2021-01-01' + INTERVAL 1 MONTH",
write={
+ "drill": "SELECT '2021-01-01' + INTERVAL '1' MONTH",
"mysql": "SELECT '2021-01-01' + INTERVAL '1' MONTH",
},
)
-
- def test_quotes(self):
- self.validate_all(
- "'\\''",
- write={
- "duckdb": "''''",
- "presto": "''''",
- "hive": "'\\''",
- "spark": "'\\''",
- },
- )
- self.validate_all(
- "'\"x\"'",
- write={
- "duckdb": "'\"x\"'",
- "presto": "'\"x\"'",
- "hive": "'\"x\"'",
- "spark": "'\"x\"'",
- },
- )
- self.validate_all(
- "'\\\\a'",
- read={
- "presto": "'\\\\a'",
- },
- write={
- "duckdb": "'\\\\a'",
- "presto": "'\\\\a'",
- "hive": "'\\\\a'",
- "spark": "'\\\\a'",
- },
- )
-
- def test_table_function(self):
- self.validate_all(
- "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))",
- write={
- "drill": "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))",
- },
- )
-
- def test_validate_pivot(self):
- self.validate_all(
- "SELECT * FROM (SELECT education_level, salary, marital_status, "
- "EXTRACT(year FROM age(birth_date)) age FROM cp.`employee.json`) PIVOT (avg(salary) AS "
- "avg_salary, avg(age) AS avg_age FOR marital_status IN ('M' married, 'S' single))",
- write={
- "drill": "SELECT * FROM (SELECT education_level, salary, marital_status, "
- "EXTRACT(year FROM age(birth_date)) AS age FROM cp.`employee.json`) "
- "PIVOT(avg(salary) AS avg_salary, avg(age) AS avg_age FOR marital_status "
- "IN ('M' AS married, 'S' AS single))"
- },
- )
diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py
index 58d1f06..5a7e93e 100644
--- a/tests/dialects/test_duckdb.py
+++ b/tests/dialects/test_duckdb.py
@@ -1,5 +1,6 @@
from sqlglot import ErrorLevel, UnsupportedError, exp, parse_one, transpile
from sqlglot.helper import logger as helper_logger
+from sqlglot.optimizer.annotate_types import annotate_types
from tests.dialects.test_dialect import Validator
@@ -7,6 +8,31 @@ class TestDuckDB(Validator):
dialect = "duckdb"
def test_duckdb(self):
+ query = "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT t.col['b'] FROM _data, UNNEST(_data.col) AS t(col) WHERE t.col['a'] = 1"
+ expr = annotate_types(self.validate_identity(query))
+ self.assertEqual(
+ expr.sql(dialect="bigquery"),
+ "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT col.b FROM _data, UNNEST(_data.col) AS col WHERE col.a = 1",
+ )
+
+ self.validate_all(
+ "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ read={
+ "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
+ read={
+ "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
+ "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
+ },
+ write={
+ "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
+ "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
+ },
+ )
self.validate_all(
'STRUCT_PACK("a b" := 1)',
write={
@@ -15,7 +41,25 @@ class TestDuckDB(Validator):
"snowflake": "OBJECT_CONSTRUCT('a b', 1)",
},
)
-
+ self.validate_all(
+ "ARRAY_TO_STRING(arr, delim)",
+ read={
+ "bigquery": "ARRAY_TO_STRING(arr, delim)",
+ "postgres": "ARRAY_TO_STRING(arr, delim)",
+ "presto": "ARRAY_JOIN(arr, delim)",
+ "snowflake": "ARRAY_TO_STRING(arr, delim)",
+ "spark": "ARRAY_JOIN(arr, delim)",
+ },
+ write={
+ "bigquery": "ARRAY_TO_STRING(arr, delim)",
+ "duckdb": "ARRAY_TO_STRING(arr, delim)",
+ "postgres": "ARRAY_TO_STRING(arr, delim)",
+ "presto": "ARRAY_JOIN(arr, delim)",
+ "snowflake": "ARRAY_TO_STRING(arr, delim)",
+ "spark": "ARRAY_JOIN(arr, delim)",
+ "tsql": "STRING_AGG(arr, delim)",
+ },
+ )
self.validate_all(
"SELECT SUM(X) OVER (ORDER BY x)",
write={
@@ -131,13 +175,6 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
- "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data",
- write={
- "bigquery": "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT (SELECT col.b FROM UNNEST(col) AS col WHERE col.a = 1) FROM _data",
- "duckdb": "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data",
- },
- )
- self.validate_all(
"SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table",
read={
"bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table",
@@ -201,6 +238,9 @@ class TestDuckDB(Validator):
parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b"
)
+ self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])")
+ self.validate_identity("SELECT MAP {'x': 1}")
+ self.validate_identity("SELECT df1.*, df2.* FROM df1 POSITIONAL JOIN df2")
self.validate_identity("MAKE_TIMESTAMP(1992, 9, 20, 13, 34, 27.123456)")
self.validate_identity("MAKE_TIMESTAMP(1667810584123456)")
self.validate_identity("SELECT EPOCH_MS(10) AS t")
@@ -235,6 +275,18 @@ class TestDuckDB(Validator):
"""SELECT '{"foo": [1, 2, 3]}' -> '$.foo' -> '$[0]'""",
)
self.validate_identity(
+ "SELECT ($$hello)'world$$)",
+ "SELECT ('hello)''world')",
+ )
+ self.validate_identity(
+ "SELECT $$foo$$",
+ "SELECT 'foo'",
+ )
+ self.validate_identity(
+ "SELECT $tag$foo$tag$",
+ "SELECT 'foo'",
+ )
+ self.validate_identity(
"JSON_EXTRACT(x, '$.family')",
"x -> '$.family'",
)
@@ -679,7 +731,19 @@ class TestDuckDB(Validator):
},
)
self.validate_identity(
- "[x.STRING_SPLIT(' ')[1] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]"
+ "[x.STRING_SPLIT(' ')[i] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]"
+ )
+ self.validate_identity(
+ """SELECT LIST_VALUE(1)[i]""",
+ """SELECT ([1])[i]""",
+ )
+ self.validate_identity(
+ """{'x': LIST_VALUE(1)[i]}""",
+ """{'x': ([1])[i]}""",
+ )
+ self.validate_identity(
+ """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': LIST_VALUE(1, 2, 3)[i], 'f2': LIST_VALUE(1, 2, 3)[i]})""",
+ """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': ([1, 2, 3])[i], 'f2': ([1, 2, 3])[i]})""",
)
self.assertEqual(
@@ -689,8 +753,6 @@ class TestDuckDB(Validator):
"WARNING:sqlglot:Applying array index offset (1)",
"WARNING:sqlglot:Applying array index offset (1)",
"WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (-1)",
- "WARNING:sqlglot:Applying array index offset (1)",
],
)
@@ -702,7 +764,7 @@ class TestDuckDB(Validator):
"SELECT MAKE_DATE(2016, 12, 25)", read={"bigquery": "SELECT DATE(2016, 12, 25)"}
)
self.validate_all(
- "SELECT CAST(CAST('2016-12-25 23:59:59' AS DATETIME) AS DATE)",
+ "SELECT CAST(CAST('2016-12-25 23:59:59' AS TIMESTAMP) AS DATE)",
read={"bigquery": "SELECT DATE(DATETIME '2016-12-25 23:59:59')"},
)
self.validate_all(
@@ -724,7 +786,7 @@ class TestDuckDB(Validator):
write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"},
)
self.validate_all(
- "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
+ "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - (DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7 % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
read={
"presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1",
},
@@ -952,7 +1014,7 @@ class TestDuckDB(Validator):
"hive": "CAST(COL AS ARRAY<BIGINT>)",
"spark": "CAST(COL AS ARRAY<BIGINT>)",
"postgres": "CAST(COL AS BIGINT[])",
- "snowflake": "CAST(COL AS ARRAY)",
+ "snowflake": "CAST(COL AS ARRAY(BIGINT))",
},
)
self.validate_all(
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index b892dd6..33294ee 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -235,15 +235,18 @@ class TestHive(Validator):
},
)
self.validate_all(
- "'\\\\a'",
+ "'\\\\\\\\a'",
read={
+ "drill": "'\\\\\\\\a'",
+ "duckdb": "'\\\\a'",
"presto": "'\\\\a'",
},
write={
+ "drill": "'\\\\\\\\a'",
"duckdb": "'\\\\a'",
+ "hive": "'\\\\\\\\a'",
"presto": "'\\\\a'",
- "hive": "'\\\\a'",
- "spark": "'\\\\a'",
+ "spark": "'\\\\\\\\a'",
},
)
@@ -369,7 +372,7 @@ class TestHive(Validator):
"UNIX_TIMESTAMP(x)",
write={
"duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))",
- "presto": "TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))",
+ "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(CAST(x AS VARCHAR), 'yyyy-MM-dd HH:mm:ss')))",
"hive": "UNIX_TIMESTAMP(x)",
"spark": "UNIX_TIMESTAMP(x)",
"": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')",
@@ -563,7 +566,7 @@ class TestHive(Validator):
"LOCATE('a', x, 3)",
write={
"duckdb": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
- "presto": "STRPOS(x, 'a', 3)",
+ "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
"hive": "LOCATE('a', x, 3)",
"spark": "LOCATE('a', x, 3)",
},
@@ -654,15 +657,6 @@ class TestHive(Validator):
},
)
self.validate_all(
- "LOG(10, 2)",
- write={
- "duckdb": "LOG(10, 2)",
- "presto": "LOG(10, 2)",
- "hive": "LOG(10, 2)",
- "spark": "LOG(10, 2)",
- },
- )
- self.validate_all(
'ds = "2020-01-01"',
write={
"duckdb": "ds = '2020-01-01'",
@@ -745,13 +739,12 @@ class TestHive(Validator):
)
def test_escapes(self) -> None:
- self.validate_identity("'\n'")
+ self.validate_identity("'\n'", "'\\n'")
self.validate_identity("'\\n'")
- self.validate_identity("'\\\n'")
+ self.validate_identity("'\\\n'", "'\\\\\\n'")
self.validate_identity("'\\\\n'")
self.validate_identity("''")
self.validate_identity("'\\\\'")
- self.validate_identity("'\\z'")
self.validate_identity("'\\\\z'")
def test_data_type(self):
diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py
index 5f23c44..7a9d6bf 100644
--- a/tests/dialects/test_mysql.py
+++ b/tests/dialects/test_mysql.py
@@ -86,9 +86,16 @@ class TestMySQL(Validator):
"ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT",
)
self.validate_identity(
+ "ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT",
+ )
+ self.validate_identity(
"CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP) DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC",
"CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP() ON UPDATE CURRENT_TIMESTAMP()) DEFAULT CHARACTER SET=utf8 ROW_FORMAT=DYNAMIC",
)
+ self.validate_identity(
+ "CREATE TABLE `foo` (a VARCHAR(10), KEY idx_a (a DESC))",
+ "CREATE TABLE `foo` (a VARCHAR(10), INDEX idx_a (a DESC))",
+ )
self.validate_all(
"CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'",
@@ -510,9 +517,8 @@ class TestMySQL(Validator):
)
def test_mysql_time(self):
- self.validate_identity("FROM_UNIXTIME(a, b)")
- self.validate_identity("FROM_UNIXTIME(a, b, c)")
self.validate_identity("TIME_STR_TO_UNIX(x)", "UNIX_TIMESTAMP(x)")
+ self.validate_identity("SELECT FROM_UNIXTIME(1711366265, '%Y %D %M')")
self.validate_all(
"SELECT TO_DAYS(x)",
write={
@@ -578,6 +584,17 @@ class TestMySQL(Validator):
self.validate_all(
"STR_TO_DATE(x, '%Y-%m-%dT%T')", write={"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')"}
)
+ self.validate_all(
+ "SELECT FROM_UNIXTIME(col)",
+ read={
+ "postgres": "SELECT TO_TIMESTAMP(col)",
+ },
+ write={
+ "mysql": "SELECT FROM_UNIXTIME(col)",
+ "postgres": "SELECT TO_TIMESTAMP(col)",
+ "redshift": "SELECT (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')",
+ },
+ )
def test_mysql(self):
self.validate_all(
@@ -723,6 +740,52 @@ class TestMySQL(Validator):
"postgres": "STRING_AGG(DISTINCT x, '' ORDER BY y DESC NULLS LAST)",
},
)
+ self.validate_all(
+ "GROUP_CONCAT(a, b, c SEPARATOR ',')",
+ write={
+ "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR ',')",
+ "sqlite": "GROUP_CONCAT(a || b || c, ',')",
+ "tsql": "STRING_AGG(CONCAT(a, b, c), ',')",
+ "postgres": "STRING_AGG(CONCAT(a, b, c), ',')",
+ "presto": "ARRAY_JOIN(ARRAY_AGG(CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR), CAST(c AS VARCHAR))), ',')",
+ },
+ )
+ self.validate_all(
+ "GROUP_CONCAT(a, b, c SEPARATOR '')",
+ write={
+ "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR '')",
+ "sqlite": "GROUP_CONCAT(a || b || c, '')",
+ "tsql": "STRING_AGG(CONCAT(a, b, c), '')",
+ "postgres": "STRING_AGG(CONCAT(a, b, c), '')",
+ },
+ )
+ self.validate_all(
+ "GROUP_CONCAT(DISTINCT a, b, c SEPARATOR '')",
+ write={
+ "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) SEPARATOR '')",
+ "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')",
+ "tsql": "STRING_AGG(CONCAT(a, b, c), '')",
+ "postgres": "STRING_AGG(DISTINCT CONCAT(a, b, c), '')",
+ },
+ )
+ self.validate_all(
+ "GROUP_CONCAT(a, b, c ORDER BY d SEPARATOR '')",
+ write={
+ "mysql": "GROUP_CONCAT(CONCAT(a, b, c) ORDER BY d SEPARATOR '')",
+ "sqlite": "GROUP_CONCAT(a || b || c, '')",
+ "tsql": "STRING_AGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)",
+ "postgres": "STRING_AGG(CONCAT(a, b, c), '' ORDER BY d NULLS FIRST)",
+ },
+ )
+ self.validate_all(
+ "GROUP_CONCAT(DISTINCT a, b, c ORDER BY d SEPARATOR '')",
+ write={
+ "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) ORDER BY d SEPARATOR '')",
+ "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')",
+ "tsql": "STRING_AGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)",
+ "postgres": "STRING_AGG(DISTINCT CONCAT(a, b, c), '' ORDER BY d NULLS FIRST)",
+ },
+ )
self.validate_identity(
"CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
)
diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py
index 9438507..526b0b5 100644
--- a/tests/dialects/test_oracle.py
+++ b/tests/dialects/test_oracle.py
@@ -94,8 +94,21 @@ class TestOracle(Validator):
"SELECT * FROM t SAMPLE (0.25)",
)
self.validate_identity("SELECT TO_CHAR(-100, 'L99', 'NL_CURRENCY = '' AusDollars '' ')")
+ self.validate_identity(
+ "SELECT * FROM t START WITH col CONNECT BY NOCYCLE PRIOR col1 = col2"
+ )
self.validate_all(
+ "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
+ read={
+ "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
+ },
+ write={
+ "oracle": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
+ "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
+ },
+ )
+ self.validate_all(
"TO_CHAR(x)",
write={
"doris": "CAST(x AS STRING)",
@@ -103,6 +116,59 @@ class TestOracle(Validator):
},
)
self.validate_all(
+ "TO_NUMBER(expr, fmt, nlsparam)",
+ read={
+ "teradata": "TO_NUMBER(expr, fmt, nlsparam)",
+ },
+ write={
+ "oracle": "TO_NUMBER(expr, fmt, nlsparam)",
+ "teradata": "TO_NUMBER(expr, fmt, nlsparam)",
+ },
+ )
+ self.validate_all(
+ "TO_NUMBER(x)",
+ write={
+ "bigquery": "CAST(x AS FLOAT64)",
+ "doris": "CAST(x AS DOUBLE)",
+ "drill": "CAST(x AS DOUBLE)",
+ "duckdb": "CAST(x AS DOUBLE)",
+ "hive": "CAST(x AS DOUBLE)",
+ "mysql": "CAST(x AS DOUBLE)",
+ "oracle": "TO_NUMBER(x)",
+ "postgres": "CAST(x AS DOUBLE PRECISION)",
+ "presto": "CAST(x AS DOUBLE)",
+ "redshift": "CAST(x AS DOUBLE PRECISION)",
+ "snowflake": "TO_NUMBER(x)",
+ "spark": "CAST(x AS DOUBLE)",
+ "spark2": "CAST(x AS DOUBLE)",
+ "starrocks": "CAST(x AS DOUBLE)",
+ "tableau": "CAST(x AS DOUBLE)",
+ "teradata": "TO_NUMBER(x)",
+ },
+ )
+ self.validate_all(
+ "TO_NUMBER(x, fmt)",
+ read={
+ "databricks": "TO_NUMBER(x, fmt)",
+ "drill": "TO_NUMBER(x, fmt)",
+ "postgres": "TO_NUMBER(x, fmt)",
+ "snowflake": "TO_NUMBER(x, fmt)",
+ "spark": "TO_NUMBER(x, fmt)",
+ "redshift": "TO_NUMBER(x, fmt)",
+ "teradata": "TO_NUMBER(x, fmt)",
+ },
+ write={
+ "databricks": "TO_NUMBER(x, fmt)",
+ "drill": "TO_NUMBER(x, fmt)",
+ "oracle": "TO_NUMBER(x, fmt)",
+ "postgres": "TO_NUMBER(x, fmt)",
+ "snowflake": "TO_NUMBER(x, fmt)",
+ "spark": "TO_NUMBER(x, fmt)",
+ "redshift": "TO_NUMBER(x, fmt)",
+ "teradata": "TO_NUMBER(x, fmt)",
+ },
+ )
+ self.validate_all(
"SELECT TO_CHAR(TIMESTAMP '1999-12-01 10:00:00')",
write={
"oracle": "SELECT TO_CHAR(CAST('1999-12-01 10:00:00' AS TIMESTAMP), 'YYYY-MM-DD HH24:MI:SS')",
@@ -210,6 +276,8 @@ class TestOracle(Validator):
self.validate_identity(
"SELECT /*+ LEADING(e j) */ * FROM employees e, departments d, job_history j WHERE e.department_id = d.department_id AND e.hire_date = j.start_date"
)
+ self.validate_identity("INSERT /*+ APPEND */ INTO IAP_TBL (id, col1) VALUES (2, 'test2')")
+ self.validate_identity("INSERT /*+ APPEND_VALUES */ INTO dest_table VALUES (i, 'Value')")
def test_xml_table(self):
self.validate_identity("XMLTABLE('x')")
diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py
index 1d0ea8b..7a41cef 100644
--- a/tests/dialects/test_postgres.py
+++ b/tests/dialects/test_postgres.py
@@ -1,4 +1,4 @@
-from sqlglot import ParseError, UnsupportedError, exp, parse_one, transpile
+from sqlglot import ParseError, UnsupportedError, exp, transpile
from sqlglot.helper import logger as helper_logger
from tests.dialects.test_dialect import Validator
@@ -12,27 +12,12 @@ class TestPostgres(Validator):
self.validate_identity("|/ x", "SQRT(x)")
self.validate_identity("||/ x", "CBRT(x)")
- expr = parse_one(
- "SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)", read="postgres"
- )
+ expr = self.parse_one("SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)")
unnest = expr.args["joins"][0].this.this
unnest.assert_is(exp.Unnest)
alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION"""
- expr = parse_one(alter_table_only, read="postgres")
-
- # Checks that user-defined types are parsed into DataType instead of Identifier
- parse_one("CREATE TABLE t (a udt)", read="postgres").this.expressions[0].args[
- "kind"
- ].assert_is(exp.DataType)
-
- # Checks that OID is parsed into a DataType (ObjectIdentifier)
- self.assertIsInstance(
- parse_one("CREATE TABLE public.propertydata (propertyvalue oid)", read="postgres").find(
- exp.DataType
- ),
- exp.ObjectIdentifier,
- )
+ expr = self.parse_one(alter_table_only)
self.assertIsInstance(expr, exp.AlterTable)
self.assertEqual(expr.sql(dialect="postgres"), alter_table_only)
@@ -55,13 +40,6 @@ class TestPostgres(Validator):
self.validate_identity("CAST(x AS DATEMULTIRANGE)")
self.validate_identity("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]")
self.validate_identity("SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]")
- self.validate_all(
- "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]",
- write={
- "": "SELECT ARRAY_OVERLAPS(ARRAY(1, 2, 3), ARRAY(1, 2))",
- "postgres": "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]",
- },
- )
self.validate_identity("x$")
self.validate_identity("SELECT ARRAY[1, 2, 3]")
self.validate_identity("SELECT ARRAY(SELECT 1)")
@@ -86,6 +64,12 @@ class TestPostgres(Validator):
self.validate_identity("SELECT CURRENT_USER")
self.validate_identity("SELECT * FROM ONLY t1")
self.validate_identity(
+ """UPDATE "x" SET "y" = CAST('0 days 60.000000 seconds' AS INTERVAL) WHERE "x"."id" IN (2, 3)"""
+ )
+ self.validate_identity(
+ "WITH t1 AS MATERIALIZED (SELECT 1), t2 AS NOT MATERIALIZED (SELECT 2) SELECT * FROM t1, t2"
+ )
+ self.validate_identity(
"""LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND '1 month' FOLLOWING)"""
)
self.validate_identity(
@@ -107,9 +91,6 @@ class TestPostgres(Validator):
"SELECT SUM(x) OVER a, SUM(y) OVER b FROM c WINDOW a AS (PARTITION BY d), b AS (PARTITION BY e)"
)
self.validate_identity(
- "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)"
- )
- self.validate_identity(
"SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1) IN ('ab') THEN 1 ELSE 0 END"
)
self.validate_identity(
@@ -167,6 +148,10 @@ class TestPostgres(Validator):
"SELECT 'Dianne''s horse'",
)
self.validate_identity(
+ "SELECT $$The price is $9.95$$ AS msg",
+ "SELECT 'The price is $9.95' AS msg",
+ )
+ self.validate_identity(
"COMMENT ON TABLE mytable IS $$doc this$$", "COMMENT ON TABLE mytable IS 'doc this'"
)
self.validate_identity(
@@ -329,6 +314,36 @@ class TestPostgres(Validator):
self.validate_identity("SELECT * FROM t1*", "SELECT * FROM t1")
self.validate_all(
+ 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5',
+ write={
+ "bigquery": "SELECT * FROM `test_table` ORDER BY RAND() NULLS LAST LIMIT 5",
+ "duckdb": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5',
+ "postgres": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5',
+ "tsql": "SELECT TOP 5 * FROM [test_table] ORDER BY RAND()",
+ },
+ )
+ self.validate_all(
+ "SELECT (data -> 'en-US') AS acat FROM my_table",
+ write={
+ "duckdb": """SELECT (data -> '$."en-US"') AS acat FROM my_table""",
+ "postgres": "SELECT (data -> 'en-US') AS acat FROM my_table",
+ },
+ )
+ self.validate_all(
+ "SELECT (data ->> 'en-US') AS acat FROM my_table",
+ write={
+ "duckdb": """SELECT (data ->> '$."en-US"') AS acat FROM my_table""",
+ "postgres": "SELECT (data ->> 'en-US') AS acat FROM my_table",
+ },
+ )
+ self.validate_all(
+ "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]",
+ write={
+ "": "SELECT ARRAY_OVERLAPS(ARRAY(1, 2, 3), ARRAY(1, 2))",
+ "postgres": "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]",
+ },
+ )
+ self.validate_all(
"SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t",
read={
"clickhouse": "SELECT JSONExtractString(x, k1, k2, k3) FROM t",
@@ -518,15 +533,6 @@ class TestPostgres(Validator):
},
)
self.validate_all(
- "CREATE TABLE x (a UUID, b BYTEA)",
- write={
- "duckdb": "CREATE TABLE x (a UUID, b BLOB)",
- "presto": "CREATE TABLE x (a UUID, b VARBINARY)",
- "hive": "CREATE TABLE x (a UUID, b BINARY)",
- "spark": "CREATE TABLE x (a UUID, b BINARY)",
- },
- )
- self.validate_all(
"SELECT * FROM x FETCH 1 ROW",
write={
"postgres": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY",
@@ -635,11 +641,30 @@ class TestPostgres(Validator):
"postgres": "x / y ^ z",
},
)
-
- self.assertIsInstance(parse_one("id::UUID", read="postgres"), exp.Cast)
+ self.validate_all(
+ "CAST(x AS NAME)",
+ read={
+ "redshift": "CAST(x AS NAME)",
+ },
+ write={
+ "postgres": "CAST(x AS NAME)",
+ "redshift": "CAST(x AS NAME)",
+ },
+ )
+ self.assertIsInstance(self.parse_one("id::UUID"), exp.Cast)
def test_ddl(self):
- expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres")
+ # Checks that user-defined types are parsed into DataType instead of Identifier
+ self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(
+ exp.DataType
+ )
+
+ # Checks that OID is parsed into a DataType (ObjectIdentifier)
+ self.assertIsInstance(
+ self.parse_one("CREATE TABLE p.t (c oid)").find(exp.DataType), exp.ObjectIdentifier
+ )
+
+ expr = self.parse_one("CREATE TABLE t (x INTERVAL day)")
cdef = expr.find(exp.ColumnDef)
cdef.args["kind"].assert_is(exp.DataType)
self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)")
@@ -667,6 +692,21 @@ class TestPostgres(Validator):
self.validate_identity("TRUNCATE TABLE t1 CONTINUE IDENTITY CASCADE")
self.validate_identity("TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT")
self.validate_identity(
+ "CREATE TABLE t (vid INT NOT NULL, CONSTRAINT ht_vid_nid_fid_idx EXCLUDE (INT4RANGE(vid, nid) WITH &&, INT4RANGE(fid, fid, '[]') WITH &&))"
+ )
+ self.validate_identity(
+ "CREATE TABLE t (i INT, PRIMARY KEY (i), EXCLUDE USING gist(col varchar_pattern_ops DESC NULLS LAST WITH &&) WITH (sp1=1, sp2=2))"
+ )
+ self.validate_identity(
+ "CREATE TABLE t (i INT, EXCLUDE USING btree(INT4RANGE(vid, nid, '[]') ASC NULLS FIRST WITH &&) INCLUDE (col1, col2))"
+ )
+ self.validate_identity(
+ "CREATE TABLE t (i INT, EXCLUDE USING gin(col1 WITH &&, col2 WITH ||) USING INDEX TABLESPACE tablespace WHERE (id > 5))"
+ )
+ self.validate_identity(
+ "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)"
+ )
+ self.validate_identity(
"CREATE TABLE cust_part3 PARTITION OF customers FOR VALUES WITH (MODULUS 3, REMAINDER 2)"
)
self.validate_identity(
@@ -691,13 +731,13 @@ class TestPostgres(Validator):
"CREATE INDEX index_issues_on_title_trigram ON public.issues USING gin(title public.gin_trgm_ops)"
)
self.validate_identity(
- "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO NOTHING RETURNING *"
+ "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO NOTHING RETURNING *"
)
self.validate_identity(
- "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO UPDATE SET x.id = 1 RETURNING *"
+ "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = 1 RETURNING *"
)
self.validate_identity(
- "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO UPDATE SET x.id = excluded.id RETURNING *"
+ "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = excluded.id RETURNING *"
)
self.validate_identity(
"INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT ON CONSTRAINT pkey DO NOTHING RETURNING *"
@@ -740,8 +780,7 @@ class TestPostgres(Validator):
check_command_warning=True,
)
self.validate_identity(
- "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
- check_command_warning=True,
+ "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp"
)
self.validate_identity(
"CREATE FUNCTION x(INT) RETURNS INT SET search_path TO 'public'",
@@ -793,7 +832,7 @@ class TestPostgres(Validator):
]))
)
""",
- "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY (ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))",
+ "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY(ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))",
)
self.validate_identity(
"CREATE INDEX index_ci_pipelines_on_project_idandrefandiddesc ON public.ci_pipelines USING btree(project_id, ref, id DESC)"
@@ -803,6 +842,16 @@ class TestPostgres(Validator):
"TRUNCATE TABLE ONLY t1, t2, ONLY t3, t4, t5 RESTART IDENTITY CASCADE",
)
+ self.validate_all(
+ "CREATE TABLE x (a UUID, b BYTEA)",
+ write={
+ "duckdb": "CREATE TABLE x (a UUID, b BLOB)",
+ "presto": "CREATE TABLE x (a UUID, b VARBINARY)",
+ "hive": "CREATE TABLE x (a UUID, b BINARY)",
+ "spark": "CREATE TABLE x (a UUID, b BINARY)",
+ },
+ )
+
with self.assertRaises(ParseError):
transpile("CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres")
with self.assertRaises(ParseError):
@@ -857,7 +906,7 @@ class TestPostgres(Validator):
)
def test_operator(self):
- expr = parse_one("1 OPERATOR(+) 2 OPERATOR(*) 3", read="postgres")
+ expr = self.parse_one("1 OPERATOR(+) 2 OPERATOR(*) 3")
expr.left.assert_is(exp.Operator)
expr.left.left.assert_is(exp.Literal)
@@ -926,8 +975,8 @@ class TestPostgres(Validator):
def test_regexp_binary(self):
"""See https://github.com/tobymao/sqlglot/pull/2404 for details."""
- self.assertIsInstance(parse_one("'thomas' ~ '.*thomas.*'", read="postgres"), exp.Binary)
- self.assertIsInstance(parse_one("'thomas' ~* '.*thomas.*'", read="postgres"), exp.Binary)
+ self.assertIsInstance(self.parse_one("'thomas' ~ '.*thomas.*'"), exp.Binary)
+ self.assertIsInstance(self.parse_one("'thomas' ~* '.*thomas.*'"), exp.Binary)
def test_unnest_json_array(self):
trino_input = """
diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py
index 2ea595e..2162499 100644
--- a/tests/dialects/test_presto.py
+++ b/tests/dialects/test_presto.py
@@ -63,7 +63,7 @@ class TestPresto(Validator):
"duckdb": "CAST(a AS INT[])",
"presto": "CAST(a AS ARRAY(INTEGER))",
"spark": "CAST(a AS ARRAY<INT>)",
- "snowflake": "CAST(a AS ARRAY)",
+ "snowflake": "CAST(a AS ARRAY(INT))",
},
)
self.validate_all(
@@ -82,18 +82,17 @@ class TestPresto(Validator):
"duckdb": "CAST([1, 2] AS BIGINT[])",
"presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
"spark": "CAST(ARRAY(1, 2) AS ARRAY<BIGINT>)",
- "snowflake": "CAST([1, 2] AS ARRAY)",
+ "snowflake": "CAST([1, 2] AS ARRAY(BIGINT))",
},
)
self.validate_all(
- "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INT,INT))",
+ "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INT))",
write={
- "bigquery": "CAST(MAP([1], [1]) AS MAP<INT64, INT64>)",
- "duckdb": "CAST(MAP([1], [1]) AS MAP(INT, INT))",
- "presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))",
- "hive": "CAST(MAP(1, 1) AS MAP<INT, INT>)",
- "spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP<INT, INT>)",
- "snowflake": "CAST(OBJECT_CONSTRUCT(1, 1) AS OBJECT)",
+ "duckdb": "CAST(MAP(['key'], [1]) AS MAP(TEXT, INT))",
+ "presto": "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INTEGER))",
+ "hive": "CAST(MAP('key', 1) AS MAP<STRING, INT>)",
+ "snowflake": "CAST(OBJECT_CONSTRUCT('key', 1) AS MAP(VARCHAR, INT))",
+ "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('key'), ARRAY(1)) AS MAP<STRING, INT>)",
},
)
self.validate_all(
@@ -104,7 +103,7 @@ class TestPresto(Validator):
"presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))",
"hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)",
"spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)",
- "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS OBJECT)",
+ "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS MAP(VARCHAR, ARRAY(INT)))",
},
)
self.validate_all(
@@ -178,6 +177,17 @@ class TestPresto(Validator):
"spark": "ARRAY_JOIN(x, '-', 'a')",
},
)
+ self.validate_all(
+ "STRPOS('ABC', 'A', 3)",
+ read={
+ "trino": "STRPOS('ABC', 'A', 3)",
+ },
+ write={
+ "presto": "STRPOS('ABC', 'A', 3)",
+ "trino": "STRPOS('ABC', 'A', 3)",
+ "snowflake": "POSITION('A', 'ABC')",
+ },
+ )
def test_interval_plural_to_singular(self):
# Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals
diff --git a/tests/dialects/test_prql.py b/tests/dialects/test_prql.py
new file mode 100644
index 0000000..9a42d0c
--- /dev/null
+++ b/tests/dialects/test_prql.py
@@ -0,0 +1,17 @@
+from tests.dialects.test_dialect import Validator
+
+
+class TestPRQL(Validator):
+ dialect = "prql"
+
+ def test_prql(self):
+ self.validate_identity("FROM x", "SELECT * FROM x")
+ self.validate_identity("FROM x DERIVE a + 1", "SELECT *, a + 1 FROM x")
+ self.validate_identity("FROM x DERIVE x = a + 1", "SELECT *, a + 1 AS x FROM x")
+ self.validate_identity("FROM x DERIVE {a + 1}", "SELECT *, a + 1 FROM x")
+ self.validate_identity("FROM x DERIVE {x = a + 1, b}", "SELECT *, a + 1 AS x, b FROM x")
+ self.validate_identity("FROM x TAKE 10", "SELECT * FROM x LIMIT 10")
+ self.validate_identity("FROM x TAKE 10 TAKE 5", "SELECT * FROM x LIMIT 5")
+ self.validate_identity(
+ "FROM x DERIVE {x = a + 1, b} SELECT {y = x, 2}", "SELECT a + 1 AS y, 2 FROM x"
+ )
diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py
index 506f429..a91f4f9 100644
--- a/tests/dialects/test_redshift.py
+++ b/tests/dialects/test_redshift.py
@@ -139,6 +139,15 @@ class TestRedshift(Validator):
"presto": "LENGTH(x)",
},
)
+ self.validate_all(
+ "x LIKE 'abc' || '%'",
+ read={
+ "duckdb": "STARTS_WITH(x, 'abc')",
+ },
+ write={
+ "redshift": "x LIKE 'abc' || '%'",
+ },
+ )
self.validate_all(
"SELECT SYSDATE",
@@ -204,18 +213,6 @@ class TestRedshift(Validator):
},
)
self.validate_all(
- "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid",
- write={
- "redshift": "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid",
- },
- )
- self.validate_all(
- 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5',
- write={
- "redshift": 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5'
- },
- )
- self.validate_all(
"SELECT DISTINCT ON (a) a, b FROM x ORDER BY c DESC",
write={
"bigquery": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
@@ -293,6 +290,7 @@ class TestRedshift(Validator):
)
def test_identity(self):
+ self.validate_identity("LISTAGG(DISTINCT foo, ', ')")
self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1")
self.validate_identity("SELECT DATEADD(DAY, 1, 'today')")
self.validate_identity("SELECT * FROM #x")
@@ -306,6 +304,12 @@ class TestRedshift(Validator):
self.validate_identity("SELECT APPROXIMATE AS y")
self.validate_identity("CREATE TABLE t (c BIGINT IDENTITY(0, 1))")
self.validate_identity(
+ "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid"
+ )
+ self.validate_identity(
+ """SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE '%start\\\\_%' LIMIT 5"""
+ )
+ self.validate_identity(
"""SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)"""
)
self.validate_identity(
@@ -463,6 +467,10 @@ FROM (
"": "INSERT INTO t(a, b) SELECT a, b FROM (VALUES (1, 2), (3, 4)) AS t (a, b)",
},
)
+ self.validate_identity("CREATE TABLE table_backup BACKUP NO AS SELECT * FROM event")
+ self.validate_identity("CREATE TABLE table_backup BACKUP YES AS SELECT * FROM event")
+ self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP NO")
+ self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP YES")
def test_create_table_like(self):
self.validate_identity(
@@ -499,7 +507,11 @@ FROM (
def test_varchar_max(self):
self.validate_all(
- "CREATE TABLE TEST (cola VARCHAR(MAX))",
+ 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))',
+ read={
+ "redshift": "CREATE TABLE TEST (cola VARCHAR(max))",
+ "tsql": "CREATE TABLE TEST (cola VARCHAR(max))",
+ },
write={
"redshift": 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))',
},
diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py
index e48f811..a41d35a 100644
--- a/tests/dialects/test_snowflake.py
+++ b/tests/dialects/test_snowflake.py
@@ -40,6 +40,19 @@ WHERE
)""",
)
+ self.validate_identity("SELECT TIMEADD(HOUR, 2, CAST('09:05:03' AS TIME))")
+ self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS MAP(VARCHAR, INT))")
+ self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS OBJECT(a CHAR NOT NULL))")
+ self.validate_identity("SELECT CAST([1, 2, 3] AS ARRAY(INT))")
+ self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR) RENAME FIELDS)")
+ self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR, y VARCHAR) ADD FIELDS)")
+ self.validate_identity("SELECT TO_TIMESTAMP(123.4)").selects[0].assert_is(exp.Anonymous)
+ self.validate_identity("SELECT TO_TIME(x) FROM t")
+ self.validate_identity("SELECT TO_TIMESTAMP(x) FROM t")
+ self.validate_identity("SELECT TO_TIMESTAMP_NTZ(x) FROM t")
+ self.validate_identity("SELECT TO_TIMESTAMP_LTZ(x) FROM t")
+ self.validate_identity("SELECT TO_TIMESTAMP_TZ(x) FROM t")
+ self.validate_identity("TO_DECIMAL(expr, fmt, precision, scale)")
self.validate_identity("ALTER TABLE authors ADD CONSTRAINT c1 UNIQUE (id, email)")
self.validate_identity("RM @parquet_stage", check_command_warning=True)
self.validate_identity("REMOVE @parquet_stage", check_command_warning=True)
@@ -59,7 +72,6 @@ WHERE
self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')")
self.validate_identity("CAST(x AS GEOMETRY)")
self.validate_identity("OBJECT_CONSTRUCT(*)")
- self.validate_identity("SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'")
self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'")
self.validate_identity("SELECT HLL(*)")
self.validate_identity("SELECT HLL(a)")
@@ -77,18 +89,29 @@ WHERE
self.validate_identity("ALTER TABLE foo UNSET DATA_RETENTION_TIME_IN_DAYS, CHANGE_TRACKING")
self.validate_identity("COMMENT IF EXISTS ON TABLE foo IS 'bar'")
self.validate_identity("SELECT CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', col)")
- self.validate_identity("REGEXP_REPLACE('target', 'pattern', '\n')")
self.validate_identity("ALTER TABLE a SWAP WITH b")
+ self.validate_identity("SELECT MATCH_CONDITION")
self.validate_identity(
'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage'
)
self.validate_identity(
"SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)"
)
+ self.validate_identity(
+ "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID"
+ )
+ self.validate_identity(
+ "REGEXP_REPLACE('target', 'pattern', '\n')",
+ "REGEXP_REPLACE('target', 'pattern', '\\n')",
+ )
+ self.validate_identity(
+ "SELECT a:from::STRING, a:from || ' test' ",
+ "SELECT CAST(GET_PATH(a, 'from') AS TEXT), GET_PATH(a, 'from') || ' test'",
+ )
self.validate_identity("x:from", "GET_PATH(x, 'from')")
self.validate_identity(
- "value:values::string",
- "CAST(GET_PATH(value, 'values') AS TEXT)",
+ "value:values::string::int",
+ "CAST(CAST(GET_PATH(value, 'values') AS TEXT) AS INT)",
)
self.validate_identity(
"""SELECT GET_PATH(PARSE_JSON('{"y": [{"z": 1}]}'), 'y[0]:z')""",
@@ -132,7 +155,11 @@ WHERE
)
self.validate_identity(
"v:attr[0]:name",
- "GET_PATH(GET_PATH(v, 'attr[0]'), 'name')",
+ "GET_PATH(v, 'attr[0].name')",
+ )
+ self.validate_identity(
+ "a.x:from.b:c.d::int",
+ "CAST(GET_PATH(a.x, 'from.b.c.d') AS INT)",
)
self.validate_identity(
"""SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""",
@@ -190,10 +217,6 @@ WHERE
"SELECT CEIL(5.3)",
)
self.validate_identity(
- "SELECT TO_TIMESTAMP(x) FROM t",
- "SELECT CAST(x AS TIMESTAMPNTZ) FROM t",
- )
- self.validate_identity(
"CAST(x AS BYTEINT)",
"CAST(x AS INT)",
)
@@ -380,6 +403,7 @@ WHERE
write={
"duckdb": "{'a': b, 'c': d}",
"snowflake": "OBJECT_CONSTRUCT('a', b, 'c', d)",
+ "": "STRUCT(b AS a, d AS c)",
},
)
self.validate_identity("OBJECT_CONSTRUCT(a, b, c, d)")
@@ -419,6 +443,46 @@ WHERE
"sqlite": "SELECT MIN(c1), MIN(c2) FROM test",
},
)
+ for suffix in (
+ "",
+ " OVER ()",
+ ):
+ self.validate_all(
+ f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ read={
+ "snowflake": f"SELECT MEDIAN(x){suffix}",
+ "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ },
+ write={
+ "": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x NULLS LAST){suffix}",
+ "duckdb": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ },
+ )
+ self.validate_all(
+ f"SELECT MEDIAN(x){suffix}",
+ write={
+ "": f"SELECT PERCENTILE_CONT(x, 0.5){suffix}",
+ "duckdb": f"SELECT QUANTILE_CONT(x, 0.5){suffix}",
+ "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
+ },
+ )
+ for func in (
+ "CORR",
+ "COVAR_POP",
+ "COVAR_SAMP",
+ ):
+ self.validate_all(
+ f"SELECT {func}(y, x){suffix}",
+ write={
+ "": f"SELECT {func}(y, x){suffix}",
+ "duckdb": f"SELECT {func}(y, x){suffix}",
+ "postgres": f"SELECT {func}(y, x){suffix}",
+ "snowflake": f"SELECT {func}(y, x){suffix}",
+ },
+ )
self.validate_all(
"TO_CHAR(x, y)",
read={
@@ -560,9 +624,9 @@ WHERE
self.validate_all(
'''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''',
write={
- "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a' -> '$."b c"'""",
- "mysql": """SELECT JSON_EXTRACT(JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a'), '$."b c"')""",
- "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a'), '["b c"]')""",
+ "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a."b c"'""",
+ "mysql": """SELECT JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a."b c"')""",
+ "snowflake": """SELECT GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a["b c"]')""",
},
)
self.validate_all(
@@ -623,9 +687,16 @@ WHERE
self.validate_all(
"SELECT TO_TIMESTAMP('2013-04-05 01:02:03')",
write={
- "bigquery": "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2013-04-05 01:02:03')",
- "snowflake": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-mm-DD hh24:mi:ss')",
- "spark": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-MM-dd HH:mm:ss')",
+ "bigquery": "SELECT CAST('2013-04-05 01:02:03' AS DATETIME)",
+ "snowflake": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMPNTZ)",
+ "spark": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)",
+ },
+ )
+ self.validate_all(
+ "SELECT TO_TIME('12:05:00')",
+ write={
+ "bigquery": "SELECT CAST('12:05:00' AS TIME)",
+ "snowflake": "SELECT CAST('12:05:00' AS TIME)",
},
)
self.validate_all(
@@ -667,9 +738,13 @@ WHERE
)
self.validate_all(
"ARRAY_TO_STRING(x, '')",
+ read={
+ "duckdb": "ARRAY_TO_STRING(x, '')",
+ },
write={
"spark": "ARRAY_JOIN(x, '')",
"snowflake": "ARRAY_TO_STRING(x, '')",
+ "duckdb": "ARRAY_TO_STRING(x, '')",
},
)
self.validate_all(
@@ -930,6 +1005,9 @@ WHERE
)
self.validate_all(
"DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))",
+ read={
+ "snowflake": "TIMESTAMPADD(DAY, 5, CAST('2008-12-25' AS DATE))",
+ },
write={
"bigquery": "DATE_ADD(CAST('2008-12-25' AS DATE), INTERVAL 5 DAY)",
"snowflake": "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))",
@@ -952,6 +1030,46 @@ WHERE
self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)")
self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)")
+ self.validate_identity("TO_DATE('12345')").assert_is(exp.Anonymous)
+
+ self.validate_identity(
+ "SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'",
+ "SELECT CAST('2019-02-28' AS DATE) + INTERVAL '1 day, 1 year'",
+ )
+
+ self.validate_identity("DATE(x)").assert_is(exp.Anonymous)
+ self.validate_identity("TO_DATE(x)").assert_is(exp.Anonymous)
+ self.validate_identity("TRY_TO_DATE(x)").assert_is(exp.Anonymous)
+
+ self.validate_all(
+ "TO_DATE(x, 'MM-DD-YYYY')",
+ write={
+ "snowflake": "TO_DATE(x, 'mm-DD-yyyy')",
+ "duckdb": "CAST(STRPTIME(x, '%m-%d-%Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "DATE('01-01-2000', 'MM-DD-YYYY')",
+ write={
+ "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')",
+ "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "TO_DATE('01-01-2000', 'MM-DD-YYYY')",
+ write={
+ "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')",
+ "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "TRY_TO_DATE('01-01-2000', 'MM-DD-YYYY')",
+ write={
+ "snowflake": "TRY_TO_DATE('01-01-2000', 'mm-DD-yyyy')",
+ "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)",
+ },
+ )
+
def test_semi_structured_types(self):
self.validate_identity("SELECT CAST(a AS VARIANT)")
self.validate_identity("SELECT CAST(a AS ARRAY)")
@@ -1047,6 +1165,9 @@ WHERE
self.validate_identity("CREATE TABLE IDENTIFIER('foo') (COLUMN1 VARCHAR, COLUMN2 VARCHAR)")
self.validate_identity("CREATE TABLE IDENTIFIER($foo) (col1 VARCHAR, col2 VARCHAR)")
self.validate_identity(
+ "DROP function my_udf (OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN)))"
+ )
+ self.validate_identity(
"CREATE TABLE orders_clone_restore CLONE orders AT (TIMESTAMP => TO_TIMESTAMP_TZ('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss'))"
)
self.validate_identity(
@@ -1061,6 +1182,17 @@ WHERE
self.validate_identity(
"CREATE OR REPLACE TABLE EXAMPLE_DB.DEMO.USERS (ID DECIMAL(38, 0) NOT NULL, PRIMARY KEY (ID), FOREIGN KEY (CITY_CODE) REFERENCES EXAMPLE_DB.DEMO.CITIES (CITY_CODE))"
)
+ self.validate_identity(
+ "CREATE ICEBERG TABLE my_iceberg_table (amount ARRAY(INT)) CATALOG='SNOWFLAKE' EXTERNAL_VOLUME='my_external_volume' BASE_LOCATION='my/relative/path/from/extvol'"
+ )
+ self.validate_identity(
+ "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN))) RETURNS VARCHAR AS $$ SELECT 'foo' $$",
+ "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN))) RETURNS VARCHAR AS ' SELECT \\'foo\\' '",
+ )
+ self.validate_identity(
+ "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE(col1 ARRAY(INT)) AS $$ WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t $$",
+ "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE (col1 ARRAY(INT)) AS ' WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t '",
+ )
self.validate_all(
"CREATE TABLE orders_clone CLONE orders",
@@ -1292,7 +1424,6 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene
"spark": "SELECT `c0`, `c1` FROM (VALUES (1, 2), (3, 4)) AS `t0`(`c0`, `c1`)",
},
)
-
self.validate_all(
"""SELECT $1 AS "_1" FROM VALUES ('a'), ('b')""",
write={
@@ -1300,6 +1431,18 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene
"spark": """SELECT ${1} AS `_1` FROM VALUES ('a'), ('b')""",
},
)
+ self.validate_all(
+ "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x) AS t",
+ read={
+ "duckdb": "SELECT * FROM (VALUES ({'a': 1})) AS t(x)",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x UNION ALL SELECT OBJECT_CONSTRUCT('a', 2)) AS t",
+ read={
+ "duckdb": "SELECT * FROM (VALUES ({'a': 1}), ({'a': 2})) AS t(x)",
+ },
+ )
def test_describe_table(self):
self.validate_all(
diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py
index 1cf1ede..18f1fb7 100644
--- a/tests/dialects/test_spark.py
+++ b/tests/dialects/test_spark.py
@@ -1,6 +1,7 @@
from unittest import mock
from sqlglot import exp, parse_one
+from sqlglot.dialects.dialect import Dialects
from tests.dialects.test_dialect import Validator
@@ -245,13 +246,16 @@ TBLPROPERTIES (
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)")
self.validate_identity("REFRESH TABLE a.b.c")
self.validate_identity("INTERVAL -86 DAYS")
- self.validate_identity("SELECT UNIX_TIMESTAMP()")
self.validate_identity("TRIM(' SparkSQL ')")
self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("SPLIT(str, pattern, lim)")
self.validate_identity(
+ "SELECT UNIX_TIMESTAMP()",
+ "SELECT UNIX_TIMESTAMP(CURRENT_TIMESTAMP())",
+ )
+ self.validate_identity(
"SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 23 HOUR + 59 MINUTE + 59 SECONDS",
"SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL '23' HOUR + INTERVAL '59' MINUTE + INTERVAL '59' SECONDS",
)
@@ -281,6 +285,18 @@ TBLPROPERTIES (
)
self.validate_all(
+ "SELECT SPLIT('123|789', '\\\\|')",
+ read={
+ "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')",
+ "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')",
+ },
+ write={
+ "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')",
+ "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')",
+ "spark": "SELECT SPLIT('123|789', '\\\\|')",
+ },
+ )
+ self.validate_all(
"WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
write={
"clickhouse": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
@@ -366,7 +382,7 @@ TBLPROPERTIES (
"hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)",
"postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)",
"redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
- "snowflake": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
+ "snowflake": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
"spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
},
)
@@ -644,10 +660,10 @@ TBLPROPERTIES (
"SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a STRING, b STRING, c STRING) FROM person WHERE zip_code > 94511"
)
self.validate_identity(
- "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' FROM person"
+ "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' FROM person"
)
self.validate_identity(
- "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') FROM person WHERE zip_code > 94511"
+ "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') FROM person WHERE zip_code > 94511"
)
self.validate_identity(
"SELECT TRANSFORM(zip_code, name, age) USING 'cat' FROM person WHERE zip_code > 94500"
@@ -720,3 +736,16 @@ TBLPROPERTIES (
"presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))",
},
)
+
+ def test_strip_modifiers(self):
+ without_modifiers = "SELECT * FROM t"
+ with_modifiers = f"{without_modifiers} CLUSTER BY y DISTRIBUTE BY x SORT BY z"
+ query = self.parse_one(with_modifiers)
+
+ for dialect in Dialects:
+ with self.subTest(f"Transpiling query with CLUSTER/DISTRIBUTE/SORT BY to {dialect}"):
+ name = dialect.value
+ if name in ("", "databricks", "hive", "spark", "spark2"):
+ self.assertEqual(query.sql(name), with_modifiers)
+ else:
+ self.assertEqual(query.sql(name), without_modifiers)
diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py
index 2421987..f3cde0b 100644
--- a/tests/dialects/test_sqlite.py
+++ b/tests/dialects/test_sqlite.py
@@ -6,58 +6,6 @@ from sqlglot.helper import logger as helper_logger
class TestSQLite(Validator):
dialect = "sqlite"
- def test_ddl(self):
- self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)")
- self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)")
- self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)")
- self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)")
- self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)")
- self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)")
- self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)")
-
- self.validate_all(
- """
- CREATE TABLE "Track"
- (
- CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"),
- FOREIGN KEY ("AlbumId") REFERENCES "Album" (
- "AlbumId"
- ) ON DELETE NO ACTION ON UPDATE NO ACTION,
- FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT,
- FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT
- )
- """,
- write={
- "sqlite": """CREATE TABLE "Track" (
- CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"),
- FOREIGN KEY ("AlbumId") REFERENCES "Album" (
- "AlbumId"
- ) ON DELETE NO ACTION ON UPDATE NO ACTION,
- FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT,
- FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT
-)""",
- },
- pretty=True,
- )
- self.validate_all(
- "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)",
- read={
- "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)",
- },
- write={
- "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)",
- "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)",
- "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)",
- },
- )
- self.validate_all(
- """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""",
- write={
- "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""",
- "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)",
- },
- )
-
def test_sqlite(self):
self.validate_identity("SELECT DATE()")
self.validate_identity("SELECT DATE('now', 'start of month', '+1 month', '-1 day')")
@@ -65,7 +13,6 @@ class TestSQLite(Validator):
self.validate_identity("SELECT DATETIME(1092941466, 'auto')")
self.validate_identity("SELECT DATETIME(1092941466, 'unixepoch', 'localtime')")
self.validate_identity("SELECT UNIXEPOCH()")
- self.validate_identity("SELECT STRFTIME('%s')")
self.validate_identity("SELECT JULIANDAY('now') - JULIANDAY('1776-07-04')")
self.validate_identity("SELECT UNIXEPOCH() - UNIXEPOCH('2004-01-01 02:34:56')")
self.validate_identity("SELECT DATE('now', 'start of year', '+9 months', 'weekday 2')")
@@ -145,6 +92,29 @@ class TestSQLite(Validator):
write={"snowflake": "LEAST(x, y, z)"},
)
+ def test_strftime(self):
+ self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')")
+ self.validate_identity("SELECT STRFTIME('%Y-%m-%d', '2016-10-16', 'start of month')")
+ self.validate_identity(
+ "SELECT STRFTIME('%s')",
+ "SELECT STRFTIME('%s', CURRENT_TIMESTAMP)",
+ )
+
+ self.validate_all(
+ "SELECT STRFTIME('%Y-%m-%d', '2020-01-01 12:05:03')",
+ write={
+ "duckdb": "SELECT STRFTIME(CAST('2020-01-01 12:05:03' AS TIMESTAMP), '%Y-%m-%d')",
+ "sqlite": "SELECT STRFTIME('%Y-%m-%d', '2020-01-01 12:05:03')",
+ },
+ )
+ self.validate_all(
+ "SELECT STRFTIME('%Y-%m-%d', CURRENT_TIMESTAMP)",
+ write={
+ "duckdb": "SELECT STRFTIME(CAST(CURRENT_TIMESTAMP AS TIMESTAMP), '%Y-%m-%d')",
+ "sqlite": "SELECT STRFTIME('%Y-%m-%d', CURRENT_TIMESTAMP)",
+ },
+ )
+
def test_datediff(self):
self.validate_all(
"DATEDIFF(a, b, 'day')",
@@ -190,3 +160,59 @@ class TestSQLite(Validator):
)
self.assertIn("Named columns are not supported in table alias.", cm.output[0])
+
+ def test_ddl(self):
+ for conflict_action in ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"):
+ with self.subTest(f"ON CONFLICT {conflict_action}"):
+ self.validate_identity("CREATE TABLE a (b, c, UNIQUE (b, c) ON CONFLICT IGNORE)")
+
+ self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)")
+ self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)")
+ self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)")
+ self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)")
+ self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)")
+ self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)")
+ self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)")
+
+ self.validate_all(
+ """
+ CREATE TABLE "Track"
+ (
+ CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"),
+ FOREIGN KEY ("AlbumId") REFERENCES "Album" (
+ "AlbumId"
+ ) ON DELETE NO ACTION ON UPDATE NO ACTION,
+ FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT,
+ FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT
+ )
+ """,
+ write={
+ "sqlite": """CREATE TABLE "Track" (
+ CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"),
+ FOREIGN KEY ("AlbumId") REFERENCES "Album" (
+ "AlbumId"
+ ) ON DELETE NO ACTION ON UPDATE NO ACTION,
+ FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT,
+ FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT
+)""",
+ },
+ pretty=True,
+ )
+ self.validate_all(
+ "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)",
+ read={
+ "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)",
+ },
+ write={
+ "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)",
+ "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)",
+ "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)",
+ },
+ )
+ self.validate_all(
+ """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""",
+ write={
+ "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""",
+ "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)",
+ },
+ )
diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py
index f3894fd..a85ca8c 100644
--- a/tests/dialects/test_teradata.py
+++ b/tests/dialects/test_teradata.py
@@ -5,6 +5,7 @@ class TestTeradata(Validator):
dialect = "teradata"
def test_teradata(self):
+ self.validate_identity("TO_NUMBER(expr, fmt, nlsparam)")
self.validate_identity("SELECT TOP 10 * FROM tbl")
self.validate_identity("SELECT * FROM tbl SAMPLE 5")
self.validate_identity(
@@ -100,7 +101,9 @@ class TestTeradata(Validator):
self.validate_identity(
"CREATE VOLATILE SET TABLE example1 AS (SELECT col1, col2, col3 FROM table1) WITH DATA PRIMARY INDEX (col1) ON COMMIT PRESERVE ROWS"
)
-
+ self.validate_identity(
+ "CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)"
+ )
self.validate_all(
"""
CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL,
diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py
index ed474fd..aefd857 100644
--- a/tests/dialects/test_tsql.py
+++ b/tests/dialects/test_tsql.py
@@ -273,6 +273,28 @@ class TestTSQL(Validator):
)
self.validate_all(
+ "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS",
+ read={
+ "postgres": "SELECT * FROM t OFFSET 2",
+ },
+ write={
+ "postgres": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST OFFSET 2",
+ "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY",
+ read={
+ "duckdb": "SELECT * FROM t LIMIT 10 OFFSET 5",
+ "sqlite": "SELECT * FROM t LIMIT 5, 10",
+ "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY",
+ },
+ write={
+ "duckdb": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST LIMIT 10 OFFSET 5",
+ "sqlite": "SELECT * FROM t ORDER BY (SELECT NULL) LIMIT 10 OFFSET 5",
+ },
+ )
+ self.validate_all(
"SELECT CAST([a].[b] AS SMALLINT) FROM foo",
write={
"tsql": "SELECT CAST([a].[b] AS SMALLINT) FROM foo",
@@ -720,6 +742,9 @@ class TestTSQL(Validator):
)
def test_ddl(self):
+ for view_attr in ("ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"):
+ self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x")
+
expression = parse_one("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B", dialect="tsql")
self.assertIsInstance(expression, exp.AlterTable)
self.assertIsInstance(expression.args["actions"][0], exp.Drop)
@@ -1549,7 +1574,7 @@ WHERE
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
"presto": "LAST_DAY_OF_MONTH(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE))",
"redshift": "LAST_DAY(CAST(GETDATE() AS DATE))",
- "snowflake": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
+ "snowflake": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))",
"spark": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))",
"tsql": "EOMONTH(CAST(GETDATE() AS DATE))",
},
@@ -1564,7 +1589,7 @@ WHERE
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
"presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))",
"redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))",
- "snowflake": "LAST_DAY(DATEADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))",
+ "snowflake": "LAST_DAY(DATEADD(MONTH, -1, TO_DATE(CURRENT_TIMESTAMP())))",
"spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))",
"tsql": "EOMONTH(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))",
},
diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql
index 6d3bb07..5453a78 100644
--- a/tests/fixtures/identity.sql
+++ b/tests/fixtures/identity.sql
@@ -141,6 +141,7 @@ x ILIKE '%y%' ESCAPE '\'
INTERVAL '1' DAY
INTERVAL '1' MONTH
INTERVAL '1' YEAR
+INTERVAL '1' HOUR TO SECOND
INTERVAL '-1' CURRENT_DATE
INTERVAL '-31' CAST(GETDATE() AS DATE)
INTERVAL (1 + 3) DAYS
@@ -149,6 +150,7 @@ INTERVAL '1' DAY * 5
CASE WHEN TRUE THEN INTERVAL '15' DAYS END
CASE WHEN TRUE THEN 1 ELSE interval END
CASE WHEN TRUE THEN 1 ELSE "INTERVAL" END
+SELECT asof FROM x
SELECT * WHERE interval IS NULL
SELECT * WHERE NOT interval IS NULL
SELECT * WHERE INTERVAL "is" > 1
@@ -176,6 +178,7 @@ COUNT(DISTINCT CASE WHEN DATE_TRUNC('ISOWEEK', DATE(time_field)) = DATE_TRUNC('I
COUNT(a, b)
x[y - 1]
CASE WHEN SUM(x) > 3 THEN 1 END OVER (PARTITION BY x)
+ANY(x) OVER (PARTITION BY x)
SUM(ROW() OVER (PARTITION BY x))
SUM(ROW() OVER (PARTITION BY x + 1))
SUM(ROW() OVER (PARTITION BY x AND y))
@@ -361,7 +364,6 @@ SELECT GREATEST(a, b, c) FROM test
SELECT LAST_VALUE(a) FROM test
SELECT LAST_VALUE(a) IGNORE NULLS OVER () + 1
SELECT LN(a) FROM test
-SELECT LOG10(a) FROM test
SELECT MAX(a) FROM test
SELECT MIN(a) FROM test
SELECT POWER(a, 2) FROM test
@@ -476,6 +478,7 @@ SELECT 1 UNION (SELECT 2) ORDER BY x
SELECT * FROM (((SELECT 1) UNION SELECT 2) ORDER BY x LIMIT 1 OFFSET 1)
SELECT * FROM ((SELECT 1 AS x) CROSS JOIN (SELECT 2 AS y)) AS z
((SELECT 1) EXCEPT (SELECT 2))
+((SELECT 1)) LIMIT 1
VALUES (1) UNION SELECT * FROM x
WITH a AS (SELECT 1) SELECT a.* FROM a
WITH a AS (SELECT 1), b AS (SELECT 2) SELECT a.*, b.* FROM a CROSS JOIN b
@@ -637,6 +640,8 @@ CREATE DATABASE IF NOT EXISTS y
CREATE PROCEDURE IF NOT EXISTS a.b.c() AS 'DECLARE BEGIN; END'
CREATE TABLE T3 AS (SELECT DISTINCT A FROM T1 EXCEPT (SELECT A FROM T2) LIMIT 1)
DESCRIBE x
+DESCRIBE EXTENDED a.b
+DESCRIBE FORMATTED a.b
DROP INDEX a.b.c
DROP FUNCTION a.b.c (INT)
DROP MATERIALIZED VIEW x.y.z
@@ -831,6 +836,7 @@ SELECT * FROM schema.case
SELECT * FROM current_date
SELECT * FROM schema.current_date
SELECT /*+ SOME_HINT(foo) */ 1
+SELECT /*+ REBALANCE */ * FROM foo
SELECT * FROM (tbl1 CROSS JOIN (SELECT * FROM tbl2) AS t1)
/* comment1 */ INSERT INTO x /* comment2 */ VALUES (1, 2, 3)
/* comment1 */ UPDATE tbl /* comment2 */ SET x = 2 WHERE x < 2
@@ -857,3 +863,5 @@ SELECT truncate
SELECT only
TRUNCATE(a, b)
SELECT enum
+SELECT unlogged
+SELECT name
diff --git a/tests/fixtures/optimizer/canonicalize.sql b/tests/fixtures/optimizer/canonicalize.sql
index 98b2f07..e4c78b7 100644
--- a/tests/fixtures/optimizer/canonicalize.sql
+++ b/tests/fixtures/optimizer/canonicalize.sql
@@ -2,7 +2,7 @@ SELECT w.d + w.e AS c FROM w AS w;
SELECT CONCAT("w"."d", "w"."e") AS "c" FROM "w" AS "w";
SELECT CAST(w.d AS DATE) > w.e AS a FROM w AS w;
-SELECT CAST("w"."d" AS DATE) > CAST("w"."e" AS DATE) AS "a" FROM "w" AS "w";
+SELECT CAST("w"."d" AS DATE) > CAST("w"."e" AS DATETIME) AS "a" FROM "w" AS "w";
SELECT CAST(1 AS VARCHAR) AS a FROM w AS w;
SELECT CAST(1 AS VARCHAR) AS "a" FROM "w" AS "w";
@@ -97,6 +97,15 @@ DATE_TRUNC('DAY', CAST('2023-01-01' AS DATE));
DATEDIFF('2023-01-01', '2023-01-02', DAY);
DATEDIFF(CAST('2023-01-01' AS DATETIME), CAST('2023-01-02' AS DATETIME), DAY);
+SELECT "t"."d" > '2023-01-01' AS "d" FROM "temporal" AS "t";
+SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t";
+
+SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t";
+SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t";
+
+SELECT "t"."t" > '2023-01-01 00:00:01' AS "t" FROM "temporal" AS "t";
+SELECT "t"."t" > CAST('2023-01-01 00:00:01' AS DATETIME) AS "t" FROM "temporal" AS "t";
+
--------------------------------------
-- Remove redundant casts
--------------------------------------
diff --git a/tests/fixtures/optimizer/merge_subqueries.sql b/tests/fixtures/optimizer/merge_subqueries.sql
index 0f22925..f953539 100644
--- a/tests/fixtures/optimizer/merge_subqueries.sql
+++ b/tests/fixtures/optimizer/merge_subqueries.sql
@@ -429,4 +429,20 @@ WHERE
q.a AS a
FROM q AS q
);
-SELECT q.a AS a FROM x AS q WHERE q.a IN (SELECT y.b AS a FROM y AS y); \ No newline at end of file
+SELECT q.a AS a FROM x AS q WHERE q.a IN (SELECT y.b AS a FROM y AS y);
+
+# title: dont merge when inner query has ORDER BY and outer query is UNION
+WITH q AS (
+ SELECT
+ x.a AS a
+ FROM x
+ ORDER BY x.a
+)
+SELECT
+ q.a AS a
+FROM q
+UNION ALL
+SELECT
+ 1 AS a;
+WITH q AS (SELECT x.a AS a FROM x AS x ORDER BY x.a) SELECT q.a AS a FROM q AS q UNION ALL SELECT 1 AS a;
+
diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql
index 990453b..cc72e6d 100644
--- a/tests/fixtures/optimizer/optimizer.sql
+++ b/tests/fixtures/optimizer/optimizer.sql
@@ -33,16 +33,17 @@ FROM (
WHERE object_pointstext IS NOT NULL
);
CREATE OR REPLACE TEMPORARY VIEW `latest_boo` AS
-SELECT
- TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`,
- TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value`
-FROM (
+WITH `_q_1` AS (
SELECT
EXPLODE_OUTER(SPLIT(`boo`.`object_pointstext`, ',')) AS `points`
FROM `boo` AS `boo`
WHERE
NOT `boo`.`object_pointstext` IS NULL
-) AS `_q_1`;
+)
+SELECT
+ TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`,
+ TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value`
+FROM `_q_1` AS `_q_1`;
# title: Union in CTE
WITH cte AS (
@@ -480,8 +481,8 @@ JOIN "company_table" AS "company_table_2"
LEFT JOIN "unlocked" AS "unlocked"
ON "company_table_2"."id" = "unlocked"."company_id"
WHERE
- NOT "company_table_2"."id" IS NULL
- AND CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE;
+ CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE
+ AND NOT "company_table_2"."id" IS NULL;
# title: db.table alias clash
# execute: false
@@ -823,7 +824,7 @@ SELECT
FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` AS `TOp_TeRmS`
WHERE
`TOp_TeRmS`.`rank` = 1
- AND CAST(`TOp_TeRmS`.`refresh_date` AS DATE) >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK)
+ AND `TOp_TeRmS`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK)
GROUP BY
`day`,
`top_term`,
@@ -1379,11 +1380,11 @@ JOIN `date_dim` AS `date_dim`
AND `date_dim`.`d_date` >= '2002-02-01'
WHERE
`_u_3`.`_u_4` IS NULL
- AND NOT `_u_0`.`_u_1` IS NULL
AND (
SIZE(`_u_0`.`_u_2`) = 0
OR SIZE(FILTER(`_u_0`.`_u_2`, `_x` -> `cs1`.`cs_warehouse_sk` <> `_x`)) <> 0
)
+ AND NOT `_u_0`.`_u_1` IS NULL
ORDER BY
COUNT(DISTINCT `cs1`.`cs_order_number`)
LIMIT 100;
diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql
index b7103ef..47972ac 100644
--- a/tests/fixtures/optimizer/pushdown_projections.sql
+++ b/tests/fixtures/optimizer/pushdown_projections.sql
@@ -79,6 +79,9 @@ WITH y AS (SELECT MAX(1) AS _ FROM x AS x) SELECT 1 AS "1" FROM y AS y;
WITH y AS (SELECT a FROM x GROUP BY a) SELECT 1 FROM y;
WITH y AS (SELECT 1 AS _ FROM x AS x GROUP BY x.a) SELECT 1 AS "1" FROM y AS y;
+WITH cte AS (SELECT col FROM t) SELECT IF(1 IN UNNEST(col), 1, 0) AS col FROM cte;
+WITH cte AS (SELECT t.col AS col FROM t AS t) SELECT CASE WHEN 1 IN (SELECT UNNEST(cte.col)) THEN 1 ELSE 0 END AS col FROM cte AS cte;
+
--------------------------------------
-- Unknown Star Expansion
--------------------------------------
@@ -106,3 +109,6 @@ WITH cte1 AS (SELECT tb.cola AS cola FROM tb AS tb UNION ALL SELECT tb2.colc AS
SELECT * FROM ((SELECT c FROM t1) JOIN t2);
SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2);
+
+SELECT a, d FROM (SELECT 1 a, 2 c, 3 d, 4 e UNION ALL BY NAME SELECT 5 b, 6 c, 7 d, 8 a, 9 e)
+SELECT a, d FROM (SELECT 1 a, 3 d, UNION ALL BY NAME SELECT 7 d, 8 a)
diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql
index 71c6f45..289145b 100644
--- a/tests/fixtures/optimizer/qualify_columns.sql
+++ b/tests/fixtures/optimizer/qualify_columns.sql
@@ -96,6 +96,12 @@ SELECT 2 AS "2" FROM x AS x GROUP BY 1;
SELECT 'a' AS a FROM x GROUP BY 1;
SELECT 'a' AS a FROM x AS x GROUP BY 1;
+SELECT NULL AS a FROM x GROUP BY 1;
+SELECT NULL AS a FROM x AS x GROUP BY 1;
+
+SELECT TRUE AS a FROM x GROUP BY 1;
+SELECT TRUE AS a FROM x AS x GROUP BY 1;
+
# execute: false
# dialect: oracle
SELECT t."col" FROM tbl t;
@@ -121,6 +127,10 @@ SELECT 2 AS d FROM x AS x GROUP BY 1 ORDER BY d;
SELECT DATE(a), DATE(b) AS c FROM x GROUP BY 1, 2;
SELECT DATE(x.a) AS _col_0, DATE(x.b) AS c FROM x AS x GROUP BY DATE(x.a), DATE(x.b);
+# execute: false
+SELECT (SELECT MIN(a) FROM UNNEST([1, 2])) AS f FROM x GROUP BY 1;
+SELECT (SELECT MIN(_q_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _q_0) AS f FROM x AS x GROUP BY 1;
+
SELECT SUM(x.a) AS c FROM x JOIN y ON x.b = y.b GROUP BY c;
SELECT SUM(x.a) AS c FROM x AS x JOIN y AS y ON x.b = y.b GROUP BY y.c;
@@ -580,8 +590,8 @@ SELECT * FROM ((SELECT * FROM tbl));
SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _q_0);
# execute: false
-SELECT * FROM ((SELECT c FROM t1) JOIN t2);
-SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2);
+SELECT * FROM ((SELECT c FROM t1) CROSS JOIN t2);
+SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0 CROSS JOIN t2 AS t2);
# execute: false
SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c);
diff --git a/tests/fixtures/optimizer/qualify_columns_ddl.sql b/tests/fixtures/optimizer/qualify_columns_ddl.sql
index 907780b..9b4bb34 100644
--- a/tests/fixtures/optimizer/qualify_columns_ddl.sql
+++ b/tests/fixtures/optimizer/qualify_columns_ddl.sql
@@ -1,6 +1,10 @@
# title: Create with CTE
WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM cte;
-WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT cte.b AS b FROM cte AS cte;
+CREATE TABLE s AS WITH cte AS (SELECT y.b AS b FROM y AS y) SELECT cte.b AS b FROM cte AS cte;
+
+# title: Create with CTE, query also has CTE
+WITH cte1 AS (SELECT b FROM y) CREATE TABLE s AS WITH cte2 AS (SELECT b FROM cte1) SELECT * FROM cte2;
+CREATE TABLE s AS WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte2.b AS b FROM cte2 AS cte2;
# title: Create without CTE
CREATE TABLE foo AS SELECT a FROM tbl;
@@ -8,15 +12,15 @@ CREATE TABLE foo AS SELECT tbl.a AS a FROM tbl AS tbl;
# title: Create with complex CTE with derived table
WITH cte AS (SELECT a FROM (SELECT a from x)) CREATE TABLE s AS SELECT * FROM cte;
-WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte;
+CREATE TABLE s AS WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) SELECT cte.a AS a FROM cte AS cte;
# title: Create wtih multiple CTEs
WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte2;
-WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) CREATE TABLE s AS SELECT cte2.b AS b FROM cte2 AS cte2;
+CREATE TABLE s AS WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte2.b AS b FROM cte2 AS cte2;
# title: Create with multiple CTEs, selecting only from the first CTE (unnecessary code)
WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte1;
-WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) CREATE TABLE s AS SELECT cte1.b AS b FROM cte1 AS cte1;
+CREATE TABLE s AS WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte1.b AS b FROM cte1 AS cte1;
# title: Create with multiple derived tables
CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM y));
@@ -24,9 +28,10 @@ CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT y.b A
# title: Create with a CTE and a derived table
WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM cte));
-WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _q_0) AS _q_1;
+CREATE TABLE s AS WITH cte AS (SELECT y.b AS b FROM y AS y) SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _q_0) AS _q_1;
# title: Insert with CTE
+# dialect: spark
WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte;
WITH cte AS (SELECT y.b AS b FROM y AS y) INSERT INTO s SELECT cte.b AS b FROM cte AS cte;
diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql
index 99b5153..f651a87 100644
--- a/tests/fixtures/optimizer/qualify_tables.sql
+++ b/tests/fixtures/optimizer/qualify_tables.sql
@@ -159,6 +159,7 @@ CREATE TABLE t1 AS (WITH cte AS (SELECT x FROM t2) SELECT * FROM cte);
CREATE TABLE c.db.t1 AS (WITH cte AS (SELECT x FROM c.db.t2 AS t2) SELECT * FROM cte AS cte);
# title: insert statement with cte
+# dialect: spark
WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte;
WITH cte AS (SELECT b FROM c.db.y AS y) INSERT INTO c.db.s SELECT * FROM cte AS cte;
diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql
index da9f26d..a10942d 100644
--- a/tests/fixtures/optimizer/simplify.sql
+++ b/tests/fixtures/optimizer/simplify.sql
@@ -52,6 +52,9 @@ FALSE;
'x' = 'x';
TRUE;
+STRUCT(NULL AS a);
+STRUCT(NULL AS a);
+
NULL AND TRUE;
NULL;
@@ -102,6 +105,10 @@ a AND b;
a AND (b AND b);
a AND b;
+-- bigquery doesn't allow unparenthesis comparisons
+(x is not null) != (y is null);
+(NOT x IS NULL) <> (y IS NULL);
+
--------------------------------------
-- Absorption
--------------------------------------
@@ -459,6 +466,18 @@ CAST('1998-09-02 00:00:00' AS DATETIME);
CAST(x AS DATETIME) + interval '1' WEEK;
CAST(x AS DATETIME) + INTERVAL '1' WEEK;
+# dialect: bigquery
+CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 1 DAY;
+CAST('2023-01-02 00:00:00' AS TIMESTAMP);
+
+# dialect: bigquery
+INTERVAL 1 DAY + CAST('2023-01-01' AS TIMESTAMP);
+CAST('2023-01-02 00:00:00' AS TIMESTAMP);
+
+# dialect: bigquery
+CAST('2023-01-02' AS TIMESTAMP) - INTERVAL 1 DAY;
+CAST('2023-01-01 00:00:00' AS TIMESTAMP);
+
TS_OR_DS_TO_DATE('1998-12-01 00:00:01') - interval '90' day;
CAST('1998-09-02' AS DATE);
@@ -708,6 +727,48 @@ FUN() > 0;
RAND() > 0 OR RAND() > 1;
RAND() > 0 OR RAND() > 1;
+CAST(1 AS UINT) >= 0;
+TRUE;
+
+CAST(-1 AS TINYINT) <= 0;
+TRUE;
+
+CAST(1 AS INT) = CAST(1 AS UINT);
+TRUE;
+
+CASE WHEN CAST(1 AS TINYINT) = 1 THEN FALSE ELSE TRUE END;
+FALSE;
+
+CAST(1 AS INT) + 1;
+CAST(1 AS INT) + 1;
+
+CAST(CAST(CAST(-1 AS INT) AS INT) AS INT) = -1;
+TRUE;
+
+CAST(-1 AS UINT) <= 0;
+CAST(-1 AS UINT) <= 0;
+
+CAST(-129 AS TINYINT) <= 0;
+CAST(-129 AS TINYINT) <= 0;
+
+CAST(256 AS UINT) >= 0;
+CAST(256 AS UINT) >= 0;
+
+CAST(CAST(CAST(-1 AS INT) AS UINT) AS INT) = 1;
+CAST(CAST(CAST(-1 AS INT) AS UINT) AS INT) = 1;
+
+CAST(x AS TINYINT) = 1;
+CAST(x AS TINYINT) = 1;
+
+CAST(CAST(1 AS INT) AS BOOLEAN) = 1;
+CAST(CAST(1 AS INT) AS BOOLEAN) = 1;
+
+CAST(CAST(CAST(1 AS INT) AS BOOLEAN) AS INT) = 1;
+CAST(CAST(CAST(1 AS INT) AS BOOLEAN) AS INT) = 1;
+
+x > CAST('2023-01-01' AS DATE) AND x < CAST('2023-01-01' AS DATETIME);
+FALSE;
+
--------------------------------------
-- COALESCE
--------------------------------------
@@ -745,7 +806,7 @@ COALESCE(ROW() OVER (), 1) = 1;
ROW() OVER () = 1 OR ROW() OVER () IS NULL;
a AND b AND COALESCE(ROW() OVER (), 1) = 1;
-a AND b AND (ROW() OVER () = 1 OR ROW() OVER () IS NULL);
+(ROW() OVER () = 1 OR ROW() OVER () IS NULL) AND a AND b;
COALESCE(1, 2);
1;
@@ -824,27 +885,51 @@ DATE_TRUNC(CAST('2023-12-15' AS DATE), WEEK);
CAST('2023-12-10' AS DATE);
# dialect: bigquery
+DATE_TRUNC(CAST('2023-10-01' AS TIMESTAMP), QUARTER);
+CAST('2023-10-01 00:00:00' AS TIMESTAMP);
+
+# dialect: bigquery
DATE_TRUNC(CAST('2023-12-16' AS DATE), WEEK);
CAST('2023-12-10' AS DATE);
DATE_TRUNC('year', x) = CAST('2021-01-01' AS DATE);
x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
+# dialect: bigquery
+DATE_TRUNC(x, year) = CAST('2021-01-01' AS TIMESTAMP);
+x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP);
+
DATE_TRUNC('quarter', x) = CAST('2021-01-01' AS DATE);
x < CAST('2021-04-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
+# dialect: bigquery
+DATE_TRUNC(x, quarter) = CAST('2021-01-01' AS TIMESTAMP);
+x < CAST('2021-04-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP);
+
DATE_TRUNC('month', x) = CAST('2021-01-01' AS DATE);
x < CAST('2021-02-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
+# dialect: bigquery
+DATE_TRUNC(x, month) = CAST('2021-01-01' AS TIMESTAMP);
+x < CAST('2021-02-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP);
+
DATE_TRUNC('week', x) = CAST('2021-01-04' AS DATE);
x < CAST('2021-01-11' AS DATE) AND x >= CAST('2021-01-04' AS DATE);
DATE_TRUNC('day', x) = CAST('2021-01-01' AS DATE);
x < CAST('2021-01-02' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
+# dialect: bigquery
+DATE_TRUNC(x, DAY) = CAST('2021-01-01' AS TIMESTAMP);
+x < CAST('2021-01-02 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP);
+
CAST('2021-01-01' AS DATE) = DATE_TRUNC('year', x);
x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
+# dialect: bigquery
+CAST('2021-01-01' AS TIMESTAMP) = DATE_TRUNC(x, year);
+x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP);
+
-- Always false, except for nulls
DATE_TRUNC('quarter', x) = CAST('2021-01-02' AS DATE);
DATE_TRUNC('QUARTER', x) = CAST('2021-01-02' AS DATE);
@@ -859,12 +944,20 @@ DATE_TRUNC('YEAR', x) <> CAST('2021-01-02' AS DATE);
DATE_TRUNC('year', x) <= CAST('2021-01-01' AS DATE);
x < CAST('2022-01-01' AS DATE);
+# dialect: bigquery
+DATE_TRUNC(x, year) <= CAST('2021-01-01' AS TIMESTAMP);
+x < CAST('2022-01-01 00:00:00' AS TIMESTAMP);
+
DATE_TRUNC('year', x) <= CAST('2021-01-02' AS DATE);
x < CAST('2022-01-01' AS DATE);
CAST('2021-01-01' AS DATE) >= DATE_TRUNC('year', x);
x < CAST('2022-01-01' AS DATE);
+# dialect: bigquery
+CAST('2021-01-01' AS TIMESTAMP) >= DATE_TRUNC(x, year);
+x < CAST('2022-01-01 00:00:00' AS TIMESTAMP);
+
DATE_TRUNC('year', x) < CAST('2021-01-01' AS DATE);
x < CAST('2021-01-01' AS DATE);
@@ -896,6 +989,10 @@ DATE_TRUNC('YEAR', x) <> '2021-01-02';
DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2023-01-01' AS DATE));
(x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE)) OR (x < CAST('2024-01-01' AS DATE) AND x >= CAST('2023-01-01' AS DATE));
+# dialect: bigquery
+DATE_TRUNC(x, year) IN (CAST('2021-01-01' AS TIMESTAMP), CAST('2023-01-01' AS TIMESTAMP));
+(x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP)) OR (x < CAST('2024-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2023-01-01 00:00:00' AS TIMESTAMP));
+
-- merge ranges
DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2022-01-01' AS DATE));
x < CAST('2023-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
diff --git a/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz b/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz
index ad5043f..6f51952 100644
--- a/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz b/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz
index eed1508..9a736ff 100644
--- a/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz b/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz
index e160514..9092c1f 100644
--- a/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz b/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz
index 1828149..ac9058b 100644
--- a/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/customer.csv.gz b/tests/fixtures/optimizer/tpc-ds/customer.csv.gz
index 2277f72..5545923 100644
--- a/tests/fixtures/optimizer/tpc-ds/customer.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/customer.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz b/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz
index c553721..7c24e8c 100644
--- a/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz b/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz
index dfc65a0..582d4e5 100644
--- a/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz b/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz
index 26280bf..9960663 100644
--- a/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz b/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz
index f0cde03..84efa06 100644
--- a/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz b/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz
index 4374587..8c60109 100644
--- a/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz b/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz
index 5afaaf6..d171ae0 100644
--- a/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/item.csv.gz b/tests/fixtures/optimizer/tpc-ds/item.csv.gz
index 9f65d87..effacb3 100644
--- a/tests/fixtures/optimizer/tpc-ds/item.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/item.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz b/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz
index e8692c2..918e9c1 100644
--- a/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/reason.csv.gz b/tests/fixtures/optimizer/tpc-ds/reason.csv.gz
index de1f50f..2ad5473 100644
--- a/tests/fixtures/optimizer/tpc-ds/reason.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/reason.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz b/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz
index 14465e8..e193902 100644
--- a/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/store.csv.gz b/tests/fixtures/optimizer/tpc-ds/store.csv.gz
index 8d04078..77868fc 100644
--- a/tests/fixtures/optimizer/tpc-ds/store.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/store.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz b/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz
index cba1300..d3426ab 100644
--- a/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz b/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz
index 68caa83..21e83df 100644
--- a/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz b/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz
index 3e0fa35..fae30e9 100644
--- a/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
index 76e6431..35fbb70 100644
--- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
+++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
@@ -62,6 +62,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 2
--------------------------------------
+# execute: true
WITH wscs
AS (SELECT sold_date_sk,
sales_price
@@ -107,13 +108,13 @@ WITH wscs
WHERE d_date_sk = sold_date_sk
GROUP BY d_week_seq)
SELECT d_week_seq1,
- Round(sun_sales1 / sun_sales2, 2),
- Round(mon_sales1 / mon_sales2, 2),
- Round(tue_sales1 / tue_sales2, 2),
- Round(wed_sales1 / wed_sales2, 2),
- Round(thu_sales1 / thu_sales2, 2),
- Round(fri_sales1 / fri_sales2, 2),
- Round(sat_sales1 / sat_sales2, 2)
+ Round(sun_sales1 / sun_sales2, 2) AS "_col_1",
+ Round(mon_sales1 / mon_sales2, 2) AS "_col_2",
+ Round(tue_sales1 / tue_sales2, 2) AS "_col_3",
+ Round(wed_sales1 / wed_sales2, 2) AS "_col_4",
+ Round(thu_sales1 / thu_sales2, 2) AS "_col_5",
+ Round(fri_sales1 / fri_sales2, 2) AS "_col_6",
+ Round(sat_sales1 / sat_sales2, 2) AS "_col_7"
FROM (SELECT wswscs.d_week_seq d_week_seq1,
sun_sales sun_sales1,
mon_sales mon_sales1,
@@ -213,7 +214,8 @@ JOIN "date_dim" AS "date_dim"
JOIN "wswscs" AS "wswscs_2"
ON "wswscs"."d_week_seq" = "wswscs_2"."d_week_seq" - 53
JOIN "date_dim" AS "date_dim_2"
- ON "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq" AND "date_dim_2"."d_year" = 1999
+ ON "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq"
+ AND "date_dim_2"."d_year" = 1999
ORDER BY
"d_week_seq1";
@@ -264,6 +266,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 4
--------------------------------------
+# execute: true
WITH year_total
AS (SELECT c_customer_id customer_id,
c_first_name customer_first_name,
@@ -733,8 +736,8 @@ WITH "salesreturns" AS (
"date_dim"."d_date" AS "d_date"
FROM "date_dim" AS "date_dim"
WHERE
- CAST("date_dim"."d_date" AS DATE) <= CAST('2002-09-05' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2002-08-22' AS DATE)
+ CAST("date_dim"."d_date" AS DATETIME) <= CAST('2002-09-05' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2002-08-22' AS DATE)
), "ssr" AS (
SELECT
"store"."s_store_id" AS "s_store_id",
@@ -1628,6 +1631,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 11
--------------------------------------
+# execute: true
WITH year_total
AS (SELECT c_customer_id customer_id,
c_first_name customer_first_name
@@ -1869,8 +1873,8 @@ SELECT
FROM "web_sales" AS "web_sales"
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-10' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-11' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-06-10' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-05-11' AS DATE)
JOIN "item" AS "item"
ON "item"."i_category" IN ('Home', 'Men', 'Women')
AND "item"."i_item_sk" = "web_sales"."ws_item_sk"
@@ -2326,8 +2330,9 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 15
--------------------------------------
+# execute: true
SELECT ca_zip,
- Sum(cs_sales_price)
+ Sum(cs_sales_price) AS "_col_1"
FROM catalog_sales,
customer,
customer_address,
@@ -2437,11 +2442,11 @@ JOIN "date_dim" AS "date_dim"
AND "date_dim"."d_date" >= '2002-3-01'
AND (
CAST('2002-3-01' AS DATE) + INTERVAL '60' DAY
- ) >= CAST("date_dim"."d_date" AS DATE)
+ ) >= CAST("date_dim"."d_date" AS DATETIME)
WHERE
"_u_3"."_u_4" IS NULL
- AND NOT "_u_0"."_u_1" IS NULL
AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "cs1"."cs_warehouse_sk" <> "_x")
+ AND NOT "_u_0"."_u_1" IS NULL
ORDER BY
COUNT(DISTINCT "cs1"."cs_order_number")
LIMIT 100;
@@ -2449,6 +2454,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 17
--------------------------------------
+# execute: true
SELECT i_item_id,
i_item_desc,
s_state,
@@ -2638,6 +2644,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 19
--------------------------------------
+# execute: true
SELECT i_brand_id brand_id,
i_brand brand,
i_manufact_id,
@@ -2744,8 +2751,8 @@ SELECT
FROM "catalog_sales" AS "catalog_sales"
JOIN "date_dim" AS "date_dim"
ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2001-03-05' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2001-02-03' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2001-03-05' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2001-02-03' AS DATE)
JOIN "item" AS "item"
ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk"
AND "item"."i_category" IN ('Children', 'Women', 'Electronics')
@@ -2824,8 +2831,8 @@ WITH "x" AS (
FROM "inventory" AS "inventory"
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-12' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-04-13' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-06-12' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-04-13' AS DATE)
JOIN "item" AS "item"
ON "inventory"."inv_item_sk" = "item"."i_item_sk"
AND "item"."i_current_price" <= 1.49
@@ -2906,6 +2913,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 23
--------------------------------------
+# execute: true
WITH frequent_ss_items
AS (SELECT Substr(i_item_desc, 1, 30) itemdesc,
i_item_sk item_sk,
@@ -2942,7 +2950,7 @@ WITH frequent_ss_items
HAVING Sum(ss_quantity * ss_sales_price) >
( 95 / 100.0 ) * (SELECT *
FROM max_store_sales))
-SELECT Sum(sales)
+SELECT Sum(sales) AS "_col_0"
FROM (SELECT cs_quantity * cs_list_price sales
FROM catalog_sales,
date_dim
@@ -3372,49 +3380,49 @@ LIMIT 100;
-- TPC-DS 28
--------------------------------------
SELECT *
-FROM (SELECT Avg(ss_list_price) B1_LP,
- Count(ss_list_price) B1_CNT,
- Count(DISTINCT ss_list_price) B1_CNTD
+FROM (SELECT Avg(ss_list_price) b1_lp,
+ Count(ss_list_price) b1_cnt,
+ Count(DISTINCT ss_list_price) b1_cntd
FROM store_sales
WHERE ss_quantity BETWEEN 0 AND 5
AND ( ss_list_price BETWEEN 18 AND 18 + 10
OR ss_coupon_amt BETWEEN 1939 AND 1939 + 1000
OR ss_wholesale_cost BETWEEN 34 AND 34 + 20 )) B1,
- (SELECT Avg(ss_list_price) B2_LP,
- Count(ss_list_price) B2_CNT,
- Count(DISTINCT ss_list_price) B2_CNTD
+ (SELECT Avg(ss_list_price) b2_lp,
+ Count(ss_list_price) b2_cnt,
+ Count(DISTINCT ss_list_price) b2_cntd
FROM store_sales
WHERE ss_quantity BETWEEN 6 AND 10
AND ( ss_list_price BETWEEN 1 AND 1 + 10
OR ss_coupon_amt BETWEEN 35 AND 35 + 1000
OR ss_wholesale_cost BETWEEN 50 AND 50 + 20 )) B2,
- (SELECT Avg(ss_list_price) B3_LP,
- Count(ss_list_price) B3_CNT,
- Count(DISTINCT ss_list_price) B3_CNTD
+ (SELECT Avg(ss_list_price) b3_lp,
+ Count(ss_list_price) b3_cnt,
+ Count(DISTINCT ss_list_price) b3_cntd
FROM store_sales
WHERE ss_quantity BETWEEN 11 AND 15
AND ( ss_list_price BETWEEN 91 AND 91 + 10
OR ss_coupon_amt BETWEEN 1412 AND 1412 + 1000
OR ss_wholesale_cost BETWEEN 17 AND 17 + 20 )) B3,
- (SELECT Avg(ss_list_price) B4_LP,
- Count(ss_list_price) B4_CNT,
- Count(DISTINCT ss_list_price) B4_CNTD
+ (SELECT Avg(ss_list_price) b4_lp,
+ Count(ss_list_price) b4_cnt,
+ Count(DISTINCT ss_list_price) b4_cntd
FROM store_sales
WHERE ss_quantity BETWEEN 16 AND 20
AND ( ss_list_price BETWEEN 9 AND 9 + 10
OR ss_coupon_amt BETWEEN 5270 AND 5270 + 1000
OR ss_wholesale_cost BETWEEN 29 AND 29 + 20 )) B4,
- (SELECT Avg(ss_list_price) B5_LP,
- Count(ss_list_price) B5_CNT,
- Count(DISTINCT ss_list_price) B5_CNTD
+ (SELECT Avg(ss_list_price) b5_lp,
+ Count(ss_list_price) b5_cnt,
+ Count(DISTINCT ss_list_price) b5_cntd
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 25
AND ( ss_list_price BETWEEN 45 AND 45 + 10
OR ss_coupon_amt BETWEEN 826 AND 826 + 1000
OR ss_wholesale_cost BETWEEN 5 AND 5 + 20 )) B5,
- (SELECT Avg(ss_list_price) B6_LP,
- Count(ss_list_price) B6_CNT,
- Count(DISTINCT ss_list_price) B6_CNTD
+ (SELECT Avg(ss_list_price) b6_lp,
+ Count(ss_list_price) b6_cnt,
+ Count(DISTINCT ss_list_price) b6_cntd
FROM store_sales
WHERE ss_quantity BETWEEN 26 AND 30
AND ( ss_list_price BETWEEN 174 AND 174 + 10
@@ -3429,9 +3437,12 @@ WITH "b1" AS (
FROM "store_sales" AS "store_sales"
WHERE
(
- "store_sales"."ss_coupon_amt" <= 2939 AND "store_sales"."ss_coupon_amt" >= 1939
- OR "store_sales"."ss_list_price" <= 28 AND "store_sales"."ss_list_price" >= 18
- OR "store_sales"."ss_wholesale_cost" <= 54 AND "store_sales"."ss_wholesale_cost" >= 34
+ "store_sales"."ss_coupon_amt" <= 2939
+ AND "store_sales"."ss_coupon_amt" >= 1939
+ OR "store_sales"."ss_list_price" <= 28
+ AND "store_sales"."ss_list_price" >= 18
+ OR "store_sales"."ss_wholesale_cost" <= 54
+ AND "store_sales"."ss_wholesale_cost" >= 34
)
AND "store_sales"."ss_quantity" <= 5
AND "store_sales"."ss_quantity" >= 0
@@ -3443,9 +3454,12 @@ WITH "b1" AS (
FROM "store_sales" AS "store_sales"
WHERE
(
- "store_sales"."ss_coupon_amt" <= 1035 AND "store_sales"."ss_coupon_amt" >= 35
- OR "store_sales"."ss_list_price" <= 11 AND "store_sales"."ss_list_price" >= 1
- OR "store_sales"."ss_wholesale_cost" <= 70 AND "store_sales"."ss_wholesale_cost" >= 50
+ "store_sales"."ss_coupon_amt" <= 1035
+ AND "store_sales"."ss_coupon_amt" >= 35
+ OR "store_sales"."ss_list_price" <= 11
+ AND "store_sales"."ss_list_price" >= 1
+ OR "store_sales"."ss_wholesale_cost" <= 70
+ AND "store_sales"."ss_wholesale_cost" >= 50
)
AND "store_sales"."ss_quantity" <= 10
AND "store_sales"."ss_quantity" >= 6
@@ -3457,9 +3471,12 @@ WITH "b1" AS (
FROM "store_sales" AS "store_sales"
WHERE
(
- "store_sales"."ss_coupon_amt" <= 2412 AND "store_sales"."ss_coupon_amt" >= 1412
- OR "store_sales"."ss_list_price" <= 101 AND "store_sales"."ss_list_price" >= 91
- OR "store_sales"."ss_wholesale_cost" <= 37 AND "store_sales"."ss_wholesale_cost" >= 17
+ "store_sales"."ss_coupon_amt" <= 2412
+ AND "store_sales"."ss_coupon_amt" >= 1412
+ OR "store_sales"."ss_list_price" <= 101
+ AND "store_sales"."ss_list_price" >= 91
+ OR "store_sales"."ss_wholesale_cost" <= 37
+ AND "store_sales"."ss_wholesale_cost" >= 17
)
AND "store_sales"."ss_quantity" <= 15
AND "store_sales"."ss_quantity" >= 11
@@ -3471,9 +3488,12 @@ WITH "b1" AS (
FROM "store_sales" AS "store_sales"
WHERE
(
- "store_sales"."ss_coupon_amt" <= 6270 AND "store_sales"."ss_coupon_amt" >= 5270
- OR "store_sales"."ss_list_price" <= 19 AND "store_sales"."ss_list_price" >= 9
- OR "store_sales"."ss_wholesale_cost" <= 49 AND "store_sales"."ss_wholesale_cost" >= 29
+ "store_sales"."ss_coupon_amt" <= 6270
+ AND "store_sales"."ss_coupon_amt" >= 5270
+ OR "store_sales"."ss_list_price" <= 19
+ AND "store_sales"."ss_list_price" >= 9
+ OR "store_sales"."ss_wholesale_cost" <= 49
+ AND "store_sales"."ss_wholesale_cost" >= 29
)
AND "store_sales"."ss_quantity" <= 20
AND "store_sales"."ss_quantity" >= 16
@@ -3485,9 +3505,12 @@ WITH "b1" AS (
FROM "store_sales" AS "store_sales"
WHERE
(
- "store_sales"."ss_coupon_amt" <= 1826 AND "store_sales"."ss_coupon_amt" >= 826
- OR "store_sales"."ss_list_price" <= 55 AND "store_sales"."ss_list_price" >= 45
- OR "store_sales"."ss_wholesale_cost" <= 25 AND "store_sales"."ss_wholesale_cost" >= 5
+ "store_sales"."ss_coupon_amt" <= 1826
+ AND "store_sales"."ss_coupon_amt" >= 826
+ OR "store_sales"."ss_list_price" <= 55
+ AND "store_sales"."ss_list_price" >= 45
+ OR "store_sales"."ss_wholesale_cost" <= 25
+ AND "store_sales"."ss_wholesale_cost" >= 5
)
AND "store_sales"."ss_quantity" <= 25
AND "store_sales"."ss_quantity" >= 21
@@ -3499,9 +3522,12 @@ WITH "b1" AS (
FROM "store_sales" AS "store_sales"
WHERE
(
- "store_sales"."ss_coupon_amt" <= 6548 AND "store_sales"."ss_coupon_amt" >= 5548
- OR "store_sales"."ss_list_price" <= 184 AND "store_sales"."ss_list_price" >= 174
- OR "store_sales"."ss_wholesale_cost" <= 62 AND "store_sales"."ss_wholesale_cost" >= 42
+ "store_sales"."ss_coupon_amt" <= 6548
+ AND "store_sales"."ss_coupon_amt" >= 5548
+ OR "store_sales"."ss_list_price" <= 184
+ AND "store_sales"."ss_list_price" >= 174
+ OR "store_sales"."ss_wholesale_cost" <= 62
+ AND "store_sales"."ss_wholesale_cost" >= 42
)
AND "store_sales"."ss_quantity" <= 30
AND "store_sales"."ss_quantity" >= 26
@@ -3860,11 +3886,17 @@ SELECT
"ss3"."store_sales" / "ss2"."store_sales" AS "store_q2_q3_increase"
FROM "ss" AS "ss1"
JOIN "ss" AS "ss2"
- ON "ss1"."ca_county" = "ss2"."ca_county" AND "ss2"."d_qoy" = 2 AND "ss2"."d_year" = 2001
+ ON "ss1"."ca_county" = "ss2"."ca_county"
+ AND "ss2"."d_qoy" = 2
+ AND "ss2"."d_year" = 2001
JOIN "ws" AS "ws1"
- ON "ss1"."ca_county" = "ws1"."ca_county" AND "ws1"."d_qoy" = 1 AND "ws1"."d_year" = 2001
+ ON "ss1"."ca_county" = "ws1"."ca_county"
+ AND "ws1"."d_qoy" = 1
+ AND "ws1"."d_year" = 2001
JOIN "ss" AS "ss3"
- ON "ss2"."ca_county" = "ss3"."ca_county" AND "ss3"."d_qoy" = 3 AND "ss3"."d_year" = 2001
+ ON "ss2"."ca_county" = "ss3"."ca_county"
+ AND "ss3"."d_qoy" = 3
+ AND "ss3"."d_year" = 2001
JOIN "ws" AS "ws2"
ON "ws1"."ca_county" = "ws2"."ca_county"
AND "ws2"."d_qoy" = 2
@@ -3932,7 +3964,7 @@ WITH "catalog_sales_2" AS (
FROM "date_dim" AS "date_dim"
WHERE
"date_dim"."d_date" >= '2001-03-04'
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2001-06-02' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2001-06-02' AS DATE)
), "_u_0" AS (
SELECT
1.3 * AVG("catalog_sales"."cs_ext_discount_amt") AS "_col_0",
@@ -3949,7 +3981,8 @@ FROM "catalog_sales_2" AS "catalog_sales"
JOIN "date_dim_2" AS "date_dim"
ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk"
JOIN "item" AS "item"
- ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_manufact_id" = 610
+ ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk"
+ AND "item"."i_manufact_id" = 610
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "item"."i_item_sk"
WHERE
@@ -4132,6 +4165,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 34
--------------------------------------
+# execute: true
SELECT c_last_name,
c_first_name,
c_salutation,
@@ -4234,24 +4268,25 @@ ORDER BY
--------------------------------------
-- TPC-DS 35
--------------------------------------
+# execute: true
SELECT ca_state,
cd_gender,
cd_marital_status,
cd_dep_count,
Count(*) cnt1,
- Stddev_samp(cd_dep_count),
- Avg(cd_dep_count),
- Max(cd_dep_count),
+ Stddev_samp(cd_dep_count) AS "_col_5",
+ Avg(cd_dep_count) AS "_col_6",
+ Max(cd_dep_count) AS "_col_7",
cd_dep_employed_count,
Count(*) cnt2,
- Stddev_samp(cd_dep_employed_count),
- Avg(cd_dep_employed_count),
- Max(cd_dep_employed_count),
+ Stddev_samp(cd_dep_employed_count) AS "_col_10",
+ Avg(cd_dep_employed_count) AS "_col_11",
+ Max(cd_dep_employed_count) AS "_col_12",
cd_dep_college_count,
Count(*) cnt3,
- Stddev_samp(cd_dep_college_count),
- Avg(cd_dep_college_count),
- Max(cd_dep_college_count)
+ Stddev_samp(cd_dep_college_count) AS "_col_15",
+ Avg(cd_dep_college_count) AS "_col_16",
+ Max(cd_dep_college_count) AS "_col_17"
FROM customer c,
customer_address ca,
customer_demographics
@@ -4495,8 +4530,8 @@ JOIN "inventory" AS "inventory"
AND "inventory"."inv_quantity_on_hand" >= 100
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('1999-05-05' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('1999-03-06' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('1999-05-05' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('1999-03-06' AS DATE)
WHERE
"item"."i_current_price" <= 50
AND "item"."i_current_price" >= 20
@@ -4512,7 +4547,8 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 38
--------------------------------------
-SELECT Count(*)
+# execute: true
+SELECT Count(*) AS "_col_0"
FROM (SELECT DISTINCT c_last_name,
c_first_name,
d_date
@@ -4771,8 +4807,8 @@ LEFT JOIN "catalog_returns" AS "catalog_returns"
AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number"
JOIN "date_dim" AS "date_dim"
ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2002-07-01' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2002-05-02' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2002-07-01' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2002-05-02' AS DATE)
JOIN "item" AS "item"
ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk"
AND "item"."i_current_price" <= 1.49
@@ -4980,10 +5016,11 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 42
--------------------------------------
+# execute: true
SELECT dt.d_year,
item.i_category_id,
item.i_category,
- Sum(ss_ext_sales_price)
+ Sum(ss_ext_sales_price) AS "_col_3"
FROM date_dim dt,
store_sales,
item
@@ -5132,7 +5169,8 @@ FROM "date_dim" AS "date_dim"
JOIN "store_sales" AS "store_sales"
ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk"
JOIN "store" AS "store"
- ON "store"."s_gmt_offset" = -5 AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
+ ON "store"."s_gmt_offset" = -5
+ AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
WHERE
"date_dim"."d_year" = 2002
GROUP BY
@@ -5266,9 +5304,10 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 45
--------------------------------------
+# execute: true
SELECT ca_zip,
ca_state,
- Sum(ws_sales_price)
+ Sum(ws_sales_price) AS "_col_2"
FROM web_sales,
customer,
customer_address,
@@ -5333,6 +5372,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 46
--------------------------------------
+# execute: true
SELECT c_last_name,
c_first_name,
ca_city,
@@ -5524,10 +5564,14 @@ WITH "v1" AS (
"date_dim"."d_moy" = 1 OR "date_dim"."d_year" = 1998 OR "date_dim"."d_year" = 1999
)
AND (
- "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000
+ "date_dim"."d_moy" = 12
+ OR "date_dim"."d_year" = 1999
+ OR "date_dim"."d_year" = 2000
)
AND (
- "date_dim"."d_year" = 1998 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000
+ "date_dim"."d_year" = 1998
+ OR "date_dim"."d_year" = 1999
+ OR "date_dim"."d_year" = 2000
)
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk"
@@ -5576,7 +5620,8 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 48
--------------------------------------
-SELECT Sum (ss_quantity)
+# execute: true
+SELECT Sum (ss_quantity) AS "_col_0"
FROM store_sales,
store,
customer_demographics,
@@ -5919,6 +5964,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 50
--------------------------------------
+# execute: true
SELECT s_store_name,
s_company_id,
s_street_number,
@@ -6811,10 +6857,14 @@ WITH "v1" AS (
"date_dim"."d_moy" = 1 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000
)
AND (
- "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 2000 OR "date_dim"."d_year" = 2001
+ "date_dim"."d_moy" = 12
+ OR "date_dim"."d_year" = 2000
+ OR "date_dim"."d_year" = 2001
)
AND (
- "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 OR "date_dim"."d_year" = 2001
+ "date_dim"."d_year" = 1999
+ OR "date_dim"."d_year" = 2000
+ OR "date_dim"."d_year" = 2001
)
GROUP BY
"item"."i_category",
@@ -7056,6 +7106,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 59
--------------------------------------
+# execute: true
WITH wss
AS (SELECT d_week_seq,
ss_store_sk,
@@ -7095,13 +7146,13 @@ WITH wss
SELECT s_store_name1,
s_store_id1,
d_week_seq1,
- sun_sales1 / sun_sales2,
- mon_sales1 / mon_sales2,
- tue_sales1 / tue_sales2,
- wed_sales1 / wed_sales2,
- thu_sales1 / thu_sales2,
- fri_sales1 / fri_sales2,
- sat_sales1 / sat_sales2
+ sun_sales1 / sun_sales2 AS "_col_3",
+ mon_sales1 / mon_sales2 AS "_col_4",
+ tue_sales1 / tue_sales2 AS "_col_5",
+ wed_sales1 / wed_sales2 AS "_col_6",
+ thu_sales1 / thu_sales2 AS "_col_7",
+ fri_sales1 / fri_sales2 AS "_col_8",
+ sat_sales1 / sat_sales2 AS "_col_9"
FROM (SELECT s_store_name s_store_name1,
wss.d_week_seq d_week_seq1,
s_store_id s_store_id1,
@@ -7553,7 +7604,8 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 62
--------------------------------------
-SELECT Substr(w_warehouse_name, 1, 20),
+# execute: true
+SELECT Substr(w_warehouse_name, 1, 20) AS "_col_0",
sm_type,
web_name,
Sum(CASE
@@ -8132,6 +8184,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 66
--------------------------------------
+# execute: true
SELECT w_warehouse_name,
w_warehouse_sq_ft,
w_city,
@@ -9038,6 +9091,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 68
--------------------------------------
+# execute: true
SELECT c_last_name,
c_first_name,
ca_city,
@@ -9580,6 +9634,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 73
--------------------------------------
+# execute: true
SELECT c_last_name,
c_first_name,
c_salutation,
@@ -9667,6 +9722,7 @@ ORDER BY
--------------------------------------
-- TPC-DS 74
--------------------------------------
+# execute: true
WITH year_total
AS (SELECT c_customer_id customer_id,
c_first_name customer_first_name,
@@ -9826,6 +9882,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 75
--------------------------------------
+# execute: true
WITH all_sales
AS (SELECT d_year,
i_brand_id,
@@ -10030,6 +10087,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 76
--------------------------------------
+# execute: true
SELECT channel,
col_name,
d_year,
@@ -10280,8 +10338,8 @@ WITH "date_dim_2" AS (
"date_dim"."d_date" AS "d_date"
FROM "date_dim" AS "date_dim"
WHERE
- CAST("date_dim"."d_date" AS DATE) <= CAST('2001-09-15' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2001-08-16' AS DATE)
+ CAST("date_dim"."d_date" AS DATETIME) <= CAST('2001-09-15' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2001-08-16' AS DATE)
), "store_2" AS (
SELECT
"store"."s_store_sk" AS "s_store_sk"
@@ -10407,6 +10465,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 78
--------------------------------------
+# execute: true
WITH ws
AS (SELECT d_year AS ws_sold_year,
ws_item_sk,
@@ -10596,9 +10655,10 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 79
--------------------------------------
+# execute: true
SELECT c_last_name,
c_first_name,
- Substr(s_city, 1, 30),
+ Substr(s_city, 1, 30) AS "_col_2",
ss_ticket_number,
amt,
profit
@@ -10788,8 +10848,8 @@ WITH "date_dim_2" AS (
"date_dim"."d_date" AS "d_date"
FROM "date_dim" AS "date_dim"
WHERE
- CAST("date_dim"."d_date" AS DATE) <= CAST('2000-09-25' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-08-26' AS DATE)
+ CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-09-25' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-08-26' AS DATE)
), "item_2" AS (
SELECT
"item"."i_item_sk" AS "i_item_sk",
@@ -10909,6 +10969,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 81
--------------------------------------
+# execute: true
WITH customer_total_return
AS (SELECT cr_returning_customer_sk AS ctr_customer_sk,
ca_state AS ctr_state,
@@ -11068,8 +11129,8 @@ JOIN "store_sales" AS "store_sales"
ON "item"."i_item_sk" = "store_sales"."ss_item_sk"
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('1998-06-26' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('1998-04-27' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('1998-06-26' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('1998-04-27' AS DATE)
WHERE
"item"."i_current_price" <= 93
AND "item"."i_current_price" >= 63
@@ -11329,10 +11390,11 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 85
--------------------------------------
-SELECT Substr(r_reason_desc, 1, 20),
- Avg(ws_quantity),
- Avg(wr_refunded_cash),
- Avg(wr_fee)
+# execute: true
+SELECT Substr(r_reason_desc, 1, 20) AS "_col_0",
+ Avg(ws_quantity) AS "_col_1",
+ Avg(wr_refunded_cash) AS "_col_2",
+ Avg(wr_fee) AS "_col_3"
FROM web_sales,
web_returns,
web_page,
@@ -11387,7 +11449,8 @@ SELECT
AVG("web_returns"."wr_fee") AS "_col_3"
FROM "web_sales" AS "web_sales"
JOIN "date_dim" AS "date_dim"
- ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND "date_dim"."d_year" = 2001
+ ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk"
+ AND "date_dim"."d_year" = 2001
JOIN "web_page" AS "web_page"
ON "web_page"."wp_web_page_sk" = "web_sales"."ws_web_page_sk"
JOIN "web_returns" AS "web_returns"
@@ -11509,7 +11572,8 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 87
--------------------------------------
-select count(*)
+# execute: true
+select count(*) as "_col_0"
from ((select distinct c_last_name, c_first_name, d_date
from store_sales, date_dim, customer
where store_sales.ss_sold_date_sk = date_dim.d_date_sk
@@ -12020,10 +12084,11 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 91
--------------------------------------
-SELECT cc_call_center_id Call_Center,
- cc_name Call_Center_Name,
- cc_manager Manager,
- Sum(cr_net_loss) Returns_Loss
+# execute: true
+SELECT cc_call_center_id call_center,
+ cc_name call_center_name,
+ cc_manager manager,
+ Sum(cr_net_loss) returns_loss
FROM call_center,
catalog_returns,
date_dim,
@@ -12135,7 +12200,7 @@ WITH "web_sales_2" AS (
FROM "date_dim" AS "date_dim"
WHERE
"date_dim"."d_date" >= '2002-03-29'
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2002-06-27' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2002-06-27' AS DATE)
), "_u_0" AS (
SELECT
1.3 * AVG("web_sales"."ws_ext_discount_amt") AS "_col_0",
@@ -12276,14 +12341,14 @@ JOIN "date_dim" AS "date_dim"
AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk"
AND (
CAST('2000-3-01' AS DATE) + INTERVAL '60' DAY
- ) >= CAST("date_dim"."d_date" AS DATE)
+ ) >= CAST("date_dim"."d_date" AS DATETIME)
JOIN "web_site" AS "web_site"
ON "web_site"."web_company_name" = 'pri'
AND "web_site"."web_site_sk" = "ws1"."ws_web_site_sk"
WHERE
"_u_3"."_u_4" IS NULL
- AND NOT "_u_0"."_u_1" IS NULL
AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "ws1"."ws_warehouse_sk" <> "_x")
+ AND NOT "_u_0"."_u_1" IS NULL
ORDER BY
COUNT(DISTINCT "ws1"."ws_order_number")
LIMIT 100;
@@ -12366,7 +12431,7 @@ JOIN "date_dim" AS "date_dim"
AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk"
AND (
CAST('2000-4-01' AS DATE) + INTERVAL '60' DAY
- ) >= CAST("date_dim"."d_date" AS DATE)
+ ) >= CAST("date_dim"."d_date" AS DATETIME)
JOIN "web_site" AS "web_site"
ON "web_site"."web_company_name" = 'pri'
AND "web_site"."web_site_sk" = "ws1"."ws_web_site_sk"
@@ -12379,7 +12444,8 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 96
--------------------------------------
-SELECT Count(*)
+# execute: true
+SELECT Count(*) AS "_col_0"
FROM store_sales,
household_demographics,
time_dim,
@@ -12400,7 +12466,8 @@ JOIN "household_demographics" AS "household_demographics"
ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk"
AND "household_demographics"."hd_dep_count" = 7
JOIN "store" AS "store"
- ON "store"."s_store_name" = 'ese' AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
+ ON "store"."s_store_name" = 'ese'
+ AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
JOIN "time_dim" AS "time_dim"
ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk"
AND "time_dim"."t_hour" = 15
@@ -12412,6 +12479,7 @@ LIMIT 100;
--------------------------------------
-- TPC-DS 97
--------------------------------------
+# execute: true
WITH ssci
AS (SELECT ss_customer_sk customer_sk,
ss_item_sk item_sk
@@ -12502,7 +12570,8 @@ SELECT
) AS "store_and_catalog"
FROM "ssci" AS "ssci"
FULL JOIN "csci" AS "csci"
- ON "csci"."customer_sk" = "ssci"."customer_sk" AND "csci"."item_sk" = "ssci"."item_sk"
+ ON "csci"."customer_sk" = "ssci"."customer_sk"
+ AND "csci"."item_sk" = "ssci"."item_sk"
LIMIT 100;
--------------------------------------
@@ -12546,8 +12615,8 @@ SELECT
FROM "store_sales" AS "store_sales"
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk"
- AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-17' AS DATE)
- AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-18' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-06-17' AS DATE)
+ AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-05-18' AS DATE)
JOIN "item" AS "item"
ON "item"."i_category" IN ('Men', 'Home', 'Electronics')
AND "item"."i_item_sk" = "store_sales"."ss_item_sk"
@@ -12567,7 +12636,8 @@ ORDER BY
--------------------------------------
-- TPC-DS 99
--------------------------------------
-SELECT Substr(w_warehouse_name, 1, 20),
+# execute: true
+SELECT Substr(w_warehouse_name, 1, 20) AS "_col_0",
sm_type,
cc_name,
Sum(CASE
diff --git a/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz b/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz
index cf64636..f2f07a3 100644
--- a/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz
index 894ce3b..62ddd8c 100644
--- a/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz
index 21f7040..af05d52 100644
--- a/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz
index b384c78..26b09b8 100644
--- a/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz
index b9b5f72..a8cabdb 100644
--- a/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz
+++ b/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz
Binary files differ
diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql
index 39b5ffa..c131643 100644
--- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql
+++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql
@@ -249,9 +249,9 @@ FROM "orders" AS "orders"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."l_orderkey" = "orders"."o_orderkey"
WHERE
- NOT "_u_0"."l_orderkey" IS NULL
- AND CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE)
+ CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE)
AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-07-01' AS DATE)
+ AND NOT "_u_0"."l_orderkey" IS NULL
GROUP BY
"orders"."o_orderpriority"
ORDER BY
@@ -609,7 +609,8 @@ JOIN "orders" AS "orders"
AND CAST("orders"."o_orderdate" AS DATE) < CAST('1994-01-01' AS DATE)
AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-10-01' AS DATE)
JOIN "lineitem" AS "lineitem"
- ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_returnflag" = 'R'
+ ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
+ AND "lineitem"."l_returnflag" = 'R'
GROUP BY
"customer"."c_custkey",
"customer"."c_name",
@@ -731,7 +732,8 @@ SELECT
) AS "high_line_count",
SUM(
CASE
- WHEN "orders"."o_orderpriority" <> '1-URGENT' AND "orders"."o_orderpriority" <> '2-HIGH'
+ WHEN "orders"."o_orderpriority" <> '1-URGENT'
+ AND "orders"."o_orderpriority" <> '2-HIGH'
THEN 1
ELSE 0
END
@@ -1257,7 +1259,8 @@ WITH "_u_0" AS (
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."p_partkey" = "partsupp"."ps_partkey"
LEFT JOIN "_u_1" AS "_u_1"
- ON "_u_1"."_u_2" = "partsupp"."ps_partkey" AND "_u_1"."_u_3" = "partsupp"."ps_suppkey"
+ ON "_u_1"."_u_2" = "partsupp"."ps_partkey"
+ AND "_u_1"."_u_3" = "partsupp"."ps_suppkey"
WHERE
"_u_1"."_col_0" < "partsupp"."ps_availqty" AND NOT "_u_0"."p_partkey" IS NULL
GROUP BY
@@ -1270,7 +1273,8 @@ FROM "supplier" AS "supplier"
LEFT JOIN "_u_4" AS "_u_4"
ON "_u_4"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
- ON "nation"."n_name" = 'CANADA' AND "nation"."n_nationkey" = "supplier"."s_nationkey"
+ ON "nation"."n_name" = 'CANADA'
+ AND "nation"."n_nationkey" = "supplier"."s_nationkey"
WHERE
NOT "_u_4"."ps_suppkey" IS NULL
ORDER BY
@@ -1358,8 +1362,8 @@ WHERE
"_u_2"."l_orderkey" IS NULL
OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "l1"."l_suppkey" <> "_x")
)
- AND NOT "_u_0"."l_orderkey" IS NULL
AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "l1"."l_suppkey" <> "_x")
+ AND NOT "_u_0"."l_orderkey" IS NULL
GROUP BY
"supplier"."s_name"
ORDER BY
diff --git a/tests/fixtures/pretty.sql b/tests/fixtures/pretty.sql
index 23d9511..fac08be 100644
--- a/tests/fixtures/pretty.sql
+++ b/tests/fixtures/pretty.sql
@@ -384,3 +384,14 @@ JOIN b
CROSS JOIN d
JOIN e
ON d.id = e.id;
+
+SELECT * FROM a JOIN b JOIN c USING (e) JOIN d USING (f) USING (g);
+SELECT
+ *
+FROM a
+JOIN b
+ JOIN c
+ USING (e)
+ JOIN d
+ USING (f)
+ USING (g);
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 981c1d4..1eaca14 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -1,7 +1,7 @@
import os
import datetime
import unittest
-from datetime import date
+from datetime import date, time
from multiprocessing import Pool
import duckdb
@@ -640,6 +640,7 @@ class TestExecutor(unittest.TestCase):
("CAST(1 AS TEXT)", "1"),
("CAST('1' AS LONG)", 1),
("CAST('1.1' AS FLOAT)", 1.1),
+ ("CAST('12:05:01' AS TIME)", time(12, 5, 1)),
("COALESCE(NULL)", None),
("COALESCE(NULL, NULL)", None),
("COALESCE(NULL, 'b')", "b"),
@@ -702,6 +703,18 @@ class TestExecutor(unittest.TestCase):
("ARRAY_JOIN(['hello', null ,'world'], ' ', ',')", "hello , world"),
("ARRAY_JOIN(['', null ,'world'], ' ', ',')", " , world"),
("STRUCT('foo', 'bar', null, null)", {"foo": "bar"}),
+ ("ROUND(1.5)", 2),
+ ("ROUND(1.2)", 1),
+ ("ROUND(1.2345, 2)", 1.23),
+ ("ROUND(NULL)", None),
+ ("UNIXTOTIME(1659981729)", datetime.datetime(2022, 8, 8, 18, 2, 9)),
+ ("TIMESTRTOTIME('2013-04-05 01:02:03')", datetime.datetime(2013, 4, 5, 1, 2, 3)),
+ ("UNIXTOTIME(40 * 365 * 86400)", datetime.datetime(2009, 12, 22, 00, 00, 00)),
+ (
+ "STRTOTIME('08/03/2024 12:34:56', '%d/%m/%Y %H:%M:%S')",
+ datetime.datetime(2024, 3, 8, 12, 34, 56),
+ ),
+ ("STRTOTIME('27/01/2024', '%d/%m/%Y')", datetime.datetime(2024, 1, 27)),
]:
with self.subTest(sql):
result = execute(f"SELECT {sql}")
@@ -807,7 +820,7 @@ class TestExecutor(unittest.TestCase):
self.assertEqual(result.columns, columns)
self.assertEqual(result.rows, expected)
- def test_dict_values(self):
+ def test_nested_values(self):
tables = {"foo": [{"raw": {"name": "Hello, World", "a": [{"b": 1}]}}]}
result = execute("SELECT raw:name AS name FROM foo", read="snowflake", tables=tables)
@@ -837,3 +850,9 @@ class TestExecutor(unittest.TestCase):
self.assertEqual(result.columns, ("flavor",))
self.assertEqual(result.rows, [("cherry",), ("lime",), ("apple",)])
+
+ tables = {"t": [{"x": [1, 2, 3]}]}
+
+ result = execute("SELECT x FROM t", dialect="duckdb", tables=tables)
+ self.assertEqual(result.columns, ("x",))
+ self.assertEqual(result.rows, [([1, 2, 3],)])
diff --git a/tests/test_expressions.py b/tests/test_expressions.py
index 11f8fd3..ed19ac1 100644
--- a/tests/test_expressions.py
+++ b/tests/test_expressions.py
@@ -22,6 +22,9 @@ class TestExpressions(unittest.TestCase):
pass
def test_eq(self):
+ query = parse_one("SELECT x FROM t")
+ self.assertEqual(query, query.copy())
+
self.assertNotEqual(exp.to_identifier("a"), exp.to_identifier("A"))
self.assertEqual(
@@ -498,6 +501,18 @@ class TestExpressions(unittest.TestCase):
self.assertEqual(expression.transform(fun).sql(), "FUN(a)")
+ def test_transform_with_parent_mutation(self):
+ expression = parse_one("SELECT COUNT(1) FROM table")
+
+ def fun(node):
+ if str(node) == "COUNT(1)":
+ # node gets silently mutated here - its parent points to the filter node
+ return exp.Filter(this=node, expression=exp.Where(this=exp.true()))
+ return node
+
+ transformed = expression.transform(fun)
+ self.assertEqual(transformed.sql(), "SELECT COUNT(1) FILTER(WHERE TRUE) FROM table")
+
def test_transform_multiple_children(self):
expression = parse_one("SELECT * FROM x")
@@ -517,7 +532,6 @@ class TestExpressions(unittest.TestCase):
return node
self.assertEqual(expression.transform(remove_column_b).sql(), "SELECT a FROM x")
- self.assertEqual(expression.transform(lambda _: None), None)
expression = parse_one("CAST(x AS FLOAT)")
@@ -544,6 +558,11 @@ class TestExpressions(unittest.TestCase):
expression.find(exp.Table).replace(parse_one("y"))
self.assertEqual(expression.sql(), "SELECT c, b FROM y")
+ # we try to replace a with a list but a's parent is actually ordered, not the ORDER BY node
+ expression = parse_one("SELECT * FROM x ORDER BY a DESC, c")
+ expression.find(exp.Ordered).this.replace([exp.column("a").asc(), exp.column("b").desc()])
+ self.assertEqual(expression.sql(), "SELECT * FROM x ORDER BY a, b DESC, c")
+
def test_arg_deletion(self):
# Using the pop helper method
expression = parse_one("SELECT a, b FROM x")
@@ -573,10 +592,8 @@ class TestExpressions(unittest.TestCase):
expression = parse_one("SELECT * FROM (SELECT * FROM x)")
self.assertEqual(len(list(expression.walk())), 9)
self.assertEqual(len(list(expression.walk(bfs=False))), 9)
- self.assertTrue(all(isinstance(e, exp.Expression) for e, _, _ in expression.walk()))
- self.assertTrue(
- all(isinstance(e, exp.Expression) for e, _, _ in expression.walk(bfs=False))
- )
+ self.assertTrue(all(isinstance(e, exp.Expression) for e in expression.walk()))
+ self.assertTrue(all(isinstance(e, exp.Expression) for e in expression.walk(bfs=False)))
def test_functions(self):
self.assertIsInstance(parse_one("x LIKE ANY (y)"), exp.Like)
@@ -611,7 +628,9 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("LEAST(a, b)"), exp.Least)
self.assertIsInstance(parse_one("LIKE(x, y)"), exp.Like)
self.assertIsInstance(parse_one("LN(a)"), exp.Ln)
- self.assertIsInstance(parse_one("LOG10(a)"), exp.Log10)
+ self.assertIsInstance(parse_one("LOG(b, n)"), exp.Log)
+ self.assertIsInstance(parse_one("LOG2(a)"), exp.Log)
+ self.assertIsInstance(parse_one("LOG10(a)"), exp.Log)
self.assertIsInstance(parse_one("MAX(a)"), exp.Max)
self.assertIsInstance(parse_one("MIN(a)"), exp.Min)
self.assertIsInstance(parse_one("MONTH(a)"), exp.Month)
@@ -765,6 +784,15 @@ class TestExpressions(unittest.TestCase):
self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": object})
def test_convert(self):
+ from collections import namedtuple
+
+ PointTuple = namedtuple("Point", ["x", "y"])
+
+ class PointClass:
+ def __init__(self, x=0, y=0):
+ self.x = x
+ self.y = y
+
for value, expected in [
(1, "1"),
("1", "'1'"),
@@ -775,14 +803,17 @@ class TestExpressions(unittest.TestCase):
({"x": None}, "MAP(ARRAY('x'), ARRAY(NULL))"),
(
datetime.datetime(2022, 10, 1, 1, 1, 1, 1),
- "TIME_STR_TO_TIME('2022-10-01T01:01:01.000001+00:00')",
+ "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001+00:00')",
),
(
datetime.datetime(2022, 10, 1, 1, 1, 1, tzinfo=datetime.timezone.utc),
- "TIME_STR_TO_TIME('2022-10-01T01:01:01+00:00')",
+ "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00')",
),
(datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"),
(math.nan, "NULL"),
+ (b"\x00\x00\x00\x00\x00\x00\x07\xd3", "2003"),
+ (PointTuple(1, 2), "STRUCT(1 AS x, 2 AS y)"),
+ (PointClass(1, 2), "STRUCT(1 AS x, 2 AS y)"),
]:
with self.subTest(value):
self.assertEqual(exp.convert(value).sql(), expected)
diff --git a/tests/test_lineage.py b/tests/test_lineage.py
index ed1a448..c782d9a 100644
--- a/tests/test_lineage.py
+++ b/tests/test_lineage.py
@@ -269,6 +269,41 @@ class TestLineage(unittest.TestCase):
node = node.downstream[0]
self.assertEqual(node.name, "z.a")
+ node = lineage(
+ "a",
+ """
+ WITH foo AS (
+ SELECT
+ 1 AS a
+ ), bar AS (
+ (
+ SELECT
+ a + 1 AS a
+ FROM foo
+ )
+ )
+ (
+ SELECT
+ a + b AS a
+ FROM bar
+ CROSS JOIN (
+ SELECT
+ 2 AS b
+ ) AS baz
+ )
+ """,
+ )
+ self.assertEqual(node.name, "a")
+ self.assertEqual(len(node.downstream), 2)
+ a, b = sorted(node.downstream, key=lambda n: n.name)
+ self.assertEqual(a.name, "bar.a")
+ self.assertEqual(len(a.downstream), 1)
+ self.assertEqual(b.name, "baz.b")
+ self.assertEqual(b.downstream, [])
+
+ node = a.downstream[0]
+ self.assertEqual(node.name, "foo.a")
+
def test_lineage_cte_union(self) -> None:
query = """
WITH dataset AS (
@@ -353,3 +388,46 @@ class TestLineage(unittest.TestCase):
with self.assertRaises(sqlglot.errors.SqlglotError):
lineage('"a"', "WITH x AS (SELECT 1 a) SELECT a FROM x", dialect="snowflake")
+
+ def test_ddl_lineage(self) -> None:
+ sql = """
+ INSERT /*+ HINT1 */
+ INTO target (x, y)
+ SELECT subq.x, subq.y
+ FROM (
+ SELECT /*+ HINT2 */
+ t.x AS x,
+ TO_DATE('2023-12-19', 'YYYY-MM-DD') AS y
+ FROM s.t t
+ WHERE 1 = 1 AND y = TO_DATE('2023-12-19', 'YYYY-MM-DD')
+ ) subq
+ """
+
+ node = lineage("y", sql, dialect="oracle")
+
+ self.assertEqual(node.name, "Y")
+ self.assertEqual(node.expression.sql(dialect="oracle"), "SUBQ.Y AS Y")
+
+ downstream = node.downstream[0]
+ self.assertEqual(downstream.name, "SUBQ.Y")
+ self.assertEqual(
+ downstream.expression.sql(dialect="oracle"), "TO_DATE('2023-12-19', 'YYYY-MM-DD') AS Y"
+ )
+
+ def test_trim(self) -> None:
+ sql = """
+ SELECT a, b, c
+ FROM (select a, b, c from y) z
+ """
+
+ node = lineage("a", sql, trim_selects=False)
+
+ self.assertEqual(node.name, "a")
+ self.assertEqual(
+ node.source.sql(),
+ "SELECT z.a AS a, z.b AS b, z.c AS c FROM (SELECT y.a AS a, y.b AS b, y.c AS c FROM y AS y) AS z",
+ )
+
+ downstream = node.downstream[0]
+ self.assertEqual(downstream.name, "z.a")
+ self.assertEqual(downstream.source.sql(), "SELECT y.a AS a, y.b AS b, y.c AS c FROM y AS y")
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 046e5a6..0e8ce15 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -298,7 +298,9 @@ class TestOptimizer(unittest.TestCase):
self.check_file(
"qualify_columns", qualify_columns, execute=True, schema=self.schema, set_dialect=True
)
- self.check_file("qualify_columns_ddl", qualify_columns, schema=self.schema)
+ self.check_file(
+ "qualify_columns_ddl", qualify_columns, schema=self.schema, set_dialect=True
+ )
def test_qualify_columns__with_invisible(self):
schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}})
@@ -340,6 +342,9 @@ class TestOptimizer(unittest.TestCase):
def test_simplify(self):
self.check_file("simplify", simplify, set_dialect=True)
+ expression = parse_one("SELECT a, c, b FROM table1 WHERE 1 = 1")
+ self.assertEqual(simplify(simplify(expression.find(exp.Where))).sql(), "WHERE TRUE")
+
expression = parse_one("TRUE AND TRUE AND TRUE")
self.assertEqual(exp.true(), optimizer.simplify.simplify(expression))
self.assertEqual(exp.true(), optimizer.simplify.simplify(expression.this))
@@ -359,15 +364,18 @@ class TestOptimizer(unittest.TestCase):
self.assertEqual("CONCAT('a', x, 'bc')", simplified_safe_concat.sql())
anon_unquoted_str = parse_one("anonymous(x, y)")
- self.assertEqual(optimizer.simplify.gen(anon_unquoted_str), "ANONYMOUS x,y")
+ self.assertEqual(optimizer.simplify.gen(anon_unquoted_str), "ANONYMOUS(x,y)")
+
+ query = parse_one("SELECT x FROM t")
+ self.assertEqual(optimizer.simplify.gen(query), optimizer.simplify.gen(query.copy()))
anon_unquoted_identifier = exp.Anonymous(
this=exp.to_identifier("anonymous"), expressions=[exp.column("x"), exp.column("y")]
)
- self.assertEqual(optimizer.simplify.gen(anon_unquoted_identifier), "ANONYMOUS x,y")
+ self.assertEqual(optimizer.simplify.gen(anon_unquoted_identifier), "ANONYMOUS(x,y)")
anon_quoted = parse_one('"anonymous"(x, y)')
- self.assertEqual(optimizer.simplify.gen(anon_quoted), '"anonymous" x,y')
+ self.assertEqual(optimizer.simplify.gen(anon_quoted), '"anonymous"(x,y)')
with self.assertRaises(ValueError) as e:
anon_invalid = exp.Anonymous(this=5)
@@ -375,6 +383,28 @@ class TestOptimizer(unittest.TestCase):
self.assertIn("Anonymous.this expects a str or an Identifier, got 'int'.", str(e.exception))
+ sql = parse_one(
+ """
+ WITH cte AS (select 1 union select 2), cte2 AS (
+ SELECT ROW() OVER (PARTITION BY y) FROM (
+ (select 1) limit 10
+ )
+ )
+ SELECT
+ *,
+ a + 1,
+ a div 1,
+ filter("B", (x, y) -> x + y)
+ FROM (z AS z CROSS JOIN z) AS f(a) LEFT JOIN a.b.c.d.e.f.g USING(n) ORDER BY 1
+ """
+ )
+ self.assertEqual(
+ optimizer.simplify.gen(sql),
+ """
+SELECT :with,WITH :expressions,CTE :this,UNION :this,SELECT :expressions,1,:expression,SELECT :expressions,2,:distinct,True,:alias, AS cte,CTE :this,SELECT :expressions,WINDOW :this,ROW(),:partition_by,y,:over,OVER,:from,FROM ((SELECT :expressions,1):limit,LIMIT :expression,10),:alias, AS cte2,:expressions,STAR,a + 1,a DIV 1,FILTER("B",LAMBDA :this,x + y,:expressions,x,y),:from,FROM (z AS z:joins,JOIN :this,z,:kind,CROSS) AS f(a),:joins,JOIN :this,a.b.c.d.e.f.g,:side,LEFT,:using,n,:order,ORDER :expressions,ORDERED :this,1,:nulls_first,True
+""".strip(),
+ )
+
def test_unnest_subqueries(self):
self.check_file(
"unnest_subqueries",
@@ -475,6 +505,18 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
)
def test_scope(self):
+ ast = parse_one("SELECT IF(a IN UNNEST(b), 1, 0) AS c FROM t", dialect="bigquery")
+ self.assertEqual(build_scope(ast).columns, [exp.column("a"), exp.column("b")])
+
+ many_unions = parse_one(" UNION ALL ".join(["SELECT x FROM t"] * 10000))
+ scopes_using_traverse = list(build_scope(many_unions).traverse())
+ scopes_using_traverse_scope = traverse_scope(many_unions)
+ self.assertEqual(len(scopes_using_traverse), len(scopes_using_traverse_scope))
+ assert all(
+ x.expression is y.expression
+ for x, y in zip(scopes_using_traverse, scopes_using_traverse_scope)
+ )
+
sql = """
WITH q AS (
SELECT x.b FROM x
@@ -522,7 +564,7 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
self.assertEqual(
{
node.sql()
- for node, *_ in walk_in_scope(expression.find(exp.Where))
+ for node in walk_in_scope(expression.find(exp.Where))
if isinstance(node, exp.Column)
},
{"s.b"},
@@ -667,6 +709,14 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
self.assertEqual(expressions[0].type.this, exp.DataType.Type.BIGINT)
self.assertEqual(expressions[1].type.this, exp.DataType.Type.DOUBLE)
+ expressions = annotate_types(
+ parse_one("SELECT SUM(2 / 3), CAST(2 AS DECIMAL) / 3", dialect="mysql")
+ ).expressions
+
+ self.assertEqual(expressions[0].type.this, exp.DataType.Type.DOUBLE)
+ self.assertEqual(expressions[0].this.type.this, exp.DataType.Type.DOUBLE)
+ self.assertEqual(expressions[1].type.this, exp.DataType.Type.DECIMAL)
+
def test_bracket_annotation(self):
expression = annotate_types(parse_one("SELECT A[:]")).expressions[0]
@@ -1056,6 +1106,34 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
self.assertEqual(expression.selects[1].type, exp.DataType.build("STRUCT<c int>"))
self.assertEqual(expression.selects[2].type, exp.DataType.build("int"))
+ self.assertEqual(
+ annotate_types(
+ optimizer.qualify.qualify(
+ parse_one(
+ "SELECT x FROM UNNEST(GENERATE_DATE_ARRAY('2021-01-01', current_date(), interval 1 day)) AS x"
+ )
+ )
+ )
+ .selects[0]
+ .type,
+ exp.DataType.build("date"),
+ )
+
+ def test_map_annotation(self):
+ # ToMap annotation
+ expression = annotate_types(parse_one("SELECT MAP {'x': 1}", read="duckdb"))
+ self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, INT)"))
+
+ # Map annotation
+ expression = annotate_types(
+ parse_one("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])", read="duckdb")
+ )
+ self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, INT)"))
+
+ # VarMap annotation
+ expression = annotate_types(parse_one("SELECT MAP('a', 'b')", read="spark"))
+ self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, VARCHAR)"))
+
def test_recursive_cte(self):
query = parse_one(
"""
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index 970c1ac..29ef5b6 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -85,6 +85,18 @@ x"""
],
)
+ for simple_query in ("SELECT 1\r\n", "\r\nSELECT 1"):
+ tokens = Tokenizer().tokenize(simple_query)
+ tokens = [(token.token_type, token.text) for token in tokens]
+
+ self.assertEqual(
+ tokens,
+ [
+ (TokenType.SELECT, "SELECT"),
+ (TokenType.NUMBER, "1"),
+ ],
+ )
+
def test_command(self):
tokens = Tokenizer().tokenize("SHOW;")
self.assertEqual(tokens[0].token_type, TokenType.SHOW)
diff --git a/tests/test_transpile.py b/tests/test_transpile.py
index 49deda9..0c65da4 100644
--- a/tests/test_transpile.py
+++ b/tests/test_transpile.py
@@ -62,11 +62,29 @@ class TestTranspile(unittest.TestCase):
def test_some(self):
self.validate(
"SELECT * FROM x WHERE a = SOME (SELECT 1)",
- "SELECT * FROM x WHERE a = ANY (SELECT 1)",
+ "SELECT * FROM x WHERE a = ANY(SELECT 1)",
)
def test_leading_comma(self):
self.validate(
+ "SELECT a, b, c FROM (SELECT a, b, c FROM t)",
+ "SELECT\n"
+ " a\n"
+ " , b\n"
+ " , c\n"
+ "FROM (\n"
+ " SELECT\n"
+ " a\n"
+ " , b\n"
+ " , c\n"
+ " FROM t\n"
+ ")",
+ leading_comma=True,
+ pretty=True,
+ pad=4,
+ indent=4,
+ )
+ self.validate(
"SELECT FOO, BAR, BAZ",
"SELECT\n FOO\n , BAR\n , BAZ",
leading_comma=True,
@@ -275,7 +293,7 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""",
FROM b
/* where */
WHERE
- foo /* comment 1 */ AND bar AND bla /* comment 2 */""",
+ foo AND /* comment 1 */ bar AND /* comment 2 */ bla""",
pretty=True,
)
self.validate(
@@ -428,7 +446,8 @@ FROM dw_1_dw_1_1.exactonline_2.transactionlines""",
"""SELECT
'hotel1' AS hotel,
*
-FROM dw_1_dw_1_1.exactonline_1.transactionlines /*
+FROM dw_1_dw_1_1.exactonline_1.transactionlines
+/*
UNION ALL
SELECT
'Thon Partner Hotel Jølster' AS hotel,
@@ -479,6 +498,32 @@ SELECT
FROM base""",
pretty=True,
)
+ self.validate(
+ """-- comment
+SOME_FUNC(arg IGNORE NULLS)
+ OVER (PARTITION BY foo ORDER BY bla) AS col""",
+ "SOME_FUNC(arg IGNORE NULLS) OVER (PARTITION BY foo ORDER BY bla) AS col /* comment */",
+ pretty=True,
+ )
+ self.validate(
+ """
+ SELECT *
+ FROM x
+ INNER JOIN y
+ -- inner join z
+ LEFT JOIN z using (id)
+ using (id)
+ """,
+ """SELECT
+ *
+FROM x
+INNER JOIN y
+ /* inner join z */
+ LEFT JOIN z
+ USING (id)
+ USING (id)""",
+ pretty=True,
+ )
def test_types(self):
self.validate("INT 1", "CAST(1 AS INT)")
@@ -676,7 +721,11 @@ FROM base""",
)
self.validate("STR_TO_TIME('x', 'y')", "DATE_PARSE('x', 'y')", write="presto")
- self.validate("STR_TO_UNIX('x', 'y')", "TO_UNIXTIME(DATE_PARSE('x', 'y'))", write="presto")
+ self.validate(
+ "STR_TO_UNIX('x', 'y')",
+ "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('x' AS VARCHAR), 'y')), PARSE_DATETIME(CAST('x' AS VARCHAR), 'y')))",
+ write="presto",
+ )
self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="presto")
self.validate("TIME_TO_UNIX(x)", "TO_UNIXTIME(x)", write="presto")
self.validate(
@@ -714,7 +763,10 @@ FROM base""",
self.validate("x[x - 1]", "x[x - 1]", write="presto", identity=False)
self.validate(
- "x[array_size(y) - 1]", "x[CARDINALITY(y) - 1 + 1]", write="presto", identity=False
+ "x[array_size(y) - 1]",
+ "x[(CARDINALITY(y) - 1) + 1]",
+ write="presto",
+ identity=False,
)
self.validate("x[3 - 1]", "x[3]", write="presto", identity=False)
self.validate("MAP(a, b)[0]", "MAP(a, b)[0]", write="presto", identity=False)
@@ -758,7 +810,6 @@ FROM base""",
"CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')",
"COMMENT ON ACCESS METHOD gin IS 'GIN index access method'",
"CREATE OR REPLACE STAGE",
- "CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)",
"EXECUTE statement",
"EXPLAIN SELECT * FROM x",
"GRANT INSERT ON foo TO bla",
@@ -904,3 +955,8 @@ FROM base""",
with self.assertRaises(UnsupportedError) as ctx:
unsupported(ErrorLevel.IMMEDIATE)
self.assertEqual(str(ctx.exception).count(error), 1)
+
+ def test_recursion(self):
+ sql = "1 AND 2 OR 3 AND " * 1000
+ sql += "4"
+ self.assertEqual(len(parse_one(sql).sql()), 17001)