From 8d36f5966675e23bee7026ba37ae0647fbf47300 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 8 Apr 2024 10:11:53 +0200 Subject: Merging upstream version 23.7.0. Signed-off-by: Daniel Baumann --- tests/dataframe/integration/test_session.py | 7 + tests/dataframe/unit/test_column.py | 4 +- tests/dataframe/unit/test_functions.py | 22 +- tests/dataframe/unit/test_session.py | 6 +- tests/dialects/test_athena.py | 20 ++ tests/dialects/test_bigquery.py | 83 ++++-- tests/dialects/test_clickhouse.py | 28 +- tests/dialects/test_dialect.py | 117 +++++++- tests/dialects/test_drill.py | 63 +---- tests/dialects/test_duckdb.py | 90 +++++- tests/dialects/test_hive.py | 27 +- tests/dialects/test_mysql.py | 67 ++++- tests/dialects/test_oracle.py | 68 +++++ tests/dialects/test_postgres.py | 147 ++++++---- tests/dialects/test_presto.py | 30 +- tests/dialects/test_prql.py | 17 ++ tests/dialects/test_redshift.py | 38 ++- tests/dialects/test_snowflake.py | 175 ++++++++++-- tests/dialects/test_spark.py | 37 ++- tests/dialects/test_sqlite.py | 132 +++++---- tests/dialects/test_teradata.py | 5 +- tests/dialects/test_tsql.py | 29 +- tests/fixtures/identity.sql | 10 +- tests/fixtures/optimizer/canonicalize.sql | 11 +- tests/fixtures/optimizer/merge_subqueries.sql | 18 +- tests/fixtures/optimizer/optimizer.sql | 19 +- tests/fixtures/optimizer/pushdown_projections.sql | 6 + tests/fixtures/optimizer/qualify_columns.sql | 14 +- tests/fixtures/optimizer/qualify_columns_ddl.sql | 15 +- tests/fixtures/optimizer/qualify_tables.sql | 1 + tests/fixtures/optimizer/simplify.sql | 99 ++++++- tests/fixtures/optimizer/tpc-ds/call_center.csv.gz | Bin 425 -> 427 bytes .../fixtures/optimizer/tpc-ds/catalog_page.csv.gz | Bin 460883 -> 393991 bytes .../optimizer/tpc-ds/catalog_returns.csv.gz | Bin 158215 -> 167258 bytes .../fixtures/optimizer/tpc-ds/catalog_sales.csv.gz | Bin 1814673 -> 639482 bytes tests/fixtures/optimizer/tpc-ds/customer.csv.gz | Bin 107573 -> 109529 bytes .../optimizer/tpc-ds/customer_address.csv.gz | Bin 28719 -> 28542 bytes .../optimizer/tpc-ds/customer_demographics.csv.gz | Bin 126715 -> 33320 bytes tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz | Bin 1575448 -> 208646 bytes .../optimizer/tpc-ds/household_demographics.csv.gz | Bin 23544 -> 23432 bytes tests/fixtures/optimizer/tpc-ds/income_band.csv.gz | Bin 191 -> 194 bytes tests/fixtures/optimizer/tpc-ds/inventory.csv.gz | Bin 202661 -> 43193 bytes tests/fixtures/optimizer/tpc-ds/item.csv.gz | Bin 31336 -> 31259 bytes tests/fixtures/optimizer/tpc-ds/promotion.csv.gz | Bin 501 -> 501 bytes tests/fixtures/optimizer/tpc-ds/reason.csv.gz | Bin 83 -> 87 bytes tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz | Bin 633 -> 623 bytes tests/fixtures/optimizer/tpc-ds/store.csv.gz | Bin 397 -> 398 bytes .../fixtures/optimizer/tpc-ds/store_returns.csv.gz | Bin 255650 -> 266354 bytes tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz | Bin 2436694 -> 421923 bytes tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz | Bin 680588 -> 77216 bytes tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 306 +++++++++++++-------- tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz | Bin 221 -> 224 bytes tests/fixtures/optimizer/tpc-ds/web_page.csv.gz | Bin 212 -> 215 bytes tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz | Bin 67833 -> 71921 bytes tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz | Bin 867887 -> 615903 bytes tests/fixtures/optimizer/tpc-ds/web_site.csv.gz | Bin 406 -> 409 bytes tests/fixtures/optimizer/tpc-h/tpc-h.sql | 18 +- tests/fixtures/pretty.sql | 11 + tests/test_executor.py | 23 +- tests/test_expressions.py | 47 +++- tests/test_lineage.py | 78 ++++++ tests/test_optimizer.py | 88 +++++- tests/test_tokens.py | 12 + tests/test_transpile.py | 68 ++++- 64 files changed, 1606 insertions(+), 450 deletions(-) create mode 100644 tests/dialects/test_athena.py create mode 100644 tests/dialects/test_prql.py (limited to 'tests') diff --git a/tests/dataframe/integration/test_session.py b/tests/dataframe/integration/test_session.py index ec50034..3bb3e20 100644 --- a/tests/dataframe/integration/test_session.py +++ b/tests/dataframe/integration/test_session.py @@ -34,3 +34,10 @@ class TestSessionFunc(DataFrameValidator): .agg(SF.countDistinct(SF.col("employee_id"))) ) self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True) + + def test_nameless_column(self): + query = "SELECT MAX(age) FROM employee" + df = self.spark.sql(query) + dfs = self.sqlglot.sql(query) + # Spark will alias the column to `max(age)` while sqlglot will alias to `_col_0` so their schemas will differ + self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True) diff --git a/tests/dataframe/unit/test_column.py b/tests/dataframe/unit/test_column.py index 7a12808..833005b 100644 --- a/tests/dataframe/unit/test_column.py +++ b/tests/dataframe/unit/test_column.py @@ -150,8 +150,8 @@ class TestDataframeColumn(unittest.TestCase): F.col("cola").between(datetime.date(2022, 1, 1), datetime.date(2022, 3, 1)).sql(), ) self.assertEqual( - "cola BETWEEN CAST('2022-01-01T01:01:01+00:00' AS TIMESTAMP) " - "AND CAST('2022-03-01T01:01:01+00:00' AS TIMESTAMP)", + "cola BETWEEN CAST('2022-01-01 01:01:01+00:00' AS TIMESTAMP) " + "AND CAST('2022-03-01 01:01:01+00:00' AS TIMESTAMP)", F.col("cola") .between(datetime.datetime(2022, 1, 1, 1, 1, 1), datetime.datetime(2022, 3, 1, 1, 1, 1)) .sql(), diff --git a/tests/dataframe/unit/test_functions.py b/tests/dataframe/unit/test_functions.py index e40d50d..884cded 100644 --- a/tests/dataframe/unit/test_functions.py +++ b/tests/dataframe/unit/test_functions.py @@ -29,7 +29,7 @@ class TestFunctions(unittest.TestCase): test_date = SF.lit(datetime.date(2022, 1, 1)) self.assertEqual("CAST('2022-01-01' AS DATE)", test_date.sql()) test_datetime = SF.lit(datetime.datetime(2022, 1, 1, 1, 1, 1)) - self.assertEqual("CAST('2022-01-01T01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql()) + self.assertEqual("CAST('2022-01-01 01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql()) test_dict = SF.lit({"cola": 1, "colb": "test"}) self.assertEqual("STRUCT(1 AS cola, 'test' AS colb)", test_dict.sql()) @@ -51,7 +51,7 @@ class TestFunctions(unittest.TestCase): test_date = SF.col(datetime.date(2022, 1, 1)) self.assertEqual("CAST('2022-01-01' AS DATE)", test_date.sql()) test_datetime = SF.col(datetime.datetime(2022, 1, 1, 1, 1, 1)) - self.assertEqual("CAST('2022-01-01T01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql()) + self.assertEqual("CAST('2022-01-01 01:01:01+00:00' AS TIMESTAMP)", test_datetime.sql()) test_dict = SF.col({"cola": 1, "colb": "test"}) self.assertEqual("STRUCT(1 AS cola, 'test' AS colb)", test_dict.sql()) @@ -250,9 +250,9 @@ class TestFunctions(unittest.TestCase): def test_log10(self): col_str = SF.log10("cola") - self.assertEqual("LOG10(cola)", col_str.sql()) + self.assertEqual("LOG(10, cola)", col_str.sql()) col = SF.log10(SF.col("cola")) - self.assertEqual("LOG10(cola)", col.sql()) + self.assertEqual("LOG(10, cola)", col.sql()) def test_log1p(self): col_str = SF.log1p("cola") @@ -262,9 +262,9 @@ class TestFunctions(unittest.TestCase): def test_log2(self): col_str = SF.log2("cola") - self.assertEqual("LOG2(cola)", col_str.sql()) + self.assertEqual("LOG(2, cola)", col_str.sql()) col = SF.log2(SF.col("cola")) - self.assertEqual("LOG2(cola)", col.sql()) + self.assertEqual("LOG(2, cola)", col.sql()) def test_rint(self): col_str = SF.rint("cola") @@ -1156,17 +1156,17 @@ class TestFunctions(unittest.TestCase): def test_regexp_extract(self): col_str = SF.regexp_extract("cola", r"(\d+)-(\d+)", 1) - self.assertEqual("REGEXP_EXTRACT(cola, '(\\d+)-(\\d+)', 1)", col_str.sql()) + self.assertEqual("REGEXP_EXTRACT(cola, '(\\\\d+)-(\\\\d+)', 1)", col_str.sql()) col = SF.regexp_extract(SF.col("cola"), r"(\d+)-(\d+)", 1) - self.assertEqual("REGEXP_EXTRACT(cola, '(\\d+)-(\\d+)', 1)", col.sql()) + self.assertEqual("REGEXP_EXTRACT(cola, '(\\\\d+)-(\\\\d+)', 1)", col.sql()) col_no_idx = SF.regexp_extract(SF.col("cola"), r"(\d+)-(\d+)") - self.assertEqual("REGEXP_EXTRACT(cola, '(\\d+)-(\\d+)')", col_no_idx.sql()) + self.assertEqual("REGEXP_EXTRACT(cola, '(\\\\d+)-(\\\\d+)')", col_no_idx.sql()) def test_regexp_replace(self): col_str = SF.regexp_replace("cola", r"(\d+)", "--") - self.assertEqual("REGEXP_REPLACE(cola, '(\\d+)', '--')", col_str.sql()) + self.assertEqual("REGEXP_REPLACE(cola, '(\\\\d+)', '--')", col_str.sql()) col = SF.regexp_replace(SF.col("cola"), r"(\d+)", "--") - self.assertEqual("REGEXP_REPLACE(cola, '(\\d+)', '--')", col.sql()) + self.assertEqual("REGEXP_REPLACE(cola, '(\\\\d+)', '--')", col.sql()) def test_initcap(self): col_str = SF.initcap("cola") diff --git a/tests/dataframe/unit/test_session.py b/tests/dataframe/unit/test_session.py index e2ebae4..848c603 100644 --- a/tests/dataframe/unit/test_session.py +++ b/tests/dataframe/unit/test_session.py @@ -79,7 +79,7 @@ class TestDataframeSession(DataFrameSQLValidator): sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") df = self.spark.sql(query).groupBy(F.col("cola")).agg(F.sum("colb")) self.assertEqual( - "WITH t38189 AS (SELECT cola, colb FROM table), t42330 AS (SELECT cola, colb FROM t38189) SELECT cola, SUM(colb) FROM t42330 GROUP BY cola", + "WITH t26614 AS (SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`), t23454 AS (SELECT cola, colb FROM t26614) SELECT cola, SUM(colb) FROM t23454 GROUP BY cola", df.sql(pretty=False, optimize=False)[0], ) @@ -87,14 +87,14 @@ class TestDataframeSession(DataFrameSQLValidator): query = "CREATE TABLE new_table AS WITH t1 AS (SELECT cola, colb FROM table) SELECT cola, colb, FROM t1" sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") df = self.spark.sql(query) - expected = "CREATE TABLE new_table AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`" + expected = "CREATE TABLE `new_table` AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`" self.compare_sql(df, expected) def test_sql_insert(self): query = "WITH t1 AS (SELECT cola, colb FROM table) INSERT INTO new_table SELECT cola, colb FROM t1" sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") df = self.spark.sql(query) - expected = "INSERT INTO new_table SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`" + expected = "INSERT INTO `new_table` SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`" self.compare_sql(df, expected) def test_session_create_builder_patterns(self): diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py new file mode 100644 index 0000000..3288ada --- /dev/null +++ b/tests/dialects/test_athena.py @@ -0,0 +1,20 @@ +from tests.dialects.test_dialect import Validator + + +class TestAthena(Validator): + dialect = "athena" + maxDiff = None + + def test_athena(self): + self.validate_identity( + """USING EXTERNAL FUNCTION some_function(input VARBINARY) + RETURNS VARCHAR + LAMBDA 'some-name' + SELECT + some_function(1)""", + check_command_warning=True, + ) + + self.validate_identity( + "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')" + ) diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 0d94d19..300d492 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -51,6 +51,8 @@ class TestBigQuery(Validator): self.assertEqual(table.name, "_y") self.validate_identity("SELECT * FROM x-0.y") + self.assertEqual(exp.to_table("`a.b`.`c.d`", dialect="bigquery").sql(), '"a"."b"."c"."d"') + self.assertEqual(exp.to_table("`x`.`y.z`", dialect="bigquery").sql(), '"x"."y"."z"') self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql(), '"x"."y"."z"') self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql("bigquery"), "`x.y.z`") self.assertEqual(exp.to_table("`x`.`y`", dialect="bigquery").sql("bigquery"), "`x`.`y`") @@ -58,6 +60,8 @@ class TestBigQuery(Validator): select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`") self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF") + self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text") + self.validate_identity("SELECT jsondoc['some_key']") self.validate_identity("SELECT `p.d.UdF`(data).* FROM `p.d.t`") self.validate_identity("SELECT * FROM `my-project.my-dataset.my-table`") self.validate_identity("CREATE OR REPLACE TABLE `a.b.c` CLONE `a.b.d`") @@ -177,6 +181,13 @@ class TestBigQuery(Validator): self.validate_identity( """SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')""" ) + self.validate_identity( + "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", + ) + self.validate_identity( + "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c", + "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c", + ) self.validate_identity( "SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)", "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)", @@ -185,10 +196,6 @@ class TestBigQuery(Validator): "select array_contains([1, 2, 3], 1)", "SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)", ) - self.validate_identity( - "create or replace view test (tenant_id OPTIONS(description='Test description on table creation')) select 1 as tenant_id, 1 as customer_id;", - "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", - ) self.validate_identity( "SELECT SPLIT(foo)", "SELECT SPLIT(foo, ',')", @@ -658,6 +665,13 @@ class TestBigQuery(Validator): "duckdb": "SELECT {'y': ARRAY(SELECT {'b': 1} FROM x)} FROM z", }, ) + self.validate_all( + "SELECT CAST(STRUCT(1) AS STRUCT)", + write={ + "bigquery": "SELECT CAST(STRUCT(1) AS STRUCT)", + "snowflake": "SELECT CAST(OBJECT_CONSTRUCT('_0', 1) AS OBJECT)", + }, + ) self.validate_all( "cast(x as date format 'MM/DD/YYYY')", write={ @@ -724,10 +738,10 @@ class TestBigQuery(Validator): self.validate_all( "'\\\\'", write={ - "bigquery": r"'\\'", - "duckdb": r"'\\'", - "presto": r"'\\'", - "hive": r"'\\'", + "bigquery": "'\\\\'", + "duckdb": "'\\'", + "presto": "'\\'", + "hive": "'\\\\'", }, ) self.validate_all( @@ -1004,20 +1018,28 @@ class TestBigQuery(Validator): }, ) self.validate_all( - "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", + "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) AS tab", read={ - "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", + "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) as tab", "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", "spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)", }, ) self.validate_all( - "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])", + "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1", read={ - "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])", + "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1", + "postgres": "SELECT * FROM (VALUES (1)) AS t1", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2", + read={ + "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2", "postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)", }, ) + self.validate_all( "SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table", write={ @@ -1050,28 +1072,43 @@ class TestBigQuery(Validator): ) self.validate_all( """SELECT - `u`.`harness_user_email` AS `harness_user_email`, - `d`.`harness_user_id` AS `harness_user_id`, - `harness_account_id` AS `harness_account_id` -FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`harness_cluster_details`) AS `d`, UNNEST(`d`.`harness_account_ids`) AS `harness_account_id` + `u`.`user_email` AS `user_email`, + `d`.`user_id` AS `user_id`, + `account_id` AS `account_id` +FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`cluster_details`) AS `d`, UNNEST(`d`.`account_ids`) AS `account_id` WHERE - NOT `harness_account_id` IS NULL""", + NOT `account_id` IS NULL""", read={ "": """ SELECT - "u"."harness_user_email" AS "harness_user_email", - "_q_0"."d"."harness_user_id" AS "harness_user_id", - "_q_1"."harness_account_id" AS "harness_account_id" + "u"."user_email" AS "user_email", + "_q_0"."d"."user_id" AS "user_id", + "_q_1"."account_id" AS "account_id" FROM "analytics_staging"."stg_mongodb__users" AS "u", - UNNEST("u"."harness_cluster_details") AS "_q_0"("d"), - UNNEST("_q_0"."d"."harness_account_ids") AS "_q_1"("harness_account_id") + UNNEST("u"."cluster_details") AS "_q_0"("d"), + UNNEST("_q_0"."d"."account_ids") AS "_q_1"("account_id") WHERE - NOT "_q_1"."harness_account_id" IS NULL + NOT "_q_1"."account_id" IS NULL """ }, pretty=True, ) + self.validate_all( + "SELECT MOD(x, 10)", + read={"postgres": "SELECT x % 10"}, + write={ + "bigquery": "SELECT MOD(x, 10)", + "postgres": "SELECT x % 10", + }, + ) + self.validate_all( + "SELECT CAST(x AS DATETIME)", + write={ + "": "SELECT CAST(x AS TIMESTAMP)", + "bigquery": "SELECT CAST(x AS DATETIME)", + }, + ) def test_errors(self): with self.assertRaises(TokenError): diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index edf3da1..c5f9847 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -1,5 +1,6 @@ from sqlglot import exp, parse_one from tests.dialects.test_dialect import Validator +from sqlglot.errors import ErrorLevel class TestClickhouse(Validator): @@ -153,7 +154,9 @@ class TestClickhouse(Validator): self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster") self.validate_identity("TRUNCATE DATABASE db") self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster") - + self.validate_identity( + "CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)" + ) self.validate_all( "SELECT arrayJoin([1,2,3])", write={ @@ -390,6 +393,22 @@ class TestClickhouse(Validator): ) self.validate_identity("SYSTEM STOP MERGES foo.bar", check_command_warning=True) + self.validate_identity( + "INSERT INTO FUNCTION s3('url', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table" + ) + self.validate_identity( + "INSERT INTO FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')" + ) + self.validate_identity( + """INSERT INTO TABLE FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""", + """INSERT INTO FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""", + ) + + self.validate_identity("SELECT 1 FORMAT TabSeparated") + self.validate_identity("SELECT * FROM t FORMAT TabSeparated") + self.validate_identity("SELECT FORMAT") + self.validate_identity("1 AS FORMAT").assert_is(exp.Alias) + def test_cte(self): self.validate_identity("WITH 'x' AS foo SELECT foo") self.validate_identity("WITH ['c'] AS field_names SELECT field_names") @@ -401,6 +420,13 @@ class TestClickhouse(Validator): self.assertIsInstance(query.args["with"].expressions[0].this, exp.Subquery) self.assertEqual(query.args["with"].expressions[0].alias, "y") + query = "WITH 1 AS var SELECT var" + for error_level in [ErrorLevel.IGNORE, ErrorLevel.RAISE, ErrorLevel.IMMEDIATE]: + self.assertEqual( + self.parse_one(query, error_level=error_level).sql(dialect=self.dialect), + query, + ) + def test_ternary(self): self.validate_all("x ? 1 : 2", write={"clickhouse": "CASE WHEN x THEN 1 ELSE 2 END"}) self.validate_all( diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 5faed51..76ab94b 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -17,8 +17,8 @@ from sqlglot.parser import logger as parser_logger class Validator(unittest.TestCase): dialect = None - def parse_one(self, sql): - return parse_one(sql, read=self.dialect) + def parse_one(self, sql, **kwargs): + return parse_one(sql, read=self.dialect, **kwargs) def validate_identity(self, sql, write_sql=None, pretty=False, check_command_warning=False): if check_command_warning: @@ -611,7 +611,7 @@ class TestDialect(Validator): write={ "duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%m-%d'))", "hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", - "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%m-%d'))", + "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(CAST('2020-01-01' AS VARCHAR), 'yyyy-MM-dd')))", "starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')", "doris": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')", }, @@ -700,7 +700,7 @@ class TestDialect(Validator): "hive": "TO_DATE(x)", "postgres": "CAST(x AS DATE)", "presto": "CAST(CAST(x AS TIMESTAMP) AS DATE)", - "snowflake": "CAST(x AS DATE)", + "snowflake": "TO_DATE(x)", "doris": "TO_DATE(x)", "mysql": "DATE(x)", }, @@ -961,6 +961,7 @@ class TestDialect(Validator): "presto": "CAST(x AS DATE)", "spark": "CAST(x AS DATE)", "sqlite": "x", + "tsql": "CAST(x AS DATE)", }, ) self.validate_all( @@ -1509,7 +1510,7 @@ class TestDialect(Validator): "POSITION(needle, haystack, pos)", write={ "drill": "STRPOS(SUBSTR(haystack, pos), needle) + pos - 1", - "presto": "STRPOS(haystack, needle, pos)", + "presto": "STRPOS(SUBSTR(haystack, pos), needle) + pos - 1", "spark": "LOCATE(needle, haystack, pos)", "clickhouse": "position(haystack, needle, pos)", "snowflake": "POSITION(needle, haystack, pos)", @@ -1719,6 +1720,11 @@ class TestDialect(Validator): with self.subTest(f"{expression.__class__.__name__} {dialect} -> {expected}"): self.assertEqual(expected, expression.sql(dialect=dialect)) + self.assertEqual( + parse_one("CAST(x AS DECIMAL) / y", read="mysql").sql(dialect="postgres"), + "CAST(x AS DECIMAL) / NULLIF(y, 0)", + ) + def test_limit(self): self.validate_all( "SELECT * FROM data LIMIT 10, 20", @@ -2054,6 +2060,44 @@ SELECT ) def test_logarithm(self): + for base in (2, 10): + with self.subTest(f"Transpiling LOG base {base}"): + self.validate_all( + f"LOG({base}, a)", + read={ + "": f"LOG{base}(a)", + "bigquery": f"LOG{base}(a)", + "clickhouse": f"LOG{base}(a)", + "databricks": f"LOG{base}(a)", + "duckdb": f"LOG{base}(a)", + "mysql": f"LOG{base}(a)", + "postgres": f"LOG{base}(a)", + "presto": f"LOG{base}(a)", + "spark": f"LOG{base}(a)", + "sqlite": f"LOG{base}(a)", + "trino": f"LOG{base}(a)", + "tsql": f"LOG{base}(a)", + }, + write={ + "bigquery": f"LOG(a, {base})", + "clickhouse": f"LOG{base}(a)", + "duckdb": f"LOG({base}, a)", + "mysql": f"LOG({base}, a)", + "oracle": f"LOG({base}, a)", + "postgres": f"LOG({base}, a)", + "presto": f"LOG{base}(a)", + "redshift": f"LOG({base}, a)", + "snowflake": f"LOG({base}, a)", + "spark2": f"LOG({base}, a)", + "spark": f"LOG({base}, a)", + "sqlite": f"LOG({base}, a)", + "starrocks": f"LOG({base}, a)", + "tableau": f"LOG(a, {base})", + "trino": f"LOG({base}, a)", + "tsql": f"LOG(a, {base})", + }, + ) + self.validate_all( "LOG(x)", read={ @@ -2082,6 +2126,7 @@ SELECT "bigquery": "LOG(n, b)", "databricks": "LOG(b, n)", "drill": "LOG(b, n)", + "duckdb": "LOG(b, n)", "hive": "LOG(b, n)", "mysql": "LOG(b, n)", "oracle": "LOG(b, n)", @@ -2089,8 +2134,13 @@ SELECT "snowflake": "LOG(b, n)", "spark": "LOG(b, n)", "sqlite": "LOG(b, n)", + "trino": "LOG(b, n)", "tsql": "LOG(n, b)", }, + write={ + "clickhouse": UnsupportedError, + "presto": UnsupportedError, + }, ) def test_count_if(self): @@ -2190,7 +2240,28 @@ SELECT "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", write={ "duckdb": "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", - "tsql": "WITH t1(x) AS (SELECT 1), t2(y) AS (SELECT 2) SELECT * FROM (SELECT y AS y FROM t2) AS subq", + "tsql": "WITH t2(y) AS (SELECT 2), t1(x) AS (SELECT 1) SELECT * FROM (SELECT y AS y FROM t2) AS subq", + }, + ) + self.validate_all( + """ +WITH c AS ( + WITH b AS ( + WITH a1 AS ( + SELECT 1 + ), a2 AS ( + SELECT 2 + ) + SELECT * FROM a1, a2 + ) + SELECT * + FROM b +) +SELECT * +FROM c""", + write={ + "duckdb": "WITH c AS (WITH b AS (WITH a1 AS (SELECT 1), a2 AS (SELECT 2) SELECT * FROM a1, a2) SELECT * FROM b) SELECT * FROM c", + "hive": "WITH a1 AS (SELECT 1), a2 AS (SELECT 2), b AS (SELECT * FROM a1, a2), c AS (SELECT * FROM b) SELECT * FROM c", }, ) @@ -2312,3 +2383,37 @@ SELECT self.validate_identity("TRUNCATE TABLE db.schema.test") self.validate_identity("TRUNCATE TABLE IF EXISTS db.schema.test") self.validate_identity("TRUNCATE TABLE t1, t2, t3") + + def test_create_sequence(self): + self.validate_identity("CREATE SEQUENCE seq") + self.validate_identity( + "CREATE TEMPORARY SEQUENCE seq AS SMALLINT START WITH 3 INCREMENT BY 2 MINVALUE 1 MAXVALUE 10 CACHE 1 NO CYCLE OWNED BY table.col" + ) + self.validate_identity( + "CREATE SEQUENCE seq START WITH 1 NO MINVALUE NO MAXVALUE CYCLE NO CACHE" + ) + self.validate_identity("CREATE OR REPLACE TEMPORARY SEQUENCE seq INCREMENT BY 1 NO CYCLE") + self.validate_identity( + "CREATE OR REPLACE SEQUENCE IF NOT EXISTS seq COMMENT='test comment' ORDER" + ) + self.validate_identity( + "CREATE SEQUENCE schema.seq SHARING=METADATA NOORDER NOKEEP SCALE EXTEND SHARD EXTEND SESSION" + ) + self.validate_identity( + "CREATE SEQUENCE schema.seq SHARING=DATA ORDER KEEP NOSCALE NOSHARD GLOBAL" + ) + self.validate_identity( + "CREATE SEQUENCE schema.seq SHARING=DATA NOCACHE NOCYCLE SCALE NOEXTEND" + ) + self.validate_identity( + """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE OWNED BY NONE""", + """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE""", + ) + self.validate_identity( + """CREATE TEMPORARY SEQUENCE seq START 1""", + """CREATE TEMPORARY SEQUENCE seq START WITH 1""", + ) + self.validate_identity( + """CREATE TEMPORARY SEQUENCE seq START WITH = 1 INCREMENT BY = 2""", + """CREATE TEMPORARY SEQUENCE seq START WITH 1 INCREMENT BY 2""", + ) diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py index 41c02fb..634c247 100644 --- a/tests/dialects/test_drill.py +++ b/tests/dialects/test_drill.py @@ -5,68 +5,17 @@ class TestDrill(Validator): dialect = "drill" def test_drill(self): - self.validate_all( - "DATE_FORMAT(a, 'yyyy')", - write={"drill": "TO_CHAR(a, 'yyyy')"}, + self.validate_identity( + "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))" + ) + self.validate_identity( + "SELECT * FROM (SELECT * FROM t) PIVOT(avg(c1) AS ac1 FOR c2 IN ('V' AS v))", ) - def test_string_literals(self): self.validate_all( "SELECT '2021-01-01' + INTERVAL 1 MONTH", write={ + "drill": "SELECT '2021-01-01' + INTERVAL '1' MONTH", "mysql": "SELECT '2021-01-01' + INTERVAL '1' MONTH", }, ) - - def test_quotes(self): - self.validate_all( - "'\\''", - write={ - "duckdb": "''''", - "presto": "''''", - "hive": "'\\''", - "spark": "'\\''", - }, - ) - self.validate_all( - "'\"x\"'", - write={ - "duckdb": "'\"x\"'", - "presto": "'\"x\"'", - "hive": "'\"x\"'", - "spark": "'\"x\"'", - }, - ) - self.validate_all( - "'\\\\a'", - read={ - "presto": "'\\\\a'", - }, - write={ - "duckdb": "'\\\\a'", - "presto": "'\\\\a'", - "hive": "'\\\\a'", - "spark": "'\\\\a'", - }, - ) - - def test_table_function(self): - self.validate_all( - "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", - write={ - "drill": "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))", - }, - ) - - def test_validate_pivot(self): - self.validate_all( - "SELECT * FROM (SELECT education_level, salary, marital_status, " - "EXTRACT(year FROM age(birth_date)) age FROM cp.`employee.json`) PIVOT (avg(salary) AS " - "avg_salary, avg(age) AS avg_age FOR marital_status IN ('M' married, 'S' single))", - write={ - "drill": "SELECT * FROM (SELECT education_level, salary, marital_status, " - "EXTRACT(year FROM age(birth_date)) AS age FROM cp.`employee.json`) " - "PIVOT(avg(salary) AS avg_salary, avg(age) AS avg_age FOR marital_status " - "IN ('M' AS married, 'S' AS single))" - }, - ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 58d1f06..5a7e93e 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -1,5 +1,6 @@ from sqlglot import ErrorLevel, UnsupportedError, exp, parse_one, transpile from sqlglot.helper import logger as helper_logger +from sqlglot.optimizer.annotate_types import annotate_types from tests.dialects.test_dialect import Validator @@ -7,6 +8,31 @@ class TestDuckDB(Validator): dialect = "duckdb" def test_duckdb(self): + query = "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT t.col['b'] FROM _data, UNNEST(_data.col) AS t(col) WHERE t.col['a'] = 1" + expr = annotate_types(self.validate_identity(query)) + self.assertEqual( + expr.sql(dialect="bigquery"), + "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT col.b FROM _data, UNNEST(_data.col) AS col WHERE col.a = 1", + ) + + self.validate_all( + "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + read={ + "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", + }, + ) + self.validate_all( + "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", + read={ + "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", + "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", + }, + write={ + "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", + "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", + }, + ) self.validate_all( 'STRUCT_PACK("a b" := 1)', write={ @@ -15,7 +41,25 @@ class TestDuckDB(Validator): "snowflake": "OBJECT_CONSTRUCT('a b', 1)", }, ) - + self.validate_all( + "ARRAY_TO_STRING(arr, delim)", + read={ + "bigquery": "ARRAY_TO_STRING(arr, delim)", + "postgres": "ARRAY_TO_STRING(arr, delim)", + "presto": "ARRAY_JOIN(arr, delim)", + "snowflake": "ARRAY_TO_STRING(arr, delim)", + "spark": "ARRAY_JOIN(arr, delim)", + }, + write={ + "bigquery": "ARRAY_TO_STRING(arr, delim)", + "duckdb": "ARRAY_TO_STRING(arr, delim)", + "postgres": "ARRAY_TO_STRING(arr, delim)", + "presto": "ARRAY_JOIN(arr, delim)", + "snowflake": "ARRAY_TO_STRING(arr, delim)", + "spark": "ARRAY_JOIN(arr, delim)", + "tsql": "STRING_AGG(arr, delim)", + }, + ) self.validate_all( "SELECT SUM(X) OVER (ORDER BY x)", write={ @@ -130,13 +174,6 @@ class TestDuckDB(Validator): "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"fruit": {"foo": "banana"}}'), 'fruit'), 'foo')""", }, ) - self.validate_all( - "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data", - write={ - "bigquery": "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT (SELECT col.b FROM UNNEST(col) AS col WHERE col.a = 1) FROM _data", - "duckdb": "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data", - }, - ) self.validate_all( "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", read={ @@ -201,6 +238,9 @@ class TestDuckDB(Validator): parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b" ) + self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])") + self.validate_identity("SELECT MAP {'x': 1}") + self.validate_identity("SELECT df1.*, df2.* FROM df1 POSITIONAL JOIN df2") self.validate_identity("MAKE_TIMESTAMP(1992, 9, 20, 13, 34, 27.123456)") self.validate_identity("MAKE_TIMESTAMP(1667810584123456)") self.validate_identity("SELECT EPOCH_MS(10) AS t") @@ -234,6 +274,18 @@ class TestDuckDB(Validator): """SELECT '{"foo": [1, 2, 3]}' -> 'foo' -> 0""", """SELECT '{"foo": [1, 2, 3]}' -> '$.foo' -> '$[0]'""", ) + self.validate_identity( + "SELECT ($$hello)'world$$)", + "SELECT ('hello)''world')", + ) + self.validate_identity( + "SELECT $$foo$$", + "SELECT 'foo'", + ) + self.validate_identity( + "SELECT $tag$foo$tag$", + "SELECT 'foo'", + ) self.validate_identity( "JSON_EXTRACT(x, '$.family')", "x -> '$.family'", @@ -679,7 +731,19 @@ class TestDuckDB(Validator): }, ) self.validate_identity( - "[x.STRING_SPLIT(' ')[1] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]" + "[x.STRING_SPLIT(' ')[i] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]" + ) + self.validate_identity( + """SELECT LIST_VALUE(1)[i]""", + """SELECT ([1])[i]""", + ) + self.validate_identity( + """{'x': LIST_VALUE(1)[i]}""", + """{'x': ([1])[i]}""", + ) + self.validate_identity( + """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': LIST_VALUE(1, 2, 3)[i], 'f2': LIST_VALUE(1, 2, 3)[i]})""", + """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': ([1, 2, 3])[i], 'f2': ([1, 2, 3])[i]})""", ) self.assertEqual( @@ -689,8 +753,6 @@ class TestDuckDB(Validator): "WARNING:sqlglot:Applying array index offset (1)", "WARNING:sqlglot:Applying array index offset (1)", "WARNING:sqlglot:Applying array index offset (1)", - "WARNING:sqlglot:Applying array index offset (-1)", - "WARNING:sqlglot:Applying array index offset (1)", ], ) @@ -702,7 +764,7 @@ class TestDuckDB(Validator): "SELECT MAKE_DATE(2016, 12, 25)", read={"bigquery": "SELECT DATE(2016, 12, 25)"} ) self.validate_all( - "SELECT CAST(CAST('2016-12-25 23:59:59' AS DATETIME) AS DATE)", + "SELECT CAST(CAST('2016-12-25 23:59:59' AS TIMESTAMP) AS DATE)", read={"bigquery": "SELECT DATE(DATETIME '2016-12-25 23:59:59')"}, ) self.validate_all( @@ -724,7 +786,7 @@ class TestDuckDB(Validator): write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"}, ) self.validate_all( - "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) DAY) + (7 * INTERVAL (-5) DAY))) AS t1", + "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - (DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7 % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1", read={ "presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1", }, @@ -952,7 +1014,7 @@ class TestDuckDB(Validator): "hive": "CAST(COL AS ARRAY)", "spark": "CAST(COL AS ARRAY)", "postgres": "CAST(COL AS BIGINT[])", - "snowflake": "CAST(COL AS ARRAY)", + "snowflake": "CAST(COL AS ARRAY(BIGINT))", }, ) self.validate_all( diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index b892dd6..33294ee 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -235,15 +235,18 @@ class TestHive(Validator): }, ) self.validate_all( - "'\\\\a'", + "'\\\\\\\\a'", read={ + "drill": "'\\\\\\\\a'", + "duckdb": "'\\\\a'", "presto": "'\\\\a'", }, write={ + "drill": "'\\\\\\\\a'", "duckdb": "'\\\\a'", + "hive": "'\\\\\\\\a'", "presto": "'\\\\a'", - "hive": "'\\\\a'", - "spark": "'\\\\a'", + "spark": "'\\\\\\\\a'", }, ) @@ -369,7 +372,7 @@ class TestHive(Validator): "UNIX_TIMESTAMP(x)", write={ "duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))", - "presto": "TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))", + "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(CAST(x AS VARCHAR), 'yyyy-MM-dd HH:mm:ss')))", "hive": "UNIX_TIMESTAMP(x)", "spark": "UNIX_TIMESTAMP(x)", "": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')", @@ -563,7 +566,7 @@ class TestHive(Validator): "LOCATE('a', x, 3)", write={ "duckdb": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", - "presto": "STRPOS(x, 'a', 3)", + "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", "hive": "LOCATE('a', x, 3)", "spark": "LOCATE('a', x, 3)", }, @@ -653,15 +656,6 @@ class TestHive(Validator): "spark": "LN(10)", }, ) - self.validate_all( - "LOG(10, 2)", - write={ - "duckdb": "LOG(10, 2)", - "presto": "LOG(10, 2)", - "hive": "LOG(10, 2)", - "spark": "LOG(10, 2)", - }, - ) self.validate_all( 'ds = "2020-01-01"', write={ @@ -745,13 +739,12 @@ class TestHive(Validator): ) def test_escapes(self) -> None: - self.validate_identity("'\n'") + self.validate_identity("'\n'", "'\\n'") self.validate_identity("'\\n'") - self.validate_identity("'\\\n'") + self.validate_identity("'\\\n'", "'\\\\\\n'") self.validate_identity("'\\\\n'") self.validate_identity("''") self.validate_identity("'\\\\'") - self.validate_identity("'\\z'") self.validate_identity("'\\\\z'") def test_data_type(self): diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 5f23c44..7a9d6bf 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -85,10 +85,17 @@ class TestMySQL(Validator): "ALTER TABLE test_table ALTER COLUMN test_column SET DATA TYPE LONGTEXT", "ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT", ) + self.validate_identity( + "ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT", + ) self.validate_identity( "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP) DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC", "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP() ON UPDATE CURRENT_TIMESTAMP()) DEFAULT CHARACTER SET=utf8 ROW_FORMAT=DYNAMIC", ) + self.validate_identity( + "CREATE TABLE `foo` (a VARCHAR(10), KEY idx_a (a DESC))", + "CREATE TABLE `foo` (a VARCHAR(10), INDEX idx_a (a DESC))", + ) self.validate_all( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", @@ -510,9 +517,8 @@ class TestMySQL(Validator): ) def test_mysql_time(self): - self.validate_identity("FROM_UNIXTIME(a, b)") - self.validate_identity("FROM_UNIXTIME(a, b, c)") self.validate_identity("TIME_STR_TO_UNIX(x)", "UNIX_TIMESTAMP(x)") + self.validate_identity("SELECT FROM_UNIXTIME(1711366265, '%Y %D %M')") self.validate_all( "SELECT TO_DAYS(x)", write={ @@ -578,6 +584,17 @@ class TestMySQL(Validator): self.validate_all( "STR_TO_DATE(x, '%Y-%m-%dT%T')", write={"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')"} ) + self.validate_all( + "SELECT FROM_UNIXTIME(col)", + read={ + "postgres": "SELECT TO_TIMESTAMP(col)", + }, + write={ + "mysql": "SELECT FROM_UNIXTIME(col)", + "postgres": "SELECT TO_TIMESTAMP(col)", + "redshift": "SELECT (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')", + }, + ) def test_mysql(self): self.validate_all( @@ -723,6 +740,52 @@ class TestMySQL(Validator): "postgres": "STRING_AGG(DISTINCT x, '' ORDER BY y DESC NULLS LAST)", }, ) + self.validate_all( + "GROUP_CONCAT(a, b, c SEPARATOR ',')", + write={ + "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR ',')", + "sqlite": "GROUP_CONCAT(a || b || c, ',')", + "tsql": "STRING_AGG(CONCAT(a, b, c), ',')", + "postgres": "STRING_AGG(CONCAT(a, b, c), ',')", + "presto": "ARRAY_JOIN(ARRAY_AGG(CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR), CAST(c AS VARCHAR))), ',')", + }, + ) + self.validate_all( + "GROUP_CONCAT(a, b, c SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR '')", + "sqlite": "GROUP_CONCAT(a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '')", + "postgres": "STRING_AGG(CONCAT(a, b, c), '')", + }, + ) + self.validate_all( + "GROUP_CONCAT(DISTINCT a, b, c SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) SEPARATOR '')", + "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '')", + "postgres": "STRING_AGG(DISTINCT CONCAT(a, b, c), '')", + }, + ) + self.validate_all( + "GROUP_CONCAT(a, b, c ORDER BY d SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(CONCAT(a, b, c) ORDER BY d SEPARATOR '')", + "sqlite": "GROUP_CONCAT(a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)", + "postgres": "STRING_AGG(CONCAT(a, b, c), '' ORDER BY d NULLS FIRST)", + }, + ) + self.validate_all( + "GROUP_CONCAT(DISTINCT a, b, c ORDER BY d SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) ORDER BY d SEPARATOR '')", + "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)", + "postgres": "STRING_AGG(DISTINCT CONCAT(a, b, c), '' ORDER BY d NULLS FIRST)", + }, + ) self.validate_identity( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" ) diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index 9438507..526b0b5 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -94,7 +94,20 @@ class TestOracle(Validator): "SELECT * FROM t SAMPLE (0.25)", ) self.validate_identity("SELECT TO_CHAR(-100, 'L99', 'NL_CURRENCY = '' AusDollars '' ')") + self.validate_identity( + "SELECT * FROM t START WITH col CONNECT BY NOCYCLE PRIOR col1 = col2" + ) + self.validate_all( + "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + read={ + "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + }, + write={ + "oracle": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + }, + ) self.validate_all( "TO_CHAR(x)", write={ @@ -102,6 +115,59 @@ class TestOracle(Validator): "oracle": "TO_CHAR(x)", }, ) + self.validate_all( + "TO_NUMBER(expr, fmt, nlsparam)", + read={ + "teradata": "TO_NUMBER(expr, fmt, nlsparam)", + }, + write={ + "oracle": "TO_NUMBER(expr, fmt, nlsparam)", + "teradata": "TO_NUMBER(expr, fmt, nlsparam)", + }, + ) + self.validate_all( + "TO_NUMBER(x)", + write={ + "bigquery": "CAST(x AS FLOAT64)", + "doris": "CAST(x AS DOUBLE)", + "drill": "CAST(x AS DOUBLE)", + "duckdb": "CAST(x AS DOUBLE)", + "hive": "CAST(x AS DOUBLE)", + "mysql": "CAST(x AS DOUBLE)", + "oracle": "TO_NUMBER(x)", + "postgres": "CAST(x AS DOUBLE PRECISION)", + "presto": "CAST(x AS DOUBLE)", + "redshift": "CAST(x AS DOUBLE PRECISION)", + "snowflake": "TO_NUMBER(x)", + "spark": "CAST(x AS DOUBLE)", + "spark2": "CAST(x AS DOUBLE)", + "starrocks": "CAST(x AS DOUBLE)", + "tableau": "CAST(x AS DOUBLE)", + "teradata": "TO_NUMBER(x)", + }, + ) + self.validate_all( + "TO_NUMBER(x, fmt)", + read={ + "databricks": "TO_NUMBER(x, fmt)", + "drill": "TO_NUMBER(x, fmt)", + "postgres": "TO_NUMBER(x, fmt)", + "snowflake": "TO_NUMBER(x, fmt)", + "spark": "TO_NUMBER(x, fmt)", + "redshift": "TO_NUMBER(x, fmt)", + "teradata": "TO_NUMBER(x, fmt)", + }, + write={ + "databricks": "TO_NUMBER(x, fmt)", + "drill": "TO_NUMBER(x, fmt)", + "oracle": "TO_NUMBER(x, fmt)", + "postgres": "TO_NUMBER(x, fmt)", + "snowflake": "TO_NUMBER(x, fmt)", + "spark": "TO_NUMBER(x, fmt)", + "redshift": "TO_NUMBER(x, fmt)", + "teradata": "TO_NUMBER(x, fmt)", + }, + ) self.validate_all( "SELECT TO_CHAR(TIMESTAMP '1999-12-01 10:00:00')", write={ @@ -210,6 +276,8 @@ class TestOracle(Validator): self.validate_identity( "SELECT /*+ LEADING(e j) */ * FROM employees e, departments d, job_history j WHERE e.department_id = d.department_id AND e.hire_date = j.start_date" ) + self.validate_identity("INSERT /*+ APPEND */ INTO IAP_TBL (id, col1) VALUES (2, 'test2')") + self.validate_identity("INSERT /*+ APPEND_VALUES */ INTO dest_table VALUES (i, 'Value')") def test_xml_table(self): self.validate_identity("XMLTABLE('x')") diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 1d0ea8b..7a41cef 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -1,4 +1,4 @@ -from sqlglot import ParseError, UnsupportedError, exp, parse_one, transpile +from sqlglot import ParseError, UnsupportedError, exp, transpile from sqlglot.helper import logger as helper_logger from tests.dialects.test_dialect import Validator @@ -12,27 +12,12 @@ class TestPostgres(Validator): self.validate_identity("|/ x", "SQRT(x)") self.validate_identity("||/ x", "CBRT(x)") - expr = parse_one( - "SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)", read="postgres" - ) + expr = self.parse_one("SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)") unnest = expr.args["joins"][0].this.this unnest.assert_is(exp.Unnest) alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION""" - expr = parse_one(alter_table_only, read="postgres") - - # Checks that user-defined types are parsed into DataType instead of Identifier - parse_one("CREATE TABLE t (a udt)", read="postgres").this.expressions[0].args[ - "kind" - ].assert_is(exp.DataType) - - # Checks that OID is parsed into a DataType (ObjectIdentifier) - self.assertIsInstance( - parse_one("CREATE TABLE public.propertydata (propertyvalue oid)", read="postgres").find( - exp.DataType - ), - exp.ObjectIdentifier, - ) + expr = self.parse_one(alter_table_only) self.assertIsInstance(expr, exp.AlterTable) self.assertEqual(expr.sql(dialect="postgres"), alter_table_only) @@ -55,13 +40,6 @@ class TestPostgres(Validator): self.validate_identity("CAST(x AS DATEMULTIRANGE)") self.validate_identity("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]") self.validate_identity("SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]") - self.validate_all( - "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", - write={ - "": "SELECT ARRAY_OVERLAPS(ARRAY(1, 2, 3), ARRAY(1, 2))", - "postgres": "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", - }, - ) self.validate_identity("x$") self.validate_identity("SELECT ARRAY[1, 2, 3]") self.validate_identity("SELECT ARRAY(SELECT 1)") @@ -85,6 +63,12 @@ class TestPostgres(Validator): self.validate_identity("EXEC AS myfunc @id = 123", check_command_warning=True) self.validate_identity("SELECT CURRENT_USER") self.validate_identity("SELECT * FROM ONLY t1") + self.validate_identity( + """UPDATE "x" SET "y" = CAST('0 days 60.000000 seconds' AS INTERVAL) WHERE "x"."id" IN (2, 3)""" + ) + self.validate_identity( + "WITH t1 AS MATERIALIZED (SELECT 1), t2 AS NOT MATERIALIZED (SELECT 2) SELECT * FROM t1, t2" + ) self.validate_identity( """LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND '1 month' FOLLOWING)""" ) @@ -106,9 +90,6 @@ class TestPostgres(Validator): self.validate_identity( "SELECT SUM(x) OVER a, SUM(y) OVER b FROM c WINDOW a AS (PARTITION BY d), b AS (PARTITION BY e)" ) - self.validate_identity( - "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)" - ) self.validate_identity( "SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1) IN ('ab') THEN 1 ELSE 0 END" ) @@ -166,6 +147,10 @@ class TestPostgres(Validator): "SELECT $$Dianne's horse$$", "SELECT 'Dianne''s horse'", ) + self.validate_identity( + "SELECT $$The price is $9.95$$ AS msg", + "SELECT 'The price is $9.95' AS msg", + ) self.validate_identity( "COMMENT ON TABLE mytable IS $$doc this$$", "COMMENT ON TABLE mytable IS 'doc this'" ) @@ -328,6 +313,36 @@ class TestPostgres(Validator): ) self.validate_identity("SELECT * FROM t1*", "SELECT * FROM t1") + self.validate_all( + 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', + write={ + "bigquery": "SELECT * FROM `test_table` ORDER BY RAND() NULLS LAST LIMIT 5", + "duckdb": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', + "postgres": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', + "tsql": "SELECT TOP 5 * FROM [test_table] ORDER BY RAND()", + }, + ) + self.validate_all( + "SELECT (data -> 'en-US') AS acat FROM my_table", + write={ + "duckdb": """SELECT (data -> '$."en-US"') AS acat FROM my_table""", + "postgres": "SELECT (data -> 'en-US') AS acat FROM my_table", + }, + ) + self.validate_all( + "SELECT (data ->> 'en-US') AS acat FROM my_table", + write={ + "duckdb": """SELECT (data ->> '$."en-US"') AS acat FROM my_table""", + "postgres": "SELECT (data ->> 'en-US') AS acat FROM my_table", + }, + ) + self.validate_all( + "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", + write={ + "": "SELECT ARRAY_OVERLAPS(ARRAY(1, 2, 3), ARRAY(1, 2))", + "postgres": "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", + }, + ) self.validate_all( "SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t", read={ @@ -517,15 +532,6 @@ class TestPostgres(Validator): "tsql": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", }, ) - self.validate_all( - "CREATE TABLE x (a UUID, b BYTEA)", - write={ - "duckdb": "CREATE TABLE x (a UUID, b BLOB)", - "presto": "CREATE TABLE x (a UUID, b VARBINARY)", - "hive": "CREATE TABLE x (a UUID, b BINARY)", - "spark": "CREATE TABLE x (a UUID, b BINARY)", - }, - ) self.validate_all( "SELECT * FROM x FETCH 1 ROW", write={ @@ -635,11 +641,30 @@ class TestPostgres(Validator): "postgres": "x / y ^ z", }, ) - - self.assertIsInstance(parse_one("id::UUID", read="postgres"), exp.Cast) + self.validate_all( + "CAST(x AS NAME)", + read={ + "redshift": "CAST(x AS NAME)", + }, + write={ + "postgres": "CAST(x AS NAME)", + "redshift": "CAST(x AS NAME)", + }, + ) + self.assertIsInstance(self.parse_one("id::UUID"), exp.Cast) def test_ddl(self): - expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres") + # Checks that user-defined types are parsed into DataType instead of Identifier + self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is( + exp.DataType + ) + + # Checks that OID is parsed into a DataType (ObjectIdentifier) + self.assertIsInstance( + self.parse_one("CREATE TABLE p.t (c oid)").find(exp.DataType), exp.ObjectIdentifier + ) + + expr = self.parse_one("CREATE TABLE t (x INTERVAL day)") cdef = expr.find(exp.ColumnDef) cdef.args["kind"].assert_is(exp.DataType) self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)") @@ -666,6 +691,21 @@ class TestPostgres(Validator): self.validate_identity("TRUNCATE TABLE t1 RESTRICT") self.validate_identity("TRUNCATE TABLE t1 CONTINUE IDENTITY CASCADE") self.validate_identity("TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT") + self.validate_identity( + "CREATE TABLE t (vid INT NOT NULL, CONSTRAINT ht_vid_nid_fid_idx EXCLUDE (INT4RANGE(vid, nid) WITH &&, INT4RANGE(fid, fid, '[]') WITH &&))" + ) + self.validate_identity( + "CREATE TABLE t (i INT, PRIMARY KEY (i), EXCLUDE USING gist(col varchar_pattern_ops DESC NULLS LAST WITH &&) WITH (sp1=1, sp2=2))" + ) + self.validate_identity( + "CREATE TABLE t (i INT, EXCLUDE USING btree(INT4RANGE(vid, nid, '[]') ASC NULLS FIRST WITH &&) INCLUDE (col1, col2))" + ) + self.validate_identity( + "CREATE TABLE t (i INT, EXCLUDE USING gin(col1 WITH &&, col2 WITH ||) USING INDEX TABLESPACE tablespace WHERE (id > 5))" + ) + self.validate_identity( + "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)" + ) self.validate_identity( "CREATE TABLE cust_part3 PARTITION OF customers FOR VALUES WITH (MODULUS 3, REMAINDER 2)" ) @@ -691,13 +731,13 @@ class TestPostgres(Validator): "CREATE INDEX index_issues_on_title_trigram ON public.issues USING gin(title public.gin_trgm_ops)" ) self.validate_identity( - "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO NOTHING RETURNING *" + "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO NOTHING RETURNING *" ) self.validate_identity( - "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO UPDATE SET x.id = 1 RETURNING *" + "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = 1 RETURNING *" ) self.validate_identity( - "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO UPDATE SET x.id = excluded.id RETURNING *" + "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = excluded.id RETURNING *" ) self.validate_identity( "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT ON CONSTRAINT pkey DO NOTHING RETURNING *" @@ -740,8 +780,7 @@ class TestPostgres(Validator): check_command_warning=True, ) self.validate_identity( - "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp", - check_command_warning=True, + "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp" ) self.validate_identity( "CREATE FUNCTION x(INT) RETURNS INT SET search_path TO 'public'", @@ -793,7 +832,7 @@ class TestPostgres(Validator): ])) ) """, - "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY (ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))", + "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY(ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))", ) self.validate_identity( "CREATE INDEX index_ci_pipelines_on_project_idandrefandiddesc ON public.ci_pipelines USING btree(project_id, ref, id DESC)" @@ -803,6 +842,16 @@ class TestPostgres(Validator): "TRUNCATE TABLE ONLY t1, t2, ONLY t3, t4, t5 RESTART IDENTITY CASCADE", ) + self.validate_all( + "CREATE TABLE x (a UUID, b BYTEA)", + write={ + "duckdb": "CREATE TABLE x (a UUID, b BLOB)", + "presto": "CREATE TABLE x (a UUID, b VARBINARY)", + "hive": "CREATE TABLE x (a UUID, b BINARY)", + "spark": "CREATE TABLE x (a UUID, b BINARY)", + }, + ) + with self.assertRaises(ParseError): transpile("CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres") with self.assertRaises(ParseError): @@ -857,7 +906,7 @@ class TestPostgres(Validator): ) def test_operator(self): - expr = parse_one("1 OPERATOR(+) 2 OPERATOR(*) 3", read="postgres") + expr = self.parse_one("1 OPERATOR(+) 2 OPERATOR(*) 3") expr.left.assert_is(exp.Operator) expr.left.left.assert_is(exp.Literal) @@ -926,8 +975,8 @@ class TestPostgres(Validator): def test_regexp_binary(self): """See https://github.com/tobymao/sqlglot/pull/2404 for details.""" - self.assertIsInstance(parse_one("'thomas' ~ '.*thomas.*'", read="postgres"), exp.Binary) - self.assertIsInstance(parse_one("'thomas' ~* '.*thomas.*'", read="postgres"), exp.Binary) + self.assertIsInstance(self.parse_one("'thomas' ~ '.*thomas.*'"), exp.Binary) + self.assertIsInstance(self.parse_one("'thomas' ~* '.*thomas.*'"), exp.Binary) def test_unnest_json_array(self): trino_input = """ diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 2ea595e..2162499 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -63,7 +63,7 @@ class TestPresto(Validator): "duckdb": "CAST(a AS INT[])", "presto": "CAST(a AS ARRAY(INTEGER))", "spark": "CAST(a AS ARRAY)", - "snowflake": "CAST(a AS ARRAY)", + "snowflake": "CAST(a AS ARRAY(INT))", }, ) self.validate_all( @@ -82,18 +82,17 @@ class TestPresto(Validator): "duckdb": "CAST([1, 2] AS BIGINT[])", "presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))", "spark": "CAST(ARRAY(1, 2) AS ARRAY)", - "snowflake": "CAST([1, 2] AS ARRAY)", + "snowflake": "CAST([1, 2] AS ARRAY(BIGINT))", }, ) self.validate_all( - "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INT,INT))", + "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INT))", write={ - "bigquery": "CAST(MAP([1], [1]) AS MAP)", - "duckdb": "CAST(MAP([1], [1]) AS MAP(INT, INT))", - "presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))", - "hive": "CAST(MAP(1, 1) AS MAP)", - "spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP)", - "snowflake": "CAST(OBJECT_CONSTRUCT(1, 1) AS OBJECT)", + "duckdb": "CAST(MAP(['key'], [1]) AS MAP(TEXT, INT))", + "presto": "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INTEGER))", + "hive": "CAST(MAP('key', 1) AS MAP)", + "snowflake": "CAST(OBJECT_CONSTRUCT('key', 1) AS MAP(VARCHAR, INT))", + "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('key'), ARRAY(1)) AS MAP)", }, ) self.validate_all( @@ -104,7 +103,7 @@ class TestPresto(Validator): "presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))", "hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP>)", "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP>)", - "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS OBJECT)", + "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS MAP(VARCHAR, ARRAY(INT)))", }, ) self.validate_all( @@ -178,6 +177,17 @@ class TestPresto(Validator): "spark": "ARRAY_JOIN(x, '-', 'a')", }, ) + self.validate_all( + "STRPOS('ABC', 'A', 3)", + read={ + "trino": "STRPOS('ABC', 'A', 3)", + }, + write={ + "presto": "STRPOS('ABC', 'A', 3)", + "trino": "STRPOS('ABC', 'A', 3)", + "snowflake": "POSITION('A', 'ABC')", + }, + ) def test_interval_plural_to_singular(self): # Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals diff --git a/tests/dialects/test_prql.py b/tests/dialects/test_prql.py new file mode 100644 index 0000000..9a42d0c --- /dev/null +++ b/tests/dialects/test_prql.py @@ -0,0 +1,17 @@ +from tests.dialects.test_dialect import Validator + + +class TestPRQL(Validator): + dialect = "prql" + + def test_prql(self): + self.validate_identity("FROM x", "SELECT * FROM x") + self.validate_identity("FROM x DERIVE a + 1", "SELECT *, a + 1 FROM x") + self.validate_identity("FROM x DERIVE x = a + 1", "SELECT *, a + 1 AS x FROM x") + self.validate_identity("FROM x DERIVE {a + 1}", "SELECT *, a + 1 FROM x") + self.validate_identity("FROM x DERIVE {x = a + 1, b}", "SELECT *, a + 1 AS x, b FROM x") + self.validate_identity("FROM x TAKE 10", "SELECT * FROM x LIMIT 10") + self.validate_identity("FROM x TAKE 10 TAKE 5", "SELECT * FROM x LIMIT 5") + self.validate_identity( + "FROM x DERIVE {x = a + 1, b} SELECT {y = x, 2}", "SELECT a + 1 AS y, 2 FROM x" + ) diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index 506f429..a91f4f9 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -139,6 +139,15 @@ class TestRedshift(Validator): "presto": "LENGTH(x)", }, ) + self.validate_all( + "x LIKE 'abc' || '%'", + read={ + "duckdb": "STARTS_WITH(x, 'abc')", + }, + write={ + "redshift": "x LIKE 'abc' || '%'", + }, + ) self.validate_all( "SELECT SYSDATE", @@ -203,18 +212,6 @@ class TestRedshift(Validator): "redshift": "SELECT CAST('abc' AS CHAR)", }, ) - self.validate_all( - "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid", - write={ - "redshift": "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid", - }, - ) - self.validate_all( - 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5', - write={ - "redshift": 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5' - }, - ) self.validate_all( "SELECT DISTINCT ON (a) a, b FROM x ORDER BY c DESC", write={ @@ -293,6 +290,7 @@ class TestRedshift(Validator): ) def test_identity(self): + self.validate_identity("LISTAGG(DISTINCT foo, ', ')") self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1") self.validate_identity("SELECT DATEADD(DAY, 1, 'today')") self.validate_identity("SELECT * FROM #x") @@ -305,6 +303,12 @@ class TestRedshift(Validator): self.validate_identity("CREATE TABLE datetable (start_date DATE, end_date DATE)") self.validate_identity("SELECT APPROXIMATE AS y") self.validate_identity("CREATE TABLE t (c BIGINT IDENTITY(0, 1))") + self.validate_identity( + "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid" + ) + self.validate_identity( + """SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE '%start\\\\_%' LIMIT 5""" + ) self.validate_identity( """SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)""" ) @@ -463,6 +467,10 @@ FROM ( "": "INSERT INTO t(a, b) SELECT a, b FROM (VALUES (1, 2), (3, 4)) AS t (a, b)", }, ) + self.validate_identity("CREATE TABLE table_backup BACKUP NO AS SELECT * FROM event") + self.validate_identity("CREATE TABLE table_backup BACKUP YES AS SELECT * FROM event") + self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP NO") + self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP YES") def test_create_table_like(self): self.validate_identity( @@ -499,7 +507,11 @@ FROM ( def test_varchar_max(self): self.validate_all( - "CREATE TABLE TEST (cola VARCHAR(MAX))", + 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))', + read={ + "redshift": "CREATE TABLE TEST (cola VARCHAR(max))", + "tsql": "CREATE TABLE TEST (cola VARCHAR(max))", + }, write={ "redshift": 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))', }, diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index e48f811..a41d35a 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -40,6 +40,19 @@ WHERE )""", ) + self.validate_identity("SELECT TIMEADD(HOUR, 2, CAST('09:05:03' AS TIME))") + self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS MAP(VARCHAR, INT))") + self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS OBJECT(a CHAR NOT NULL))") + self.validate_identity("SELECT CAST([1, 2, 3] AS ARRAY(INT))") + self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR) RENAME FIELDS)") + self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR, y VARCHAR) ADD FIELDS)") + self.validate_identity("SELECT TO_TIMESTAMP(123.4)").selects[0].assert_is(exp.Anonymous) + self.validate_identity("SELECT TO_TIME(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP_NTZ(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP_LTZ(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP_TZ(x) FROM t") + self.validate_identity("TO_DECIMAL(expr, fmt, precision, scale)") self.validate_identity("ALTER TABLE authors ADD CONSTRAINT c1 UNIQUE (id, email)") self.validate_identity("RM @parquet_stage", check_command_warning=True) self.validate_identity("REMOVE @parquet_stage", check_command_warning=True) @@ -59,7 +72,6 @@ WHERE self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')") self.validate_identity("CAST(x AS GEOMETRY)") self.validate_identity("OBJECT_CONSTRUCT(*)") - self.validate_identity("SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'") self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'") self.validate_identity("SELECT HLL(*)") self.validate_identity("SELECT HLL(a)") @@ -77,18 +89,29 @@ WHERE self.validate_identity("ALTER TABLE foo UNSET DATA_RETENTION_TIME_IN_DAYS, CHANGE_TRACKING") self.validate_identity("COMMENT IF EXISTS ON TABLE foo IS 'bar'") self.validate_identity("SELECT CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', col)") - self.validate_identity("REGEXP_REPLACE('target', 'pattern', '\n')") self.validate_identity("ALTER TABLE a SWAP WITH b") + self.validate_identity("SELECT MATCH_CONDITION") self.validate_identity( 'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage' ) self.validate_identity( "SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)" ) + self.validate_identity( + "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID" + ) + self.validate_identity( + "REGEXP_REPLACE('target', 'pattern', '\n')", + "REGEXP_REPLACE('target', 'pattern', '\\n')", + ) + self.validate_identity( + "SELECT a:from::STRING, a:from || ' test' ", + "SELECT CAST(GET_PATH(a, 'from') AS TEXT), GET_PATH(a, 'from') || ' test'", + ) self.validate_identity("x:from", "GET_PATH(x, 'from')") self.validate_identity( - "value:values::string", - "CAST(GET_PATH(value, 'values') AS TEXT)", + "value:values::string::int", + "CAST(CAST(GET_PATH(value, 'values') AS TEXT) AS INT)", ) self.validate_identity( """SELECT GET_PATH(PARSE_JSON('{"y": [{"z": 1}]}'), 'y[0]:z')""", @@ -132,7 +155,11 @@ WHERE ) self.validate_identity( "v:attr[0]:name", - "GET_PATH(GET_PATH(v, 'attr[0]'), 'name')", + "GET_PATH(v, 'attr[0].name')", + ) + self.validate_identity( + "a.x:from.b:c.d::int", + "CAST(GET_PATH(a.x, 'from.b.c.d') AS INT)", ) self.validate_identity( """SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""", @@ -189,10 +216,6 @@ WHERE "SELECT {fn CEILING(5.3)}", "SELECT CEIL(5.3)", ) - self.validate_identity( - "SELECT TO_TIMESTAMP(x) FROM t", - "SELECT CAST(x AS TIMESTAMPNTZ) FROM t", - ) self.validate_identity( "CAST(x AS BYTEINT)", "CAST(x AS INT)", @@ -380,6 +403,7 @@ WHERE write={ "duckdb": "{'a': b, 'c': d}", "snowflake": "OBJECT_CONSTRUCT('a', b, 'c', d)", + "": "STRUCT(b AS a, d AS c)", }, ) self.validate_identity("OBJECT_CONSTRUCT(a, b, c, d)") @@ -419,6 +443,46 @@ WHERE "sqlite": "SELECT MIN(c1), MIN(c2) FROM test", }, ) + for suffix in ( + "", + " OVER ()", + ): + self.validate_all( + f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + read={ + "snowflake": f"SELECT MEDIAN(x){suffix}", + "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + }, + write={ + "": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x NULLS LAST){suffix}", + "duckdb": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + }, + ) + self.validate_all( + f"SELECT MEDIAN(x){suffix}", + write={ + "": f"SELECT PERCENTILE_CONT(x, 0.5){suffix}", + "duckdb": f"SELECT QUANTILE_CONT(x, 0.5){suffix}", + "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + }, + ) + for func in ( + "CORR", + "COVAR_POP", + "COVAR_SAMP", + ): + self.validate_all( + f"SELECT {func}(y, x){suffix}", + write={ + "": f"SELECT {func}(y, x){suffix}", + "duckdb": f"SELECT {func}(y, x){suffix}", + "postgres": f"SELECT {func}(y, x){suffix}", + "snowflake": f"SELECT {func}(y, x){suffix}", + }, + ) self.validate_all( "TO_CHAR(x, y)", read={ @@ -560,9 +624,9 @@ WHERE self.validate_all( '''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''', write={ - "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a' -> '$."b c"'""", - "mysql": """SELECT JSON_EXTRACT(JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a'), '$."b c"')""", - "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a'), '["b c"]')""", + "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a."b c"'""", + "mysql": """SELECT JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a."b c"')""", + "snowflake": """SELECT GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a["b c"]')""", }, ) self.validate_all( @@ -623,9 +687,16 @@ WHERE self.validate_all( "SELECT TO_TIMESTAMP('2013-04-05 01:02:03')", write={ - "bigquery": "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2013-04-05 01:02:03')", - "snowflake": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-mm-DD hh24:mi:ss')", - "spark": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-MM-dd HH:mm:ss')", + "bigquery": "SELECT CAST('2013-04-05 01:02:03' AS DATETIME)", + "snowflake": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMPNTZ)", + "spark": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)", + }, + ) + self.validate_all( + "SELECT TO_TIME('12:05:00')", + write={ + "bigquery": "SELECT CAST('12:05:00' AS TIME)", + "snowflake": "SELECT CAST('12:05:00' AS TIME)", }, ) self.validate_all( @@ -667,9 +738,13 @@ WHERE ) self.validate_all( "ARRAY_TO_STRING(x, '')", + read={ + "duckdb": "ARRAY_TO_STRING(x, '')", + }, write={ "spark": "ARRAY_JOIN(x, '')", "snowflake": "ARRAY_TO_STRING(x, '')", + "duckdb": "ARRAY_TO_STRING(x, '')", }, ) self.validate_all( @@ -930,6 +1005,9 @@ WHERE ) self.validate_all( "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))", + read={ + "snowflake": "TIMESTAMPADD(DAY, 5, CAST('2008-12-25' AS DATE))", + }, write={ "bigquery": "DATE_ADD(CAST('2008-12-25' AS DATE), INTERVAL 5 DAY)", "snowflake": "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))", @@ -952,6 +1030,46 @@ WHERE self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)") self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)") + self.validate_identity("TO_DATE('12345')").assert_is(exp.Anonymous) + + self.validate_identity( + "SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'", + "SELECT CAST('2019-02-28' AS DATE) + INTERVAL '1 day, 1 year'", + ) + + self.validate_identity("DATE(x)").assert_is(exp.Anonymous) + self.validate_identity("TO_DATE(x)").assert_is(exp.Anonymous) + self.validate_identity("TRY_TO_DATE(x)").assert_is(exp.Anonymous) + + self.validate_all( + "TO_DATE(x, 'MM-DD-YYYY')", + write={ + "snowflake": "TO_DATE(x, 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME(x, '%m-%d-%Y') AS DATE)", + }, + ) + self.validate_all( + "DATE('01-01-2000', 'MM-DD-YYYY')", + write={ + "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", + }, + ) + self.validate_all( + "TO_DATE('01-01-2000', 'MM-DD-YYYY')", + write={ + "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", + }, + ) + self.validate_all( + "TRY_TO_DATE('01-01-2000', 'MM-DD-YYYY')", + write={ + "snowflake": "TRY_TO_DATE('01-01-2000', 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", + }, + ) + def test_semi_structured_types(self): self.validate_identity("SELECT CAST(a AS VARIANT)") self.validate_identity("SELECT CAST(a AS ARRAY)") @@ -1046,6 +1164,9 @@ WHERE self.validate_identity("CREATE SCHEMA mytestschema_clone CLONE testschema") self.validate_identity("CREATE TABLE IDENTIFIER('foo') (COLUMN1 VARCHAR, COLUMN2 VARCHAR)") self.validate_identity("CREATE TABLE IDENTIFIER($foo) (col1 VARCHAR, col2 VARCHAR)") + self.validate_identity( + "DROP function my_udf (OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN)))" + ) self.validate_identity( "CREATE TABLE orders_clone_restore CLONE orders AT (TIMESTAMP => TO_TIMESTAMP_TZ('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss'))" ) @@ -1061,6 +1182,17 @@ WHERE self.validate_identity( "CREATE OR REPLACE TABLE EXAMPLE_DB.DEMO.USERS (ID DECIMAL(38, 0) NOT NULL, PRIMARY KEY (ID), FOREIGN KEY (CITY_CODE) REFERENCES EXAMPLE_DB.DEMO.CITIES (CITY_CODE))" ) + self.validate_identity( + "CREATE ICEBERG TABLE my_iceberg_table (amount ARRAY(INT)) CATALOG='SNOWFLAKE' EXTERNAL_VOLUME='my_external_volume' BASE_LOCATION='my/relative/path/from/extvol'" + ) + self.validate_identity( + "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN))) RETURNS VARCHAR AS $$ SELECT 'foo' $$", + "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN))) RETURNS VARCHAR AS ' SELECT \\'foo\\' '", + ) + self.validate_identity( + "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE(col1 ARRAY(INT)) AS $$ WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t $$", + "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE (col1 ARRAY(INT)) AS ' WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t '", + ) self.validate_all( "CREATE TABLE orders_clone CLONE orders", @@ -1292,7 +1424,6 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene "spark": "SELECT `c0`, `c1` FROM (VALUES (1, 2), (3, 4)) AS `t0`(`c0`, `c1`)", }, ) - self.validate_all( """SELECT $1 AS "_1" FROM VALUES ('a'), ('b')""", write={ @@ -1300,6 +1431,18 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene "spark": """SELECT ${1} AS `_1` FROM VALUES ('a'), ('b')""", }, ) + self.validate_all( + "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x) AS t", + read={ + "duckdb": "SELECT * FROM (VALUES ({'a': 1})) AS t(x)", + }, + ) + self.validate_all( + "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x UNION ALL SELECT OBJECT_CONSTRUCT('a', 2)) AS t", + read={ + "duckdb": "SELECT * FROM (VALUES ({'a': 1}), ({'a': 2})) AS t(x)", + }, + ) def test_describe_table(self): self.validate_all( diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 1cf1ede..18f1fb7 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -1,6 +1,7 @@ from unittest import mock from sqlglot import exp, parse_one +from sqlglot.dialects.dialect import Dialects from tests.dialects.test_dialect import Validator @@ -245,12 +246,15 @@ TBLPROPERTIES ( self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)") self.validate_identity("REFRESH TABLE a.b.c") self.validate_identity("INTERVAL -86 DAYS") - self.validate_identity("SELECT UNIX_TIMESTAMP()") self.validate_identity("TRIM(' SparkSQL ')") self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')") self.validate_identity("SPLIT(str, pattern, lim)") + self.validate_identity( + "SELECT UNIX_TIMESTAMP()", + "SELECT UNIX_TIMESTAMP(CURRENT_TIMESTAMP())", + ) self.validate_identity( "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 23 HOUR + 59 MINUTE + 59 SECONDS", "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL '23' HOUR + INTERVAL '59' MINUTE + INTERVAL '59' SECONDS", @@ -280,6 +284,18 @@ TBLPROPERTIES ( "SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')", ) + self.validate_all( + "SELECT SPLIT('123|789', '\\\\|')", + read={ + "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')", + "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')", + }, + write={ + "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')", + "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')", + "spark": "SELECT SPLIT('123|789', '\\\\|')", + }, + ) self.validate_all( "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", write={ @@ -366,7 +382,7 @@ TBLPROPERTIES ( "hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)", "redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", - "snowflake": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", + "snowflake": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", "spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", }, ) @@ -644,10 +660,10 @@ TBLPROPERTIES ( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a STRING, b STRING, c STRING) FROM person WHERE zip_code > 94511" ) self.validate_identity( - "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' FROM person" + "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' FROM person" ) self.validate_identity( - "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') FROM person WHERE zip_code > 94511" + "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') FROM person WHERE zip_code > 94511" ) self.validate_identity( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' FROM person WHERE zip_code > 94500" @@ -720,3 +736,16 @@ TBLPROPERTIES ( "presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))", }, ) + + def test_strip_modifiers(self): + without_modifiers = "SELECT * FROM t" + with_modifiers = f"{without_modifiers} CLUSTER BY y DISTRIBUTE BY x SORT BY z" + query = self.parse_one(with_modifiers) + + for dialect in Dialects: + with self.subTest(f"Transpiling query with CLUSTER/DISTRIBUTE/SORT BY to {dialect}"): + name = dialect.value + if name in ("", "databricks", "hive", "spark", "spark2"): + self.assertEqual(query.sql(name), with_modifiers) + else: + self.assertEqual(query.sql(name), without_modifiers) diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py index 2421987..f3cde0b 100644 --- a/tests/dialects/test_sqlite.py +++ b/tests/dialects/test_sqlite.py @@ -6,58 +6,6 @@ from sqlglot.helper import logger as helper_logger class TestSQLite(Validator): dialect = "sqlite" - def test_ddl(self): - self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)") - self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)") - - self.validate_all( - """ - CREATE TABLE "Track" - ( - CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), - FOREIGN KEY ("AlbumId") REFERENCES "Album" ( - "AlbumId" - ) ON DELETE NO ACTION ON UPDATE NO ACTION, - FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, - FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT - ) - """, - write={ - "sqlite": """CREATE TABLE "Track" ( - CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), - FOREIGN KEY ("AlbumId") REFERENCES "Album" ( - "AlbumId" - ) ON DELETE NO ACTION ON UPDATE NO ACTION, - FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, - FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT -)""", - }, - pretty=True, - ) - self.validate_all( - "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", - read={ - "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", - }, - write={ - "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", - "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", - "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)", - }, - ) - self.validate_all( - """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""", - write={ - "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""", - "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)", - }, - ) - def test_sqlite(self): self.validate_identity("SELECT DATE()") self.validate_identity("SELECT DATE('now', 'start of month', '+1 month', '-1 day')") @@ -65,7 +13,6 @@ class TestSQLite(Validator): self.validate_identity("SELECT DATETIME(1092941466, 'auto')") self.validate_identity("SELECT DATETIME(1092941466, 'unixepoch', 'localtime')") self.validate_identity("SELECT UNIXEPOCH()") - self.validate_identity("SELECT STRFTIME('%s')") self.validate_identity("SELECT JULIANDAY('now') - JULIANDAY('1776-07-04')") self.validate_identity("SELECT UNIXEPOCH() - UNIXEPOCH('2004-01-01 02:34:56')") self.validate_identity("SELECT DATE('now', 'start of year', '+9 months', 'weekday 2')") @@ -145,6 +92,29 @@ class TestSQLite(Validator): write={"snowflake": "LEAST(x, y, z)"}, ) + def test_strftime(self): + self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')") + self.validate_identity("SELECT STRFTIME('%Y-%m-%d', '2016-10-16', 'start of month')") + self.validate_identity( + "SELECT STRFTIME('%s')", + "SELECT STRFTIME('%s', CURRENT_TIMESTAMP)", + ) + + self.validate_all( + "SELECT STRFTIME('%Y-%m-%d', '2020-01-01 12:05:03')", + write={ + "duckdb": "SELECT STRFTIME(CAST('2020-01-01 12:05:03' AS TIMESTAMP), '%Y-%m-%d')", + "sqlite": "SELECT STRFTIME('%Y-%m-%d', '2020-01-01 12:05:03')", + }, + ) + self.validate_all( + "SELECT STRFTIME('%Y-%m-%d', CURRENT_TIMESTAMP)", + write={ + "duckdb": "SELECT STRFTIME(CAST(CURRENT_TIMESTAMP AS TIMESTAMP), '%Y-%m-%d')", + "sqlite": "SELECT STRFTIME('%Y-%m-%d', CURRENT_TIMESTAMP)", + }, + ) + def test_datediff(self): self.validate_all( "DATEDIFF(a, b, 'day')", @@ -190,3 +160,59 @@ class TestSQLite(Validator): ) self.assertIn("Named columns are not supported in table alias.", cm.output[0]) + + def test_ddl(self): + for conflict_action in ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"): + with self.subTest(f"ON CONFLICT {conflict_action}"): + self.validate_identity("CREATE TABLE a (b, c, UNIQUE (b, c) ON CONFLICT IGNORE)") + + self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)") + self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)") + + self.validate_all( + """ + CREATE TABLE "Track" + ( + CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), + FOREIGN KEY ("AlbumId") REFERENCES "Album" ( + "AlbumId" + ) ON DELETE NO ACTION ON UPDATE NO ACTION, + FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, + FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT + ) + """, + write={ + "sqlite": """CREATE TABLE "Track" ( + CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), + FOREIGN KEY ("AlbumId") REFERENCES "Album" ( + "AlbumId" + ) ON DELETE NO ACTION ON UPDATE NO ACTION, + FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, + FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT +)""", + }, + pretty=True, + ) + self.validate_all( + "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", + read={ + "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + }, + write={ + "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", + "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)", + }, + ) + self.validate_all( + """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""", + write={ + "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""", + "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)", + }, + ) diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py index f3894fd..a85ca8c 100644 --- a/tests/dialects/test_teradata.py +++ b/tests/dialects/test_teradata.py @@ -5,6 +5,7 @@ class TestTeradata(Validator): dialect = "teradata" def test_teradata(self): + self.validate_identity("TO_NUMBER(expr, fmt, nlsparam)") self.validate_identity("SELECT TOP 10 * FROM tbl") self.validate_identity("SELECT * FROM tbl SAMPLE 5") self.validate_identity( @@ -100,7 +101,9 @@ class TestTeradata(Validator): self.validate_identity( "CREATE VOLATILE SET TABLE example1 AS (SELECT col1, col2, col3 FROM table1) WITH DATA PRIMARY INDEX (col1) ON COMMIT PRESERVE ROWS" ) - + self.validate_identity( + "CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)" + ) self.validate_all( """ CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL, diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index ed474fd..aefd857 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -272,6 +272,28 @@ class TestTSQL(Validator): "SELECT [x].[y] FROM foo", ) + self.validate_all( + "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS", + read={ + "postgres": "SELECT * FROM t OFFSET 2", + }, + write={ + "postgres": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST OFFSET 2", + "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS", + }, + ) + self.validate_all( + "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY", + read={ + "duckdb": "SELECT * FROM t LIMIT 10 OFFSET 5", + "sqlite": "SELECT * FROM t LIMIT 5, 10", + "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY", + }, + write={ + "duckdb": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST LIMIT 10 OFFSET 5", + "sqlite": "SELECT * FROM t ORDER BY (SELECT NULL) LIMIT 10 OFFSET 5", + }, + ) self.validate_all( "SELECT CAST([a].[b] AS SMALLINT) FROM foo", write={ @@ -720,6 +742,9 @@ class TestTSQL(Validator): ) def test_ddl(self): + for view_attr in ("ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"): + self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x") + expression = parse_one("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B", dialect="tsql") self.assertIsInstance(expression, exp.AlterTable) self.assertIsInstance(expression.args["actions"][0], exp.Drop) @@ -1549,7 +1574,7 @@ WHERE "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "LAST_DAY_OF_MONTH(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE))", "redshift": "LAST_DAY(CAST(GETDATE() AS DATE))", - "snowflake": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "snowflake": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", "spark": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", "tsql": "EOMONTH(CAST(GETDATE() AS DATE))", }, @@ -1564,7 +1589,7 @@ WHERE "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))", "redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", - "snowflake": "LAST_DAY(DATEADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))", + "snowflake": "LAST_DAY(DATEADD(MONTH, -1, TO_DATE(CURRENT_TIMESTAMP())))", "spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))", "tsql": "EOMONTH(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", }, diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql index 6d3bb07..5453a78 100644 --- a/tests/fixtures/identity.sql +++ b/tests/fixtures/identity.sql @@ -141,6 +141,7 @@ x ILIKE '%y%' ESCAPE '\' INTERVAL '1' DAY INTERVAL '1' MONTH INTERVAL '1' YEAR +INTERVAL '1' HOUR TO SECOND INTERVAL '-1' CURRENT_DATE INTERVAL '-31' CAST(GETDATE() AS DATE) INTERVAL (1 + 3) DAYS @@ -149,6 +150,7 @@ INTERVAL '1' DAY * 5 CASE WHEN TRUE THEN INTERVAL '15' DAYS END CASE WHEN TRUE THEN 1 ELSE interval END CASE WHEN TRUE THEN 1 ELSE "INTERVAL" END +SELECT asof FROM x SELECT * WHERE interval IS NULL SELECT * WHERE NOT interval IS NULL SELECT * WHERE INTERVAL "is" > 1 @@ -176,6 +178,7 @@ COUNT(DISTINCT CASE WHEN DATE_TRUNC('ISOWEEK', DATE(time_field)) = DATE_TRUNC('I COUNT(a, b) x[y - 1] CASE WHEN SUM(x) > 3 THEN 1 END OVER (PARTITION BY x) +ANY(x) OVER (PARTITION BY x) SUM(ROW() OVER (PARTITION BY x)) SUM(ROW() OVER (PARTITION BY x + 1)) SUM(ROW() OVER (PARTITION BY x AND y)) @@ -361,7 +364,6 @@ SELECT GREATEST(a, b, c) FROM test SELECT LAST_VALUE(a) FROM test SELECT LAST_VALUE(a) IGNORE NULLS OVER () + 1 SELECT LN(a) FROM test -SELECT LOG10(a) FROM test SELECT MAX(a) FROM test SELECT MIN(a) FROM test SELECT POWER(a, 2) FROM test @@ -476,6 +478,7 @@ SELECT 1 UNION (SELECT 2) ORDER BY x SELECT * FROM (((SELECT 1) UNION SELECT 2) ORDER BY x LIMIT 1 OFFSET 1) SELECT * FROM ((SELECT 1 AS x) CROSS JOIN (SELECT 2 AS y)) AS z ((SELECT 1) EXCEPT (SELECT 2)) +((SELECT 1)) LIMIT 1 VALUES (1) UNION SELECT * FROM x WITH a AS (SELECT 1) SELECT a.* FROM a WITH a AS (SELECT 1), b AS (SELECT 2) SELECT a.*, b.* FROM a CROSS JOIN b @@ -637,6 +640,8 @@ CREATE DATABASE IF NOT EXISTS y CREATE PROCEDURE IF NOT EXISTS a.b.c() AS 'DECLARE BEGIN; END' CREATE TABLE T3 AS (SELECT DISTINCT A FROM T1 EXCEPT (SELECT A FROM T2) LIMIT 1) DESCRIBE x +DESCRIBE EXTENDED a.b +DESCRIBE FORMATTED a.b DROP INDEX a.b.c DROP FUNCTION a.b.c (INT) DROP MATERIALIZED VIEW x.y.z @@ -831,6 +836,7 @@ SELECT * FROM schema.case SELECT * FROM current_date SELECT * FROM schema.current_date SELECT /*+ SOME_HINT(foo) */ 1 +SELECT /*+ REBALANCE */ * FROM foo SELECT * FROM (tbl1 CROSS JOIN (SELECT * FROM tbl2) AS t1) /* comment1 */ INSERT INTO x /* comment2 */ VALUES (1, 2, 3) /* comment1 */ UPDATE tbl /* comment2 */ SET x = 2 WHERE x < 2 @@ -857,3 +863,5 @@ SELECT truncate SELECT only TRUNCATE(a, b) SELECT enum +SELECT unlogged +SELECT name diff --git a/tests/fixtures/optimizer/canonicalize.sql b/tests/fixtures/optimizer/canonicalize.sql index 98b2f07..e4c78b7 100644 --- a/tests/fixtures/optimizer/canonicalize.sql +++ b/tests/fixtures/optimizer/canonicalize.sql @@ -2,7 +2,7 @@ SELECT w.d + w.e AS c FROM w AS w; SELECT CONCAT("w"."d", "w"."e") AS "c" FROM "w" AS "w"; SELECT CAST(w.d AS DATE) > w.e AS a FROM w AS w; -SELECT CAST("w"."d" AS DATE) > CAST("w"."e" AS DATE) AS "a" FROM "w" AS "w"; +SELECT CAST("w"."d" AS DATE) > CAST("w"."e" AS DATETIME) AS "a" FROM "w" AS "w"; SELECT CAST(1 AS VARCHAR) AS a FROM w AS w; SELECT CAST(1 AS VARCHAR) AS "a" FROM "w" AS "w"; @@ -97,6 +97,15 @@ DATE_TRUNC('DAY', CAST('2023-01-01' AS DATE)); DATEDIFF('2023-01-01', '2023-01-02', DAY); DATEDIFF(CAST('2023-01-01' AS DATETIME), CAST('2023-01-02' AS DATETIME), DAY); +SELECT "t"."d" > '2023-01-01' AS "d" FROM "temporal" AS "t"; +SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t"; + +SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t"; +SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t"; + +SELECT "t"."t" > '2023-01-01 00:00:01' AS "t" FROM "temporal" AS "t"; +SELECT "t"."t" > CAST('2023-01-01 00:00:01' AS DATETIME) AS "t" FROM "temporal" AS "t"; + -------------------------------------- -- Remove redundant casts -------------------------------------- diff --git a/tests/fixtures/optimizer/merge_subqueries.sql b/tests/fixtures/optimizer/merge_subqueries.sql index 0f22925..f953539 100644 --- a/tests/fixtures/optimizer/merge_subqueries.sql +++ b/tests/fixtures/optimizer/merge_subqueries.sql @@ -429,4 +429,20 @@ WHERE q.a AS a FROM q AS q ); -SELECT q.a AS a FROM x AS q WHERE q.a IN (SELECT y.b AS a FROM y AS y); \ No newline at end of file +SELECT q.a AS a FROM x AS q WHERE q.a IN (SELECT y.b AS a FROM y AS y); + +# title: dont merge when inner query has ORDER BY and outer query is UNION +WITH q AS ( + SELECT + x.a AS a + FROM x + ORDER BY x.a +) +SELECT + q.a AS a +FROM q +UNION ALL +SELECT + 1 AS a; +WITH q AS (SELECT x.a AS a FROM x AS x ORDER BY x.a) SELECT q.a AS a FROM q AS q UNION ALL SELECT 1 AS a; + diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 990453b..cc72e6d 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -33,16 +33,17 @@ FROM ( WHERE object_pointstext IS NOT NULL ); CREATE OR REPLACE TEMPORARY VIEW `latest_boo` AS -SELECT - TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`, - TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value` -FROM ( +WITH `_q_1` AS ( SELECT EXPLODE_OUTER(SPLIT(`boo`.`object_pointstext`, ',')) AS `points` FROM `boo` AS `boo` WHERE NOT `boo`.`object_pointstext` IS NULL -) AS `_q_1`; +) +SELECT + TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`, + TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value` +FROM `_q_1` AS `_q_1`; # title: Union in CTE WITH cte AS ( @@ -480,8 +481,8 @@ JOIN "company_table" AS "company_table_2" LEFT JOIN "unlocked" AS "unlocked" ON "company_table_2"."id" = "unlocked"."company_id" WHERE - NOT "company_table_2"."id" IS NULL - AND CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE; + CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE + AND NOT "company_table_2"."id" IS NULL; # title: db.table alias clash # execute: false @@ -823,7 +824,7 @@ SELECT FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` AS `TOp_TeRmS` WHERE `TOp_TeRmS`.`rank` = 1 - AND CAST(`TOp_TeRmS`.`refresh_date` AS DATE) >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK) + AND `TOp_TeRmS`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK) GROUP BY `day`, `top_term`, @@ -1379,11 +1380,11 @@ JOIN `date_dim` AS `date_dim` AND `date_dim`.`d_date` >= '2002-02-01' WHERE `_u_3`.`_u_4` IS NULL - AND NOT `_u_0`.`_u_1` IS NULL AND ( SIZE(`_u_0`.`_u_2`) = 0 OR SIZE(FILTER(`_u_0`.`_u_2`, `_x` -> `cs1`.`cs_warehouse_sk` <> `_x`)) <> 0 ) + AND NOT `_u_0`.`_u_1` IS NULL ORDER BY COUNT(DISTINCT `cs1`.`cs_order_number`) LIMIT 100; diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql index b7103ef..47972ac 100644 --- a/tests/fixtures/optimizer/pushdown_projections.sql +++ b/tests/fixtures/optimizer/pushdown_projections.sql @@ -79,6 +79,9 @@ WITH y AS (SELECT MAX(1) AS _ FROM x AS x) SELECT 1 AS "1" FROM y AS y; WITH y AS (SELECT a FROM x GROUP BY a) SELECT 1 FROM y; WITH y AS (SELECT 1 AS _ FROM x AS x GROUP BY x.a) SELECT 1 AS "1" FROM y AS y; +WITH cte AS (SELECT col FROM t) SELECT IF(1 IN UNNEST(col), 1, 0) AS col FROM cte; +WITH cte AS (SELECT t.col AS col FROM t AS t) SELECT CASE WHEN 1 IN (SELECT UNNEST(cte.col)) THEN 1 ELSE 0 END AS col FROM cte AS cte; + -------------------------------------- -- Unknown Star Expansion -------------------------------------- @@ -106,3 +109,6 @@ WITH cte1 AS (SELECT tb.cola AS cola FROM tb AS tb UNION ALL SELECT tb2.colc AS SELECT * FROM ((SELECT c FROM t1) JOIN t2); SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2); + +SELECT a, d FROM (SELECT 1 a, 2 c, 3 d, 4 e UNION ALL BY NAME SELECT 5 b, 6 c, 7 d, 8 a, 9 e) +SELECT a, d FROM (SELECT 1 a, 3 d, UNION ALL BY NAME SELECT 7 d, 8 a) diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 71c6f45..289145b 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -96,6 +96,12 @@ SELECT 2 AS "2" FROM x AS x GROUP BY 1; SELECT 'a' AS a FROM x GROUP BY 1; SELECT 'a' AS a FROM x AS x GROUP BY 1; +SELECT NULL AS a FROM x GROUP BY 1; +SELECT NULL AS a FROM x AS x GROUP BY 1; + +SELECT TRUE AS a FROM x GROUP BY 1; +SELECT TRUE AS a FROM x AS x GROUP BY 1; + # execute: false # dialect: oracle SELECT t."col" FROM tbl t; @@ -121,6 +127,10 @@ SELECT 2 AS d FROM x AS x GROUP BY 1 ORDER BY d; SELECT DATE(a), DATE(b) AS c FROM x GROUP BY 1, 2; SELECT DATE(x.a) AS _col_0, DATE(x.b) AS c FROM x AS x GROUP BY DATE(x.a), DATE(x.b); +# execute: false +SELECT (SELECT MIN(a) FROM UNNEST([1, 2])) AS f FROM x GROUP BY 1; +SELECT (SELECT MIN(_q_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _q_0) AS f FROM x AS x GROUP BY 1; + SELECT SUM(x.a) AS c FROM x JOIN y ON x.b = y.b GROUP BY c; SELECT SUM(x.a) AS c FROM x AS x JOIN y AS y ON x.b = y.b GROUP BY y.c; @@ -580,8 +590,8 @@ SELECT * FROM ((SELECT * FROM tbl)); SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _q_0); # execute: false -SELECT * FROM ((SELECT c FROM t1) JOIN t2); -SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2); +SELECT * FROM ((SELECT c FROM t1) CROSS JOIN t2); +SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0 CROSS JOIN t2 AS t2); # execute: false SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c); diff --git a/tests/fixtures/optimizer/qualify_columns_ddl.sql b/tests/fixtures/optimizer/qualify_columns_ddl.sql index 907780b..9b4bb34 100644 --- a/tests/fixtures/optimizer/qualify_columns_ddl.sql +++ b/tests/fixtures/optimizer/qualify_columns_ddl.sql @@ -1,6 +1,10 @@ # title: Create with CTE WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM cte; -WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT cte.b AS b FROM cte AS cte; +CREATE TABLE s AS WITH cte AS (SELECT y.b AS b FROM y AS y) SELECT cte.b AS b FROM cte AS cte; + +# title: Create with CTE, query also has CTE +WITH cte1 AS (SELECT b FROM y) CREATE TABLE s AS WITH cte2 AS (SELECT b FROM cte1) SELECT * FROM cte2; +CREATE TABLE s AS WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte2.b AS b FROM cte2 AS cte2; # title: Create without CTE CREATE TABLE foo AS SELECT a FROM tbl; @@ -8,15 +12,15 @@ CREATE TABLE foo AS SELECT tbl.a AS a FROM tbl AS tbl; # title: Create with complex CTE with derived table WITH cte AS (SELECT a FROM (SELECT a from x)) CREATE TABLE s AS SELECT * FROM cte; -WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte; +CREATE TABLE s AS WITH cte AS (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) SELECT cte.a AS a FROM cte AS cte; # title: Create wtih multiple CTEs WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte2; -WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) CREATE TABLE s AS SELECT cte2.b AS b FROM cte2 AS cte2; +CREATE TABLE s AS WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte2.b AS b FROM cte2 AS cte2; # title: Create with multiple CTEs, selecting only from the first CTE (unnecessary code) WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte1; -WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) CREATE TABLE s AS SELECT cte1.b AS b FROM cte1 AS cte1; +CREATE TABLE s AS WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte1.b AS b FROM cte1 AS cte1; # title: Create with multiple derived tables CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM y)); @@ -24,9 +28,10 @@ CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT y.b A # title: Create with a CTE and a derived table WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM cte)); -WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _q_0) AS _q_1; +CREATE TABLE s AS WITH cte AS (SELECT y.b AS b FROM y AS y) SELECT _q_1.b AS b FROM (SELECT _q_0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _q_0) AS _q_1; # title: Insert with CTE +# dialect: spark WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte; WITH cte AS (SELECT y.b AS b FROM y AS y) INSERT INTO s SELECT cte.b AS b FROM cte AS cte; diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 99b5153..f651a87 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -159,6 +159,7 @@ CREATE TABLE t1 AS (WITH cte AS (SELECT x FROM t2) SELECT * FROM cte); CREATE TABLE c.db.t1 AS (WITH cte AS (SELECT x FROM c.db.t2 AS t2) SELECT * FROM cte AS cte); # title: insert statement with cte +# dialect: spark WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte; WITH cte AS (SELECT b FROM c.db.y AS y) INSERT INTO c.db.s SELECT * FROM cte AS cte; diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql index da9f26d..a10942d 100644 --- a/tests/fixtures/optimizer/simplify.sql +++ b/tests/fixtures/optimizer/simplify.sql @@ -52,6 +52,9 @@ FALSE; 'x' = 'x'; TRUE; +STRUCT(NULL AS a); +STRUCT(NULL AS a); + NULL AND TRUE; NULL; @@ -102,6 +105,10 @@ a AND b; a AND (b AND b); a AND b; +-- bigquery doesn't allow unparenthesis comparisons +(x is not null) != (y is null); +(NOT x IS NULL) <> (y IS NULL); + -------------------------------------- -- Absorption -------------------------------------- @@ -459,6 +466,18 @@ CAST('1998-09-02 00:00:00' AS DATETIME); CAST(x AS DATETIME) + interval '1' WEEK; CAST(x AS DATETIME) + INTERVAL '1' WEEK; +# dialect: bigquery +CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 1 DAY; +CAST('2023-01-02 00:00:00' AS TIMESTAMP); + +# dialect: bigquery +INTERVAL 1 DAY + CAST('2023-01-01' AS TIMESTAMP); +CAST('2023-01-02 00:00:00' AS TIMESTAMP); + +# dialect: bigquery +CAST('2023-01-02' AS TIMESTAMP) - INTERVAL 1 DAY; +CAST('2023-01-01 00:00:00' AS TIMESTAMP); + TS_OR_DS_TO_DATE('1998-12-01 00:00:01') - interval '90' day; CAST('1998-09-02' AS DATE); @@ -708,6 +727,48 @@ FUN() > 0; RAND() > 0 OR RAND() > 1; RAND() > 0 OR RAND() > 1; +CAST(1 AS UINT) >= 0; +TRUE; + +CAST(-1 AS TINYINT) <= 0; +TRUE; + +CAST(1 AS INT) = CAST(1 AS UINT); +TRUE; + +CASE WHEN CAST(1 AS TINYINT) = 1 THEN FALSE ELSE TRUE END; +FALSE; + +CAST(1 AS INT) + 1; +CAST(1 AS INT) + 1; + +CAST(CAST(CAST(-1 AS INT) AS INT) AS INT) = -1; +TRUE; + +CAST(-1 AS UINT) <= 0; +CAST(-1 AS UINT) <= 0; + +CAST(-129 AS TINYINT) <= 0; +CAST(-129 AS TINYINT) <= 0; + +CAST(256 AS UINT) >= 0; +CAST(256 AS UINT) >= 0; + +CAST(CAST(CAST(-1 AS INT) AS UINT) AS INT) = 1; +CAST(CAST(CAST(-1 AS INT) AS UINT) AS INT) = 1; + +CAST(x AS TINYINT) = 1; +CAST(x AS TINYINT) = 1; + +CAST(CAST(1 AS INT) AS BOOLEAN) = 1; +CAST(CAST(1 AS INT) AS BOOLEAN) = 1; + +CAST(CAST(CAST(1 AS INT) AS BOOLEAN) AS INT) = 1; +CAST(CAST(CAST(1 AS INT) AS BOOLEAN) AS INT) = 1; + +x > CAST('2023-01-01' AS DATE) AND x < CAST('2023-01-01' AS DATETIME); +FALSE; + -------------------------------------- -- COALESCE -------------------------------------- @@ -745,7 +806,7 @@ COALESCE(ROW() OVER (), 1) = 1; ROW() OVER () = 1 OR ROW() OVER () IS NULL; a AND b AND COALESCE(ROW() OVER (), 1) = 1; -a AND b AND (ROW() OVER () = 1 OR ROW() OVER () IS NULL); +(ROW() OVER () = 1 OR ROW() OVER () IS NULL) AND a AND b; COALESCE(1, 2); 1; @@ -823,6 +884,10 @@ CAST('2023-12-11' AS DATE); DATE_TRUNC(CAST('2023-12-15' AS DATE), WEEK); CAST('2023-12-10' AS DATE); +# dialect: bigquery +DATE_TRUNC(CAST('2023-10-01' AS TIMESTAMP), QUARTER); +CAST('2023-10-01 00:00:00' AS TIMESTAMP); + # dialect: bigquery DATE_TRUNC(CAST('2023-12-16' AS DATE), WEEK); CAST('2023-12-10' AS DATE); @@ -830,21 +895,41 @@ CAST('2023-12-10' AS DATE); DATE_TRUNC('year', x) = CAST('2021-01-01' AS DATE); x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); +# dialect: bigquery +DATE_TRUNC(x, year) = CAST('2021-01-01' AS TIMESTAMP); +x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); + DATE_TRUNC('quarter', x) = CAST('2021-01-01' AS DATE); x < CAST('2021-04-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); +# dialect: bigquery +DATE_TRUNC(x, quarter) = CAST('2021-01-01' AS TIMESTAMP); +x < CAST('2021-04-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); + DATE_TRUNC('month', x) = CAST('2021-01-01' AS DATE); x < CAST('2021-02-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); +# dialect: bigquery +DATE_TRUNC(x, month) = CAST('2021-01-01' AS TIMESTAMP); +x < CAST('2021-02-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); + DATE_TRUNC('week', x) = CAST('2021-01-04' AS DATE); x < CAST('2021-01-11' AS DATE) AND x >= CAST('2021-01-04' AS DATE); DATE_TRUNC('day', x) = CAST('2021-01-01' AS DATE); x < CAST('2021-01-02' AS DATE) AND x >= CAST('2021-01-01' AS DATE); +# dialect: bigquery +DATE_TRUNC(x, DAY) = CAST('2021-01-01' AS TIMESTAMP); +x < CAST('2021-01-02 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); + CAST('2021-01-01' AS DATE) = DATE_TRUNC('year', x); x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); +# dialect: bigquery +CAST('2021-01-01' AS TIMESTAMP) = DATE_TRUNC(x, year); +x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); + -- Always false, except for nulls DATE_TRUNC('quarter', x) = CAST('2021-01-02' AS DATE); DATE_TRUNC('QUARTER', x) = CAST('2021-01-02' AS DATE); @@ -859,12 +944,20 @@ DATE_TRUNC('YEAR', x) <> CAST('2021-01-02' AS DATE); DATE_TRUNC('year', x) <= CAST('2021-01-01' AS DATE); x < CAST('2022-01-01' AS DATE); +# dialect: bigquery +DATE_TRUNC(x, year) <= CAST('2021-01-01' AS TIMESTAMP); +x < CAST('2022-01-01 00:00:00' AS TIMESTAMP); + DATE_TRUNC('year', x) <= CAST('2021-01-02' AS DATE); x < CAST('2022-01-01' AS DATE); CAST('2021-01-01' AS DATE) >= DATE_TRUNC('year', x); x < CAST('2022-01-01' AS DATE); +# dialect: bigquery +CAST('2021-01-01' AS TIMESTAMP) >= DATE_TRUNC(x, year); +x < CAST('2022-01-01 00:00:00' AS TIMESTAMP); + DATE_TRUNC('year', x) < CAST('2021-01-01' AS DATE); x < CAST('2021-01-01' AS DATE); @@ -896,6 +989,10 @@ DATE_TRUNC('YEAR', x) <> '2021-01-02'; DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2023-01-01' AS DATE)); (x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE)) OR (x < CAST('2024-01-01' AS DATE) AND x >= CAST('2023-01-01' AS DATE)); +# dialect: bigquery +DATE_TRUNC(x, year) IN (CAST('2021-01-01' AS TIMESTAMP), CAST('2023-01-01' AS TIMESTAMP)); +(x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP)) OR (x < CAST('2024-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2023-01-01 00:00:00' AS TIMESTAMP)); + -- merge ranges DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2022-01-01' AS DATE)); x < CAST('2023-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); diff --git a/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz b/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz index ad5043f..6f51952 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz and b/tests/fixtures/optimizer/tpc-ds/call_center.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz b/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz index eed1508..9a736ff 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz and b/tests/fixtures/optimizer/tpc-ds/catalog_page.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz b/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz index e160514..9092c1f 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz and b/tests/fixtures/optimizer/tpc-ds/catalog_returns.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz b/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz index 1828149..ac9058b 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz and b/tests/fixtures/optimizer/tpc-ds/catalog_sales.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/customer.csv.gz b/tests/fixtures/optimizer/tpc-ds/customer.csv.gz index 2277f72..5545923 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/customer.csv.gz and b/tests/fixtures/optimizer/tpc-ds/customer.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz b/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz index c553721..7c24e8c 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz and b/tests/fixtures/optimizer/tpc-ds/customer_address.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz b/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz index dfc65a0..582d4e5 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz and b/tests/fixtures/optimizer/tpc-ds/customer_demographics.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz b/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz index 26280bf..9960663 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz and b/tests/fixtures/optimizer/tpc-ds/date_dim.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz b/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz index f0cde03..84efa06 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz and b/tests/fixtures/optimizer/tpc-ds/household_demographics.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz b/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz index 4374587..8c60109 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz and b/tests/fixtures/optimizer/tpc-ds/income_band.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz b/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz index 5afaaf6..d171ae0 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz and b/tests/fixtures/optimizer/tpc-ds/inventory.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/item.csv.gz b/tests/fixtures/optimizer/tpc-ds/item.csv.gz index 9f65d87..effacb3 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/item.csv.gz and b/tests/fixtures/optimizer/tpc-ds/item.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz b/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz index e8692c2..918e9c1 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz and b/tests/fixtures/optimizer/tpc-ds/promotion.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/reason.csv.gz b/tests/fixtures/optimizer/tpc-ds/reason.csv.gz index de1f50f..2ad5473 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/reason.csv.gz and b/tests/fixtures/optimizer/tpc-ds/reason.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz b/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz index 14465e8..e193902 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz and b/tests/fixtures/optimizer/tpc-ds/ship_mode.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/store.csv.gz b/tests/fixtures/optimizer/tpc-ds/store.csv.gz index 8d04078..77868fc 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/store.csv.gz and b/tests/fixtures/optimizer/tpc-ds/store.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz b/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz index cba1300..d3426ab 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz and b/tests/fixtures/optimizer/tpc-ds/store_returns.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz b/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz index 68caa83..21e83df 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz and b/tests/fixtures/optimizer/tpc-ds/store_sales.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz b/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz index 3e0fa35..fae30e9 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz and b/tests/fixtures/optimizer/tpc-ds/time_dim.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 76e6431..35fbb70 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -62,6 +62,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 2 -------------------------------------- +# execute: true WITH wscs AS (SELECT sold_date_sk, sales_price @@ -107,13 +108,13 @@ WITH wscs WHERE d_date_sk = sold_date_sk GROUP BY d_week_seq) SELECT d_week_seq1, - Round(sun_sales1 / sun_sales2, 2), - Round(mon_sales1 / mon_sales2, 2), - Round(tue_sales1 / tue_sales2, 2), - Round(wed_sales1 / wed_sales2, 2), - Round(thu_sales1 / thu_sales2, 2), - Round(fri_sales1 / fri_sales2, 2), - Round(sat_sales1 / sat_sales2, 2) + Round(sun_sales1 / sun_sales2, 2) AS "_col_1", + Round(mon_sales1 / mon_sales2, 2) AS "_col_2", + Round(tue_sales1 / tue_sales2, 2) AS "_col_3", + Round(wed_sales1 / wed_sales2, 2) AS "_col_4", + Round(thu_sales1 / thu_sales2, 2) AS "_col_5", + Round(fri_sales1 / fri_sales2, 2) AS "_col_6", + Round(sat_sales1 / sat_sales2, 2) AS "_col_7" FROM (SELECT wswscs.d_week_seq d_week_seq1, sun_sales sun_sales1, mon_sales mon_sales1, @@ -213,7 +214,8 @@ JOIN "date_dim" AS "date_dim" JOIN "wswscs" AS "wswscs_2" ON "wswscs"."d_week_seq" = "wswscs_2"."d_week_seq" - 53 JOIN "date_dim" AS "date_dim_2" - ON "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq" AND "date_dim_2"."d_year" = 1999 + ON "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq" + AND "date_dim_2"."d_year" = 1999 ORDER BY "d_week_seq1"; @@ -264,6 +266,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 4 -------------------------------------- +# execute: true WITH year_total AS (SELECT c_customer_id customer_id, c_first_name customer_first_name, @@ -733,8 +736,8 @@ WITH "salesreturns" AS ( "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE - CAST("date_dim"."d_date" AS DATE) <= CAST('2002-09-05' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2002-08-22' AS DATE) + CAST("date_dim"."d_date" AS DATETIME) <= CAST('2002-09-05' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2002-08-22' AS DATE) ), "ssr" AS ( SELECT "store"."s_store_id" AS "s_store_id", @@ -1628,6 +1631,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 11 -------------------------------------- +# execute: true WITH year_total AS (SELECT c_customer_id customer_id, c_first_name customer_first_name @@ -1869,8 +1873,8 @@ SELECT FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-10' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-11' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-06-10' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-05-11' AS DATE) JOIN "item" AS "item" ON "item"."i_category" IN ('Home', 'Men', 'Women') AND "item"."i_item_sk" = "web_sales"."ws_item_sk" @@ -2326,8 +2330,9 @@ LIMIT 100; -------------------------------------- -- TPC-DS 15 -------------------------------------- +# execute: true SELECT ca_zip, - Sum(cs_sales_price) + Sum(cs_sales_price) AS "_col_1" FROM catalog_sales, customer, customer_address, @@ -2437,11 +2442,11 @@ JOIN "date_dim" AS "date_dim" AND "date_dim"."d_date" >= '2002-3-01' AND ( CAST('2002-3-01' AS DATE) + INTERVAL '60' DAY - ) >= CAST("date_dim"."d_date" AS DATE) + ) >= CAST("date_dim"."d_date" AS DATETIME) WHERE "_u_3"."_u_4" IS NULL - AND NOT "_u_0"."_u_1" IS NULL AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "cs1"."cs_warehouse_sk" <> "_x") + AND NOT "_u_0"."_u_1" IS NULL ORDER BY COUNT(DISTINCT "cs1"."cs_order_number") LIMIT 100; @@ -2449,6 +2454,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 17 -------------------------------------- +# execute: true SELECT i_item_id, i_item_desc, s_state, @@ -2638,6 +2644,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 19 -------------------------------------- +# execute: true SELECT i_brand_id brand_id, i_brand brand, i_manufact_id, @@ -2744,8 +2751,8 @@ SELECT FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2001-03-05' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2001-02-03' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2001-03-05' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2001-02-03' AS DATE) JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_category" IN ('Children', 'Women', 'Electronics') @@ -2824,8 +2831,8 @@ WITH "x" AS ( FROM "inventory" AS "inventory" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-12' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-04-13' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-06-12' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-04-13' AS DATE) JOIN "item" AS "item" ON "inventory"."inv_item_sk" = "item"."i_item_sk" AND "item"."i_current_price" <= 1.49 @@ -2906,6 +2913,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 23 -------------------------------------- +# execute: true WITH frequent_ss_items AS (SELECT Substr(i_item_desc, 1, 30) itemdesc, i_item_sk item_sk, @@ -2942,7 +2950,7 @@ WITH frequent_ss_items HAVING Sum(ss_quantity * ss_sales_price) > ( 95 / 100.0 ) * (SELECT * FROM max_store_sales)) -SELECT Sum(sales) +SELECT Sum(sales) AS "_col_0" FROM (SELECT cs_quantity * cs_list_price sales FROM catalog_sales, date_dim @@ -3372,49 +3380,49 @@ LIMIT 100; -- TPC-DS 28 -------------------------------------- SELECT * -FROM (SELECT Avg(ss_list_price) B1_LP, - Count(ss_list_price) B1_CNT, - Count(DISTINCT ss_list_price) B1_CNTD +FROM (SELECT Avg(ss_list_price) b1_lp, + Count(ss_list_price) b1_cnt, + Count(DISTINCT ss_list_price) b1_cntd FROM store_sales WHERE ss_quantity BETWEEN 0 AND 5 AND ( ss_list_price BETWEEN 18 AND 18 + 10 OR ss_coupon_amt BETWEEN 1939 AND 1939 + 1000 OR ss_wholesale_cost BETWEEN 34 AND 34 + 20 )) B1, - (SELECT Avg(ss_list_price) B2_LP, - Count(ss_list_price) B2_CNT, - Count(DISTINCT ss_list_price) B2_CNTD + (SELECT Avg(ss_list_price) b2_lp, + Count(ss_list_price) b2_cnt, + Count(DISTINCT ss_list_price) b2_cntd FROM store_sales WHERE ss_quantity BETWEEN 6 AND 10 AND ( ss_list_price BETWEEN 1 AND 1 + 10 OR ss_coupon_amt BETWEEN 35 AND 35 + 1000 OR ss_wholesale_cost BETWEEN 50 AND 50 + 20 )) B2, - (SELECT Avg(ss_list_price) B3_LP, - Count(ss_list_price) B3_CNT, - Count(DISTINCT ss_list_price) B3_CNTD + (SELECT Avg(ss_list_price) b3_lp, + Count(ss_list_price) b3_cnt, + Count(DISTINCT ss_list_price) b3_cntd FROM store_sales WHERE ss_quantity BETWEEN 11 AND 15 AND ( ss_list_price BETWEEN 91 AND 91 + 10 OR ss_coupon_amt BETWEEN 1412 AND 1412 + 1000 OR ss_wholesale_cost BETWEEN 17 AND 17 + 20 )) B3, - (SELECT Avg(ss_list_price) B4_LP, - Count(ss_list_price) B4_CNT, - Count(DISTINCT ss_list_price) B4_CNTD + (SELECT Avg(ss_list_price) b4_lp, + Count(ss_list_price) b4_cnt, + Count(DISTINCT ss_list_price) b4_cntd FROM store_sales WHERE ss_quantity BETWEEN 16 AND 20 AND ( ss_list_price BETWEEN 9 AND 9 + 10 OR ss_coupon_amt BETWEEN 5270 AND 5270 + 1000 OR ss_wholesale_cost BETWEEN 29 AND 29 + 20 )) B4, - (SELECT Avg(ss_list_price) B5_LP, - Count(ss_list_price) B5_CNT, - Count(DISTINCT ss_list_price) B5_CNTD + (SELECT Avg(ss_list_price) b5_lp, + Count(ss_list_price) b5_cnt, + Count(DISTINCT ss_list_price) b5_cntd FROM store_sales WHERE ss_quantity BETWEEN 21 AND 25 AND ( ss_list_price BETWEEN 45 AND 45 + 10 OR ss_coupon_amt BETWEEN 826 AND 826 + 1000 OR ss_wholesale_cost BETWEEN 5 AND 5 + 20 )) B5, - (SELECT Avg(ss_list_price) B6_LP, - Count(ss_list_price) B6_CNT, - Count(DISTINCT ss_list_price) B6_CNTD + (SELECT Avg(ss_list_price) b6_lp, + Count(ss_list_price) b6_cnt, + Count(DISTINCT ss_list_price) b6_cntd FROM store_sales WHERE ss_quantity BETWEEN 26 AND 30 AND ( ss_list_price BETWEEN 174 AND 174 + 10 @@ -3429,9 +3437,12 @@ WITH "b1" AS ( FROM "store_sales" AS "store_sales" WHERE ( - "store_sales"."ss_coupon_amt" <= 2939 AND "store_sales"."ss_coupon_amt" >= 1939 - OR "store_sales"."ss_list_price" <= 28 AND "store_sales"."ss_list_price" >= 18 - OR "store_sales"."ss_wholesale_cost" <= 54 AND "store_sales"."ss_wholesale_cost" >= 34 + "store_sales"."ss_coupon_amt" <= 2939 + AND "store_sales"."ss_coupon_amt" >= 1939 + OR "store_sales"."ss_list_price" <= 28 + AND "store_sales"."ss_list_price" >= 18 + OR "store_sales"."ss_wholesale_cost" <= 54 + AND "store_sales"."ss_wholesale_cost" >= 34 ) AND "store_sales"."ss_quantity" <= 5 AND "store_sales"."ss_quantity" >= 0 @@ -3443,9 +3454,12 @@ WITH "b1" AS ( FROM "store_sales" AS "store_sales" WHERE ( - "store_sales"."ss_coupon_amt" <= 1035 AND "store_sales"."ss_coupon_amt" >= 35 - OR "store_sales"."ss_list_price" <= 11 AND "store_sales"."ss_list_price" >= 1 - OR "store_sales"."ss_wholesale_cost" <= 70 AND "store_sales"."ss_wholesale_cost" >= 50 + "store_sales"."ss_coupon_amt" <= 1035 + AND "store_sales"."ss_coupon_amt" >= 35 + OR "store_sales"."ss_list_price" <= 11 + AND "store_sales"."ss_list_price" >= 1 + OR "store_sales"."ss_wholesale_cost" <= 70 + AND "store_sales"."ss_wholesale_cost" >= 50 ) AND "store_sales"."ss_quantity" <= 10 AND "store_sales"."ss_quantity" >= 6 @@ -3457,9 +3471,12 @@ WITH "b1" AS ( FROM "store_sales" AS "store_sales" WHERE ( - "store_sales"."ss_coupon_amt" <= 2412 AND "store_sales"."ss_coupon_amt" >= 1412 - OR "store_sales"."ss_list_price" <= 101 AND "store_sales"."ss_list_price" >= 91 - OR "store_sales"."ss_wholesale_cost" <= 37 AND "store_sales"."ss_wholesale_cost" >= 17 + "store_sales"."ss_coupon_amt" <= 2412 + AND "store_sales"."ss_coupon_amt" >= 1412 + OR "store_sales"."ss_list_price" <= 101 + AND "store_sales"."ss_list_price" >= 91 + OR "store_sales"."ss_wholesale_cost" <= 37 + AND "store_sales"."ss_wholesale_cost" >= 17 ) AND "store_sales"."ss_quantity" <= 15 AND "store_sales"."ss_quantity" >= 11 @@ -3471,9 +3488,12 @@ WITH "b1" AS ( FROM "store_sales" AS "store_sales" WHERE ( - "store_sales"."ss_coupon_amt" <= 6270 AND "store_sales"."ss_coupon_amt" >= 5270 - OR "store_sales"."ss_list_price" <= 19 AND "store_sales"."ss_list_price" >= 9 - OR "store_sales"."ss_wholesale_cost" <= 49 AND "store_sales"."ss_wholesale_cost" >= 29 + "store_sales"."ss_coupon_amt" <= 6270 + AND "store_sales"."ss_coupon_amt" >= 5270 + OR "store_sales"."ss_list_price" <= 19 + AND "store_sales"."ss_list_price" >= 9 + OR "store_sales"."ss_wholesale_cost" <= 49 + AND "store_sales"."ss_wholesale_cost" >= 29 ) AND "store_sales"."ss_quantity" <= 20 AND "store_sales"."ss_quantity" >= 16 @@ -3485,9 +3505,12 @@ WITH "b1" AS ( FROM "store_sales" AS "store_sales" WHERE ( - "store_sales"."ss_coupon_amt" <= 1826 AND "store_sales"."ss_coupon_amt" >= 826 - OR "store_sales"."ss_list_price" <= 55 AND "store_sales"."ss_list_price" >= 45 - OR "store_sales"."ss_wholesale_cost" <= 25 AND "store_sales"."ss_wholesale_cost" >= 5 + "store_sales"."ss_coupon_amt" <= 1826 + AND "store_sales"."ss_coupon_amt" >= 826 + OR "store_sales"."ss_list_price" <= 55 + AND "store_sales"."ss_list_price" >= 45 + OR "store_sales"."ss_wholesale_cost" <= 25 + AND "store_sales"."ss_wholesale_cost" >= 5 ) AND "store_sales"."ss_quantity" <= 25 AND "store_sales"."ss_quantity" >= 21 @@ -3499,9 +3522,12 @@ WITH "b1" AS ( FROM "store_sales" AS "store_sales" WHERE ( - "store_sales"."ss_coupon_amt" <= 6548 AND "store_sales"."ss_coupon_amt" >= 5548 - OR "store_sales"."ss_list_price" <= 184 AND "store_sales"."ss_list_price" >= 174 - OR "store_sales"."ss_wholesale_cost" <= 62 AND "store_sales"."ss_wholesale_cost" >= 42 + "store_sales"."ss_coupon_amt" <= 6548 + AND "store_sales"."ss_coupon_amt" >= 5548 + OR "store_sales"."ss_list_price" <= 184 + AND "store_sales"."ss_list_price" >= 174 + OR "store_sales"."ss_wholesale_cost" <= 62 + AND "store_sales"."ss_wholesale_cost" >= 42 ) AND "store_sales"."ss_quantity" <= 30 AND "store_sales"."ss_quantity" >= 26 @@ -3860,11 +3886,17 @@ SELECT "ss3"."store_sales" / "ss2"."store_sales" AS "store_q2_q3_increase" FROM "ss" AS "ss1" JOIN "ss" AS "ss2" - ON "ss1"."ca_county" = "ss2"."ca_county" AND "ss2"."d_qoy" = 2 AND "ss2"."d_year" = 2001 + ON "ss1"."ca_county" = "ss2"."ca_county" + AND "ss2"."d_qoy" = 2 + AND "ss2"."d_year" = 2001 JOIN "ws" AS "ws1" - ON "ss1"."ca_county" = "ws1"."ca_county" AND "ws1"."d_qoy" = 1 AND "ws1"."d_year" = 2001 + ON "ss1"."ca_county" = "ws1"."ca_county" + AND "ws1"."d_qoy" = 1 + AND "ws1"."d_year" = 2001 JOIN "ss" AS "ss3" - ON "ss2"."ca_county" = "ss3"."ca_county" AND "ss3"."d_qoy" = 3 AND "ss3"."d_year" = 2001 + ON "ss2"."ca_county" = "ss3"."ca_county" + AND "ss3"."d_qoy" = 3 + AND "ss3"."d_year" = 2001 JOIN "ws" AS "ws2" ON "ws1"."ca_county" = "ws2"."ca_county" AND "ws2"."d_qoy" = 2 @@ -3932,7 +3964,7 @@ WITH "catalog_sales_2" AS ( FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_date" >= '2001-03-04' - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2001-06-02' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2001-06-02' AS DATE) ), "_u_0" AS ( SELECT 1.3 * AVG("catalog_sales"."cs_ext_discount_amt") AS "_col_0", @@ -3949,7 +3981,8 @@ FROM "catalog_sales_2" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "item" AS "item" - ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_manufact_id" = 610 + ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" + AND "item"."i_manufact_id" = 610 LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "item"."i_item_sk" WHERE @@ -4132,6 +4165,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 34 -------------------------------------- +# execute: true SELECT c_last_name, c_first_name, c_salutation, @@ -4234,24 +4268,25 @@ ORDER BY -------------------------------------- -- TPC-DS 35 -------------------------------------- +# execute: true SELECT ca_state, cd_gender, cd_marital_status, cd_dep_count, Count(*) cnt1, - Stddev_samp(cd_dep_count), - Avg(cd_dep_count), - Max(cd_dep_count), + Stddev_samp(cd_dep_count) AS "_col_5", + Avg(cd_dep_count) AS "_col_6", + Max(cd_dep_count) AS "_col_7", cd_dep_employed_count, Count(*) cnt2, - Stddev_samp(cd_dep_employed_count), - Avg(cd_dep_employed_count), - Max(cd_dep_employed_count), + Stddev_samp(cd_dep_employed_count) AS "_col_10", + Avg(cd_dep_employed_count) AS "_col_11", + Max(cd_dep_employed_count) AS "_col_12", cd_dep_college_count, Count(*) cnt3, - Stddev_samp(cd_dep_college_count), - Avg(cd_dep_college_count), - Max(cd_dep_college_count) + Stddev_samp(cd_dep_college_count) AS "_col_15", + Avg(cd_dep_college_count) AS "_col_16", + Max(cd_dep_college_count) AS "_col_17" FROM customer c, customer_address ca, customer_demographics @@ -4495,8 +4530,8 @@ JOIN "inventory" AS "inventory" AND "inventory"."inv_quantity_on_hand" >= 100 JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('1999-05-05' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('1999-03-06' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('1999-05-05' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('1999-03-06' AS DATE) WHERE "item"."i_current_price" <= 50 AND "item"."i_current_price" >= 20 @@ -4512,7 +4547,8 @@ LIMIT 100; -------------------------------------- -- TPC-DS 38 -------------------------------------- -SELECT Count(*) +# execute: true +SELECT Count(*) AS "_col_0" FROM (SELECT DISTINCT c_last_name, c_first_name, d_date @@ -4771,8 +4807,8 @@ LEFT JOIN "catalog_returns" AS "catalog_returns" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2002-07-01' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2002-05-02' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2002-07-01' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2002-05-02' AS DATE) JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_current_price" <= 1.49 @@ -4980,10 +5016,11 @@ LIMIT 100; -------------------------------------- -- TPC-DS 42 -------------------------------------- +# execute: true SELECT dt.d_year, item.i_category_id, item.i_category, - Sum(ss_ext_sales_price) + Sum(ss_ext_sales_price) AS "_col_3" FROM date_dim dt, store_sales, item @@ -5132,7 +5169,8 @@ FROM "date_dim" AS "date_dim" JOIN "store_sales" AS "store_sales" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store" AS "store" - ON "store"."s_gmt_offset" = -5 AND "store"."s_store_sk" = "store_sales"."ss_store_sk" + ON "store"."s_gmt_offset" = -5 + AND "store"."s_store_sk" = "store_sales"."ss_store_sk" WHERE "date_dim"."d_year" = 2002 GROUP BY @@ -5266,9 +5304,10 @@ LIMIT 100; -------------------------------------- -- TPC-DS 45 -------------------------------------- +# execute: true SELECT ca_zip, ca_state, - Sum(ws_sales_price) + Sum(ws_sales_price) AS "_col_2" FROM web_sales, customer, customer_address, @@ -5333,6 +5372,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 46 -------------------------------------- +# execute: true SELECT c_last_name, c_first_name, ca_city, @@ -5524,10 +5564,14 @@ WITH "v1" AS ( "date_dim"."d_moy" = 1 OR "date_dim"."d_year" = 1998 OR "date_dim"."d_year" = 1999 ) AND ( - "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 + "date_dim"."d_moy" = 12 + OR "date_dim"."d_year" = 1999 + OR "date_dim"."d_year" = 2000 ) AND ( - "date_dim"."d_year" = 1998 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 + "date_dim"."d_year" = 1998 + OR "date_dim"."d_year" = 1999 + OR "date_dim"."d_year" = 2000 ) JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" @@ -5576,7 +5620,8 @@ LIMIT 100; -------------------------------------- -- TPC-DS 48 -------------------------------------- -SELECT Sum (ss_quantity) +# execute: true +SELECT Sum (ss_quantity) AS "_col_0" FROM store_sales, store, customer_demographics, @@ -5919,6 +5964,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 50 -------------------------------------- +# execute: true SELECT s_store_name, s_company_id, s_street_number, @@ -6811,10 +6857,14 @@ WITH "v1" AS ( "date_dim"."d_moy" = 1 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 ) AND ( - "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 2000 OR "date_dim"."d_year" = 2001 + "date_dim"."d_moy" = 12 + OR "date_dim"."d_year" = 2000 + OR "date_dim"."d_year" = 2001 ) AND ( - "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 OR "date_dim"."d_year" = 2001 + "date_dim"."d_year" = 1999 + OR "date_dim"."d_year" = 2000 + OR "date_dim"."d_year" = 2001 ) GROUP BY "item"."i_category", @@ -7056,6 +7106,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 59 -------------------------------------- +# execute: true WITH wss AS (SELECT d_week_seq, ss_store_sk, @@ -7095,13 +7146,13 @@ WITH wss SELECT s_store_name1, s_store_id1, d_week_seq1, - sun_sales1 / sun_sales2, - mon_sales1 / mon_sales2, - tue_sales1 / tue_sales2, - wed_sales1 / wed_sales2, - thu_sales1 / thu_sales2, - fri_sales1 / fri_sales2, - sat_sales1 / sat_sales2 + sun_sales1 / sun_sales2 AS "_col_3", + mon_sales1 / mon_sales2 AS "_col_4", + tue_sales1 / tue_sales2 AS "_col_5", + wed_sales1 / wed_sales2 AS "_col_6", + thu_sales1 / thu_sales2 AS "_col_7", + fri_sales1 / fri_sales2 AS "_col_8", + sat_sales1 / sat_sales2 AS "_col_9" FROM (SELECT s_store_name s_store_name1, wss.d_week_seq d_week_seq1, s_store_id s_store_id1, @@ -7553,7 +7604,8 @@ LIMIT 100; -------------------------------------- -- TPC-DS 62 -------------------------------------- -SELECT Substr(w_warehouse_name, 1, 20), +# execute: true +SELECT Substr(w_warehouse_name, 1, 20) AS "_col_0", sm_type, web_name, Sum(CASE @@ -8132,6 +8184,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 66 -------------------------------------- +# execute: true SELECT w_warehouse_name, w_warehouse_sq_ft, w_city, @@ -9038,6 +9091,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 68 -------------------------------------- +# execute: true SELECT c_last_name, c_first_name, ca_city, @@ -9580,6 +9634,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 73 -------------------------------------- +# execute: true SELECT c_last_name, c_first_name, c_salutation, @@ -9667,6 +9722,7 @@ ORDER BY -------------------------------------- -- TPC-DS 74 -------------------------------------- +# execute: true WITH year_total AS (SELECT c_customer_id customer_id, c_first_name customer_first_name, @@ -9826,6 +9882,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 75 -------------------------------------- +# execute: true WITH all_sales AS (SELECT d_year, i_brand_id, @@ -10030,6 +10087,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 76 -------------------------------------- +# execute: true SELECT channel, col_name, d_year, @@ -10280,8 +10338,8 @@ WITH "date_dim_2" AS ( "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE - CAST("date_dim"."d_date" AS DATE) <= CAST('2001-09-15' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2001-08-16' AS DATE) + CAST("date_dim"."d_date" AS DATETIME) <= CAST('2001-09-15' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2001-08-16' AS DATE) ), "store_2" AS ( SELECT "store"."s_store_sk" AS "s_store_sk" @@ -10407,6 +10465,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 78 -------------------------------------- +# execute: true WITH ws AS (SELECT d_year AS ws_sold_year, ws_item_sk, @@ -10596,9 +10655,10 @@ LIMIT 100; -------------------------------------- -- TPC-DS 79 -------------------------------------- +# execute: true SELECT c_last_name, c_first_name, - Substr(s_city, 1, 30), + Substr(s_city, 1, 30) AS "_col_2", ss_ticket_number, amt, profit @@ -10788,8 +10848,8 @@ WITH "date_dim_2" AS ( "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE - CAST("date_dim"."d_date" AS DATE) <= CAST('2000-09-25' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-08-26' AS DATE) + CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-09-25' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-08-26' AS DATE) ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", @@ -10909,6 +10969,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 81 -------------------------------------- +# execute: true WITH customer_total_return AS (SELECT cr_returning_customer_sk AS ctr_customer_sk, ca_state AS ctr_state, @@ -11068,8 +11129,8 @@ JOIN "store_sales" AS "store_sales" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('1998-06-26' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('1998-04-27' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('1998-06-26' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('1998-04-27' AS DATE) WHERE "item"."i_current_price" <= 93 AND "item"."i_current_price" >= 63 @@ -11329,10 +11390,11 @@ LIMIT 100; -------------------------------------- -- TPC-DS 85 -------------------------------------- -SELECT Substr(r_reason_desc, 1, 20), - Avg(ws_quantity), - Avg(wr_refunded_cash), - Avg(wr_fee) +# execute: true +SELECT Substr(r_reason_desc, 1, 20) AS "_col_0", + Avg(ws_quantity) AS "_col_1", + Avg(wr_refunded_cash) AS "_col_2", + Avg(wr_fee) AS "_col_3" FROM web_sales, web_returns, web_page, @@ -11387,7 +11449,8 @@ SELECT AVG("web_returns"."wr_fee") AS "_col_3" FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "date_dim" - ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND "date_dim"."d_year" = 2001 + ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" + AND "date_dim"."d_year" = 2001 JOIN "web_page" AS "web_page" ON "web_page"."wp_web_page_sk" = "web_sales"."ws_web_page_sk" JOIN "web_returns" AS "web_returns" @@ -11509,7 +11572,8 @@ LIMIT 100; -------------------------------------- -- TPC-DS 87 -------------------------------------- -select count(*) +# execute: true +select count(*) as "_col_0" from ((select distinct c_last_name, c_first_name, d_date from store_sales, date_dim, customer where store_sales.ss_sold_date_sk = date_dim.d_date_sk @@ -12020,10 +12084,11 @@ LIMIT 100; -------------------------------------- -- TPC-DS 91 -------------------------------------- -SELECT cc_call_center_id Call_Center, - cc_name Call_Center_Name, - cc_manager Manager, - Sum(cr_net_loss) Returns_Loss +# execute: true +SELECT cc_call_center_id call_center, + cc_name call_center_name, + cc_manager manager, + Sum(cr_net_loss) returns_loss FROM call_center, catalog_returns, date_dim, @@ -12135,7 +12200,7 @@ WITH "web_sales_2" AS ( FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_date" >= '2002-03-29' - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2002-06-27' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2002-06-27' AS DATE) ), "_u_0" AS ( SELECT 1.3 * AVG("web_sales"."ws_ext_discount_amt") AS "_col_0", @@ -12276,14 +12341,14 @@ JOIN "date_dim" AS "date_dim" AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk" AND ( CAST('2000-3-01' AS DATE) + INTERVAL '60' DAY - ) >= CAST("date_dim"."d_date" AS DATE) + ) >= CAST("date_dim"."d_date" AS DATETIME) JOIN "web_site" AS "web_site" ON "web_site"."web_company_name" = 'pri' AND "web_site"."web_site_sk" = "ws1"."ws_web_site_sk" WHERE "_u_3"."_u_4" IS NULL - AND NOT "_u_0"."_u_1" IS NULL AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "ws1"."ws_warehouse_sk" <> "_x") + AND NOT "_u_0"."_u_1" IS NULL ORDER BY COUNT(DISTINCT "ws1"."ws_order_number") LIMIT 100; @@ -12366,7 +12431,7 @@ JOIN "date_dim" AS "date_dim" AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk" AND ( CAST('2000-4-01' AS DATE) + INTERVAL '60' DAY - ) >= CAST("date_dim"."d_date" AS DATE) + ) >= CAST("date_dim"."d_date" AS DATETIME) JOIN "web_site" AS "web_site" ON "web_site"."web_company_name" = 'pri' AND "web_site"."web_site_sk" = "ws1"."ws_web_site_sk" @@ -12379,7 +12444,8 @@ LIMIT 100; -------------------------------------- -- TPC-DS 96 -------------------------------------- -SELECT Count(*) +# execute: true +SELECT Count(*) AS "_col_0" FROM store_sales, household_demographics, time_dim, @@ -12400,7 +12466,8 @@ JOIN "household_demographics" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_dep_count" = 7 JOIN "store" AS "store" - ON "store"."s_store_name" = 'ese' AND "store"."s_store_sk" = "store_sales"."ss_store_sk" + ON "store"."s_store_name" = 'ese' + AND "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 15 @@ -12412,6 +12479,7 @@ LIMIT 100; -------------------------------------- -- TPC-DS 97 -------------------------------------- +# execute: true WITH ssci AS (SELECT ss_customer_sk customer_sk, ss_item_sk item_sk @@ -12502,7 +12570,8 @@ SELECT ) AS "store_and_catalog" FROM "ssci" AS "ssci" FULL JOIN "csci" AS "csci" - ON "csci"."customer_sk" = "ssci"."customer_sk" AND "csci"."item_sk" = "ssci"."item_sk" + ON "csci"."customer_sk" = "ssci"."customer_sk" + AND "csci"."item_sk" = "ssci"."item_sk" LIMIT 100; -------------------------------------- @@ -12546,8 +12615,8 @@ SELECT FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" - AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-17' AS DATE) - AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-18' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) <= CAST('2000-06-17' AS DATE) + AND CAST("date_dim"."d_date" AS DATETIME) >= CAST('2000-05-18' AS DATE) JOIN "item" AS "item" ON "item"."i_category" IN ('Men', 'Home', 'Electronics') AND "item"."i_item_sk" = "store_sales"."ss_item_sk" @@ -12567,7 +12636,8 @@ ORDER BY -------------------------------------- -- TPC-DS 99 -------------------------------------- -SELECT Substr(w_warehouse_name, 1, 20), +# execute: true +SELECT Substr(w_warehouse_name, 1, 20) AS "_col_0", sm_type, cc_name, Sum(CASE diff --git a/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz b/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz index cf64636..f2f07a3 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz and b/tests/fixtures/optimizer/tpc-ds/warehouse.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz index 894ce3b..62ddd8c 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz and b/tests/fixtures/optimizer/tpc-ds/web_page.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz index 21f7040..af05d52 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz and b/tests/fixtures/optimizer/tpc-ds/web_returns.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz index b384c78..26b09b8 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz and b/tests/fixtures/optimizer/tpc-ds/web_sales.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz b/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz index b9b5f72..a8cabdb 100644 Binary files a/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz and b/tests/fixtures/optimizer/tpc-ds/web_site.csv.gz differ diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql index 39b5ffa..c131643 100644 --- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql +++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql @@ -249,9 +249,9 @@ FROM "orders" AS "orders" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."l_orderkey" = "orders"."o_orderkey" WHERE - NOT "_u_0"."l_orderkey" IS NULL - AND CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE) + CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-07-01' AS DATE) + AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY "orders"."o_orderpriority" ORDER BY @@ -609,7 +609,8 @@ JOIN "orders" AS "orders" AND CAST("orders"."o_orderdate" AS DATE) < CAST('1994-01-01' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-10-01' AS DATE) JOIN "lineitem" AS "lineitem" - ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_returnflag" = 'R' + ON "lineitem"."l_orderkey" = "orders"."o_orderkey" + AND "lineitem"."l_returnflag" = 'R' GROUP BY "customer"."c_custkey", "customer"."c_name", @@ -731,7 +732,8 @@ SELECT ) AS "high_line_count", SUM( CASE - WHEN "orders"."o_orderpriority" <> '1-URGENT' AND "orders"."o_orderpriority" <> '2-HIGH' + WHEN "orders"."o_orderpriority" <> '1-URGENT' + AND "orders"."o_orderpriority" <> '2-HIGH' THEN 1 ELSE 0 END @@ -1257,7 +1259,8 @@ WITH "_u_0" AS ( LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."p_partkey" = "partsupp"."ps_partkey" LEFT JOIN "_u_1" AS "_u_1" - ON "_u_1"."_u_2" = "partsupp"."ps_partkey" AND "_u_1"."_u_3" = "partsupp"."ps_suppkey" + ON "_u_1"."_u_2" = "partsupp"."ps_partkey" + AND "_u_1"."_u_3" = "partsupp"."ps_suppkey" WHERE "_u_1"."_col_0" < "partsupp"."ps_availqty" AND NOT "_u_0"."p_partkey" IS NULL GROUP BY @@ -1270,7 +1273,8 @@ FROM "supplier" AS "supplier" LEFT JOIN "_u_4" AS "_u_4" ON "_u_4"."ps_suppkey" = "supplier"."s_suppkey" JOIN "nation" AS "nation" - ON "nation"."n_name" = 'CANADA' AND "nation"."n_nationkey" = "supplier"."s_nationkey" + ON "nation"."n_name" = 'CANADA' + AND "nation"."n_nationkey" = "supplier"."s_nationkey" WHERE NOT "_u_4"."ps_suppkey" IS NULL ORDER BY @@ -1358,8 +1362,8 @@ WHERE "_u_2"."l_orderkey" IS NULL OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "l1"."l_suppkey" <> "_x") ) - AND NOT "_u_0"."l_orderkey" IS NULL AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "l1"."l_suppkey" <> "_x") + AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY "supplier"."s_name" ORDER BY diff --git a/tests/fixtures/pretty.sql b/tests/fixtures/pretty.sql index 23d9511..fac08be 100644 --- a/tests/fixtures/pretty.sql +++ b/tests/fixtures/pretty.sql @@ -384,3 +384,14 @@ JOIN b CROSS JOIN d JOIN e ON d.id = e.id; + +SELECT * FROM a JOIN b JOIN c USING (e) JOIN d USING (f) USING (g); +SELECT + * +FROM a +JOIN b + JOIN c + USING (e) + JOIN d + USING (f) + USING (g); diff --git a/tests/test_executor.py b/tests/test_executor.py index 981c1d4..1eaca14 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -1,7 +1,7 @@ import os import datetime import unittest -from datetime import date +from datetime import date, time from multiprocessing import Pool import duckdb @@ -640,6 +640,7 @@ class TestExecutor(unittest.TestCase): ("CAST(1 AS TEXT)", "1"), ("CAST('1' AS LONG)", 1), ("CAST('1.1' AS FLOAT)", 1.1), + ("CAST('12:05:01' AS TIME)", time(12, 5, 1)), ("COALESCE(NULL)", None), ("COALESCE(NULL, NULL)", None), ("COALESCE(NULL, 'b')", "b"), @@ -702,6 +703,18 @@ class TestExecutor(unittest.TestCase): ("ARRAY_JOIN(['hello', null ,'world'], ' ', ',')", "hello , world"), ("ARRAY_JOIN(['', null ,'world'], ' ', ',')", " , world"), ("STRUCT('foo', 'bar', null, null)", {"foo": "bar"}), + ("ROUND(1.5)", 2), + ("ROUND(1.2)", 1), + ("ROUND(1.2345, 2)", 1.23), + ("ROUND(NULL)", None), + ("UNIXTOTIME(1659981729)", datetime.datetime(2022, 8, 8, 18, 2, 9)), + ("TIMESTRTOTIME('2013-04-05 01:02:03')", datetime.datetime(2013, 4, 5, 1, 2, 3)), + ("UNIXTOTIME(40 * 365 * 86400)", datetime.datetime(2009, 12, 22, 00, 00, 00)), + ( + "STRTOTIME('08/03/2024 12:34:56', '%d/%m/%Y %H:%M:%S')", + datetime.datetime(2024, 3, 8, 12, 34, 56), + ), + ("STRTOTIME('27/01/2024', '%d/%m/%Y')", datetime.datetime(2024, 1, 27)), ]: with self.subTest(sql): result = execute(f"SELECT {sql}") @@ -807,7 +820,7 @@ class TestExecutor(unittest.TestCase): self.assertEqual(result.columns, columns) self.assertEqual(result.rows, expected) - def test_dict_values(self): + def test_nested_values(self): tables = {"foo": [{"raw": {"name": "Hello, World", "a": [{"b": 1}]}}]} result = execute("SELECT raw:name AS name FROM foo", read="snowflake", tables=tables) @@ -837,3 +850,9 @@ class TestExecutor(unittest.TestCase): self.assertEqual(result.columns, ("flavor",)) self.assertEqual(result.rows, [("cherry",), ("lime",), ("apple",)]) + + tables = {"t": [{"x": [1, 2, 3]}]} + + result = execute("SELECT x FROM t", dialect="duckdb", tables=tables) + self.assertEqual(result.columns, ("x",)) + self.assertEqual(result.rows, [([1, 2, 3],)]) diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 11f8fd3..ed19ac1 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -22,6 +22,9 @@ class TestExpressions(unittest.TestCase): pass def test_eq(self): + query = parse_one("SELECT x FROM t") + self.assertEqual(query, query.copy()) + self.assertNotEqual(exp.to_identifier("a"), exp.to_identifier("A")) self.assertEqual( @@ -498,6 +501,18 @@ class TestExpressions(unittest.TestCase): self.assertEqual(expression.transform(fun).sql(), "FUN(a)") + def test_transform_with_parent_mutation(self): + expression = parse_one("SELECT COUNT(1) FROM table") + + def fun(node): + if str(node) == "COUNT(1)": + # node gets silently mutated here - its parent points to the filter node + return exp.Filter(this=node, expression=exp.Where(this=exp.true())) + return node + + transformed = expression.transform(fun) + self.assertEqual(transformed.sql(), "SELECT COUNT(1) FILTER(WHERE TRUE) FROM table") + def test_transform_multiple_children(self): expression = parse_one("SELECT * FROM x") @@ -517,7 +532,6 @@ class TestExpressions(unittest.TestCase): return node self.assertEqual(expression.transform(remove_column_b).sql(), "SELECT a FROM x") - self.assertEqual(expression.transform(lambda _: None), None) expression = parse_one("CAST(x AS FLOAT)") @@ -544,6 +558,11 @@ class TestExpressions(unittest.TestCase): expression.find(exp.Table).replace(parse_one("y")) self.assertEqual(expression.sql(), "SELECT c, b FROM y") + # we try to replace a with a list but a's parent is actually ordered, not the ORDER BY node + expression = parse_one("SELECT * FROM x ORDER BY a DESC, c") + expression.find(exp.Ordered).this.replace([exp.column("a").asc(), exp.column("b").desc()]) + self.assertEqual(expression.sql(), "SELECT * FROM x ORDER BY a, b DESC, c") + def test_arg_deletion(self): # Using the pop helper method expression = parse_one("SELECT a, b FROM x") @@ -573,10 +592,8 @@ class TestExpressions(unittest.TestCase): expression = parse_one("SELECT * FROM (SELECT * FROM x)") self.assertEqual(len(list(expression.walk())), 9) self.assertEqual(len(list(expression.walk(bfs=False))), 9) - self.assertTrue(all(isinstance(e, exp.Expression) for e, _, _ in expression.walk())) - self.assertTrue( - all(isinstance(e, exp.Expression) for e, _, _ in expression.walk(bfs=False)) - ) + self.assertTrue(all(isinstance(e, exp.Expression) for e in expression.walk())) + self.assertTrue(all(isinstance(e, exp.Expression) for e in expression.walk(bfs=False))) def test_functions(self): self.assertIsInstance(parse_one("x LIKE ANY (y)"), exp.Like) @@ -611,7 +628,9 @@ class TestExpressions(unittest.TestCase): self.assertIsInstance(parse_one("LEAST(a, b)"), exp.Least) self.assertIsInstance(parse_one("LIKE(x, y)"), exp.Like) self.assertIsInstance(parse_one("LN(a)"), exp.Ln) - self.assertIsInstance(parse_one("LOG10(a)"), exp.Log10) + self.assertIsInstance(parse_one("LOG(b, n)"), exp.Log) + self.assertIsInstance(parse_one("LOG2(a)"), exp.Log) + self.assertIsInstance(parse_one("LOG10(a)"), exp.Log) self.assertIsInstance(parse_one("MAX(a)"), exp.Max) self.assertIsInstance(parse_one("MIN(a)"), exp.Min) self.assertIsInstance(parse_one("MONTH(a)"), exp.Month) @@ -765,6 +784,15 @@ class TestExpressions(unittest.TestCase): self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": object}) def test_convert(self): + from collections import namedtuple + + PointTuple = namedtuple("Point", ["x", "y"]) + + class PointClass: + def __init__(self, x=0, y=0): + self.x = x + self.y = y + for value, expected in [ (1, "1"), ("1", "'1'"), @@ -775,14 +803,17 @@ class TestExpressions(unittest.TestCase): ({"x": None}, "MAP(ARRAY('x'), ARRAY(NULL))"), ( datetime.datetime(2022, 10, 1, 1, 1, 1, 1), - "TIME_STR_TO_TIME('2022-10-01T01:01:01.000001+00:00')", + "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001+00:00')", ), ( datetime.datetime(2022, 10, 1, 1, 1, 1, tzinfo=datetime.timezone.utc), - "TIME_STR_TO_TIME('2022-10-01T01:01:01+00:00')", + "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00')", ), (datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"), (math.nan, "NULL"), + (b"\x00\x00\x00\x00\x00\x00\x07\xd3", "2003"), + (PointTuple(1, 2), "STRUCT(1 AS x, 2 AS y)"), + (PointClass(1, 2), "STRUCT(1 AS x, 2 AS y)"), ]: with self.subTest(value): self.assertEqual(exp.convert(value).sql(), expected) diff --git a/tests/test_lineage.py b/tests/test_lineage.py index ed1a448..c782d9a 100644 --- a/tests/test_lineage.py +++ b/tests/test_lineage.py @@ -269,6 +269,41 @@ class TestLineage(unittest.TestCase): node = node.downstream[0] self.assertEqual(node.name, "z.a") + node = lineage( + "a", + """ + WITH foo AS ( + SELECT + 1 AS a + ), bar AS ( + ( + SELECT + a + 1 AS a + FROM foo + ) + ) + ( + SELECT + a + b AS a + FROM bar + CROSS JOIN ( + SELECT + 2 AS b + ) AS baz + ) + """, + ) + self.assertEqual(node.name, "a") + self.assertEqual(len(node.downstream), 2) + a, b = sorted(node.downstream, key=lambda n: n.name) + self.assertEqual(a.name, "bar.a") + self.assertEqual(len(a.downstream), 1) + self.assertEqual(b.name, "baz.b") + self.assertEqual(b.downstream, []) + + node = a.downstream[0] + self.assertEqual(node.name, "foo.a") + def test_lineage_cte_union(self) -> None: query = """ WITH dataset AS ( @@ -353,3 +388,46 @@ class TestLineage(unittest.TestCase): with self.assertRaises(sqlglot.errors.SqlglotError): lineage('"a"', "WITH x AS (SELECT 1 a) SELECT a FROM x", dialect="snowflake") + + def test_ddl_lineage(self) -> None: + sql = """ + INSERT /*+ HINT1 */ + INTO target (x, y) + SELECT subq.x, subq.y + FROM ( + SELECT /*+ HINT2 */ + t.x AS x, + TO_DATE('2023-12-19', 'YYYY-MM-DD') AS y + FROM s.t t + WHERE 1 = 1 AND y = TO_DATE('2023-12-19', 'YYYY-MM-DD') + ) subq + """ + + node = lineage("y", sql, dialect="oracle") + + self.assertEqual(node.name, "Y") + self.assertEqual(node.expression.sql(dialect="oracle"), "SUBQ.Y AS Y") + + downstream = node.downstream[0] + self.assertEqual(downstream.name, "SUBQ.Y") + self.assertEqual( + downstream.expression.sql(dialect="oracle"), "TO_DATE('2023-12-19', 'YYYY-MM-DD') AS Y" + ) + + def test_trim(self) -> None: + sql = """ + SELECT a, b, c + FROM (select a, b, c from y) z + """ + + node = lineage("a", sql, trim_selects=False) + + self.assertEqual(node.name, "a") + self.assertEqual( + node.source.sql(), + "SELECT z.a AS a, z.b AS b, z.c AS c FROM (SELECT y.a AS a, y.b AS b, y.c AS c FROM y AS y) AS z", + ) + + downstream = node.downstream[0] + self.assertEqual(downstream.name, "z.a") + self.assertEqual(downstream.source.sql(), "SELECT y.a AS a, y.b AS b, y.c AS c FROM y AS y") diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 046e5a6..0e8ce15 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -298,7 +298,9 @@ class TestOptimizer(unittest.TestCase): self.check_file( "qualify_columns", qualify_columns, execute=True, schema=self.schema, set_dialect=True ) - self.check_file("qualify_columns_ddl", qualify_columns, schema=self.schema) + self.check_file( + "qualify_columns_ddl", qualify_columns, schema=self.schema, set_dialect=True + ) def test_qualify_columns__with_invisible(self): schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}}) @@ -340,6 +342,9 @@ class TestOptimizer(unittest.TestCase): def test_simplify(self): self.check_file("simplify", simplify, set_dialect=True) + expression = parse_one("SELECT a, c, b FROM table1 WHERE 1 = 1") + self.assertEqual(simplify(simplify(expression.find(exp.Where))).sql(), "WHERE TRUE") + expression = parse_one("TRUE AND TRUE AND TRUE") self.assertEqual(exp.true(), optimizer.simplify.simplify(expression)) self.assertEqual(exp.true(), optimizer.simplify.simplify(expression.this)) @@ -359,15 +364,18 @@ class TestOptimizer(unittest.TestCase): self.assertEqual("CONCAT('a', x, 'bc')", simplified_safe_concat.sql()) anon_unquoted_str = parse_one("anonymous(x, y)") - self.assertEqual(optimizer.simplify.gen(anon_unquoted_str), "ANONYMOUS x,y") + self.assertEqual(optimizer.simplify.gen(anon_unquoted_str), "ANONYMOUS(x,y)") + + query = parse_one("SELECT x FROM t") + self.assertEqual(optimizer.simplify.gen(query), optimizer.simplify.gen(query.copy())) anon_unquoted_identifier = exp.Anonymous( this=exp.to_identifier("anonymous"), expressions=[exp.column("x"), exp.column("y")] ) - self.assertEqual(optimizer.simplify.gen(anon_unquoted_identifier), "ANONYMOUS x,y") + self.assertEqual(optimizer.simplify.gen(anon_unquoted_identifier), "ANONYMOUS(x,y)") anon_quoted = parse_one('"anonymous"(x, y)') - self.assertEqual(optimizer.simplify.gen(anon_quoted), '"anonymous" x,y') + self.assertEqual(optimizer.simplify.gen(anon_quoted), '"anonymous"(x,y)') with self.assertRaises(ValueError) as e: anon_invalid = exp.Anonymous(this=5) @@ -375,6 +383,28 @@ class TestOptimizer(unittest.TestCase): self.assertIn("Anonymous.this expects a str or an Identifier, got 'int'.", str(e.exception)) + sql = parse_one( + """ + WITH cte AS (select 1 union select 2), cte2 AS ( + SELECT ROW() OVER (PARTITION BY y) FROM ( + (select 1) limit 10 + ) + ) + SELECT + *, + a + 1, + a div 1, + filter("B", (x, y) -> x + y) + FROM (z AS z CROSS JOIN z) AS f(a) LEFT JOIN a.b.c.d.e.f.g USING(n) ORDER BY 1 + """ + ) + self.assertEqual( + optimizer.simplify.gen(sql), + """ +SELECT :with,WITH :expressions,CTE :this,UNION :this,SELECT :expressions,1,:expression,SELECT :expressions,2,:distinct,True,:alias, AS cte,CTE :this,SELECT :expressions,WINDOW :this,ROW(),:partition_by,y,:over,OVER,:from,FROM ((SELECT :expressions,1):limit,LIMIT :expression,10),:alias, AS cte2,:expressions,STAR,a + 1,a DIV 1,FILTER("B",LAMBDA :this,x + y,:expressions,x,y),:from,FROM (z AS z:joins,JOIN :this,z,:kind,CROSS) AS f(a),:joins,JOIN :this,a.b.c.d.e.f.g,:side,LEFT,:using,n,:order,ORDER :expressions,ORDERED :this,1,:nulls_first,True +""".strip(), + ) + def test_unnest_subqueries(self): self.check_file( "unnest_subqueries", @@ -475,6 +505,18 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') ) def test_scope(self): + ast = parse_one("SELECT IF(a IN UNNEST(b), 1, 0) AS c FROM t", dialect="bigquery") + self.assertEqual(build_scope(ast).columns, [exp.column("a"), exp.column("b")]) + + many_unions = parse_one(" UNION ALL ".join(["SELECT x FROM t"] * 10000)) + scopes_using_traverse = list(build_scope(many_unions).traverse()) + scopes_using_traverse_scope = traverse_scope(many_unions) + self.assertEqual(len(scopes_using_traverse), len(scopes_using_traverse_scope)) + assert all( + x.expression is y.expression + for x, y in zip(scopes_using_traverse, scopes_using_traverse_scope) + ) + sql = """ WITH q AS ( SELECT x.b FROM x @@ -522,7 +564,7 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') self.assertEqual( { node.sql() - for node, *_ in walk_in_scope(expression.find(exp.Where)) + for node in walk_in_scope(expression.find(exp.Where)) if isinstance(node, exp.Column) }, {"s.b"}, @@ -667,6 +709,14 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') self.assertEqual(expressions[0].type.this, exp.DataType.Type.BIGINT) self.assertEqual(expressions[1].type.this, exp.DataType.Type.DOUBLE) + expressions = annotate_types( + parse_one("SELECT SUM(2 / 3), CAST(2 AS DECIMAL) / 3", dialect="mysql") + ).expressions + + self.assertEqual(expressions[0].type.this, exp.DataType.Type.DOUBLE) + self.assertEqual(expressions[0].this.type.this, exp.DataType.Type.DOUBLE) + self.assertEqual(expressions[1].type.this, exp.DataType.Type.DECIMAL) + def test_bracket_annotation(self): expression = annotate_types(parse_one("SELECT A[:]")).expressions[0] @@ -1056,6 +1106,34 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') self.assertEqual(expression.selects[1].type, exp.DataType.build("STRUCT")) self.assertEqual(expression.selects[2].type, exp.DataType.build("int")) + self.assertEqual( + annotate_types( + optimizer.qualify.qualify( + parse_one( + "SELECT x FROM UNNEST(GENERATE_DATE_ARRAY('2021-01-01', current_date(), interval 1 day)) AS x" + ) + ) + ) + .selects[0] + .type, + exp.DataType.build("date"), + ) + + def test_map_annotation(self): + # ToMap annotation + expression = annotate_types(parse_one("SELECT MAP {'x': 1}", read="duckdb")) + self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, INT)")) + + # Map annotation + expression = annotate_types( + parse_one("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])", read="duckdb") + ) + self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, INT)")) + + # VarMap annotation + expression = annotate_types(parse_one("SELECT MAP('a', 'b')", read="spark")) + self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, VARCHAR)")) + def test_recursive_cte(self): query = parse_one( """ diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 970c1ac..29ef5b6 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -85,6 +85,18 @@ x""" ], ) + for simple_query in ("SELECT 1\r\n", "\r\nSELECT 1"): + tokens = Tokenizer().tokenize(simple_query) + tokens = [(token.token_type, token.text) for token in tokens] + + self.assertEqual( + tokens, + [ + (TokenType.SELECT, "SELECT"), + (TokenType.NUMBER, "1"), + ], + ) + def test_command(self): tokens = Tokenizer().tokenize("SHOW;") self.assertEqual(tokens[0].token_type, TokenType.SHOW) diff --git a/tests/test_transpile.py b/tests/test_transpile.py index 49deda9..0c65da4 100644 --- a/tests/test_transpile.py +++ b/tests/test_transpile.py @@ -62,10 +62,28 @@ class TestTranspile(unittest.TestCase): def test_some(self): self.validate( "SELECT * FROM x WHERE a = SOME (SELECT 1)", - "SELECT * FROM x WHERE a = ANY (SELECT 1)", + "SELECT * FROM x WHERE a = ANY(SELECT 1)", ) def test_leading_comma(self): + self.validate( + "SELECT a, b, c FROM (SELECT a, b, c FROM t)", + "SELECT\n" + " a\n" + " , b\n" + " , c\n" + "FROM (\n" + " SELECT\n" + " a\n" + " , b\n" + " , c\n" + " FROM t\n" + ")", + leading_comma=True, + pretty=True, + pad=4, + indent=4, + ) self.validate( "SELECT FOO, BAR, BAZ", "SELECT\n FOO\n , BAR\n , BAZ", @@ -275,7 +293,7 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""", FROM b /* where */ WHERE - foo /* comment 1 */ AND bar AND bla /* comment 2 */""", + foo AND /* comment 1 */ bar AND /* comment 2 */ bla""", pretty=True, ) self.validate( @@ -428,7 +446,8 @@ FROM dw_1_dw_1_1.exactonline_2.transactionlines""", """SELECT 'hotel1' AS hotel, * -FROM dw_1_dw_1_1.exactonline_1.transactionlines /* +FROM dw_1_dw_1_1.exactonline_1.transactionlines +/* UNION ALL SELECT 'Thon Partner Hotel Jølster' AS hotel, @@ -479,6 +498,32 @@ SELECT FROM base""", pretty=True, ) + self.validate( + """-- comment +SOME_FUNC(arg IGNORE NULLS) + OVER (PARTITION BY foo ORDER BY bla) AS col""", + "SOME_FUNC(arg IGNORE NULLS) OVER (PARTITION BY foo ORDER BY bla) AS col /* comment */", + pretty=True, + ) + self.validate( + """ + SELECT * + FROM x + INNER JOIN y + -- inner join z + LEFT JOIN z using (id) + using (id) + """, + """SELECT + * +FROM x +INNER JOIN y + /* inner join z */ + LEFT JOIN z + USING (id) + USING (id)""", + pretty=True, + ) def test_types(self): self.validate("INT 1", "CAST(1 AS INT)") @@ -676,7 +721,11 @@ FROM base""", ) self.validate("STR_TO_TIME('x', 'y')", "DATE_PARSE('x', 'y')", write="presto") - self.validate("STR_TO_UNIX('x', 'y')", "TO_UNIXTIME(DATE_PARSE('x', 'y'))", write="presto") + self.validate( + "STR_TO_UNIX('x', 'y')", + "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('x' AS VARCHAR), 'y')), PARSE_DATETIME(CAST('x' AS VARCHAR), 'y')))", + write="presto", + ) self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="presto") self.validate("TIME_TO_UNIX(x)", "TO_UNIXTIME(x)", write="presto") self.validate( @@ -714,7 +763,10 @@ FROM base""", self.validate("x[x - 1]", "x[x - 1]", write="presto", identity=False) self.validate( - "x[array_size(y) - 1]", "x[CARDINALITY(y) - 1 + 1]", write="presto", identity=False + "x[array_size(y) - 1]", + "x[(CARDINALITY(y) - 1) + 1]", + write="presto", + identity=False, ) self.validate("x[3 - 1]", "x[3]", write="presto", identity=False) self.validate("MAP(a, b)[0]", "MAP(a, b)[0]", write="presto", identity=False) @@ -758,7 +810,6 @@ FROM base""", "CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')", "COMMENT ON ACCESS METHOD gin IS 'GIN index access method'", "CREATE OR REPLACE STAGE", - "CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)", "EXECUTE statement", "EXPLAIN SELECT * FROM x", "GRANT INSERT ON foo TO bla", @@ -904,3 +955,8 @@ FROM base""", with self.assertRaises(UnsupportedError) as ctx: unsupported(ErrorLevel.IMMEDIATE) self.assertEqual(str(ctx.exception).count(error), 1) + + def test_recursion(self): + sql = "1 AND 2 OR 3 AND " * 1000 + sql += "4" + self.assertEqual(len(parse_one(sql).sql()), 17001) -- cgit v1.2.3