summaryrefslogtreecommitdiffstats
path: root/tests/dialects/test_hive.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/dialects/test_hive.py')
-rw-r--r--tests/dialects/test_hive.py541
1 files changed, 541 insertions, 0 deletions
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
new file mode 100644
index 0000000..eccd75a
--- /dev/null
+++ b/tests/dialects/test_hive.py
@@ -0,0 +1,541 @@
+from tests.dialects.test_dialect import Validator
+
+
+class TestHive(Validator):
+ dialect = "hive"
+
+ def test_bits(self):
+ self.validate_all(
+ "x & 1",
+ write={
+ "duckdb": "x & 1",
+ "presto": "BITWISE_AND(x, 1)",
+ "hive": "x & 1",
+ "spark": "x & 1",
+ },
+ )
+ self.validate_all(
+ "~x",
+ write={
+ "duckdb": "~x",
+ "presto": "BITWISE_NOT(x)",
+ "hive": "~x",
+ "spark": "~x",
+ },
+ )
+ self.validate_all(
+ "x | 1",
+ write={
+ "duckdb": "x | 1",
+ "presto": "BITWISE_OR(x, 1)",
+ "hive": "x | 1",
+ "spark": "x | 1",
+ },
+ )
+ self.validate_all(
+ "x << 1",
+ read={
+ "spark": "SHIFTLEFT(x, 1)",
+ },
+ write={
+ "duckdb": "x << 1",
+ "presto": "BITWISE_ARITHMETIC_SHIFT_LEFT(x, 1)",
+ "hive": "x << 1",
+ "spark": "SHIFTLEFT(x, 1)",
+ },
+ )
+ self.validate_all(
+ "x >> 1",
+ read={
+ "spark": "SHIFTRIGHT(x, 1)",
+ },
+ write={
+ "duckdb": "x >> 1",
+ "presto": "BITWISE_ARITHMETIC_SHIFT_RIGHT(x, 1)",
+ "hive": "x >> 1",
+ "spark": "SHIFTRIGHT(x, 1)",
+ },
+ )
+ self.validate_all(
+ "x & 1 > 0",
+ write={
+ "duckdb": "x & 1 > 0",
+ "presto": "BITWISE_AND(x, 1) > 0",
+ "hive": "x & 1 > 0",
+ "spark": "x & 1 > 0",
+ },
+ )
+
+ def test_cast(self):
+ self.validate_all(
+ "1s",
+ write={
+ "duckdb": "CAST(1 AS SMALLINT)",
+ "presto": "CAST(1 AS SMALLINT)",
+ "hive": "CAST(1 AS SMALLINT)",
+ "spark": "CAST(1 AS SHORT)",
+ },
+ )
+ self.validate_all(
+ "1S",
+ write={
+ "duckdb": "CAST(1 AS SMALLINT)",
+ "presto": "CAST(1 AS SMALLINT)",
+ "hive": "CAST(1 AS SMALLINT)",
+ "spark": "CAST(1 AS SHORT)",
+ },
+ )
+ self.validate_all(
+ "1Y",
+ write={
+ "duckdb": "CAST(1 AS TINYINT)",
+ "presto": "CAST(1 AS TINYINT)",
+ "hive": "CAST(1 AS TINYINT)",
+ "spark": "CAST(1 AS BYTE)",
+ },
+ )
+ self.validate_all(
+ "1L",
+ write={
+ "duckdb": "CAST(1 AS BIGINT)",
+ "presto": "CAST(1 AS BIGINT)",
+ "hive": "CAST(1 AS BIGINT)",
+ "spark": "CAST(1 AS LONG)",
+ },
+ )
+ self.validate_all(
+ "1.0bd",
+ write={
+ "duckdb": "CAST(1.0 AS DECIMAL)",
+ "presto": "CAST(1.0 AS DECIMAL)",
+ "hive": "CAST(1.0 AS DECIMAL)",
+ "spark": "CAST(1.0 AS DECIMAL)",
+ },
+ )
+ self.validate_all(
+ "CAST(1 AS INT)",
+ read={
+ "presto": "TRY_CAST(1 AS INT)",
+ },
+ write={
+ "duckdb": "TRY_CAST(1 AS INT)",
+ "presto": "TRY_CAST(1 AS INTEGER)",
+ "hive": "CAST(1 AS INT)",
+ "spark": "CAST(1 AS INT)",
+ },
+ )
+
+ def test_ddl(self):
+ self.validate_all(
+ "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
+ write={
+ "presto": "CREATE TABLE test WITH (FORMAT = 'parquet', x = '1', Z = '2') AS SELECT 1",
+ "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
+ "spark": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
+ },
+ )
+ self.validate_all(
+ "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
+ write={
+ "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
+ "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
+ "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
+ },
+ )
+
+ def test_lateral_view(self):
+ self.validate_all(
+ "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
+ write={
+ "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a) CROSS JOIN UNNEST(z) AS u(b)",
+ "hive": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
+ "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
+ },
+ )
+ self.validate_all(
+ "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
+ write={
+ "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
+ "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
+ "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
+ },
+ )
+ self.validate_all(
+ "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
+ write={
+ "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t(a)",
+ "hive": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
+ "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
+ },
+ )
+ self.validate_all(
+ "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
+ write={
+ "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)",
+ "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
+ "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
+ },
+ )
+
+ def test_quotes(self):
+ self.validate_all(
+ "'\\''",
+ write={
+ "duckdb": "''''",
+ "presto": "''''",
+ "hive": "'\\''",
+ "spark": "'\\''",
+ },
+ )
+ self.validate_all(
+ "'\"x\"'",
+ write={
+ "duckdb": "'\"x\"'",
+ "presto": "'\"x\"'",
+ "hive": "'\"x\"'",
+ "spark": "'\"x\"'",
+ },
+ )
+ self.validate_all(
+ "\"'x'\"",
+ write={
+ "duckdb": "'''x'''",
+ "presto": "'''x'''",
+ "hive": "'\\'x\\''",
+ "spark": "'\\'x\\''",
+ },
+ )
+ self.validate_all(
+ "'\\\\a'",
+ read={
+ "presto": "'\\a'",
+ },
+ write={
+ "duckdb": "'\\a'",
+ "presto": "'\\a'",
+ "hive": "'\\\\a'",
+ "spark": "'\\\\a'",
+ },
+ )
+
+ def test_regex(self):
+ self.validate_all(
+ "a RLIKE 'x'",
+ write={
+ "duckdb": "REGEXP_MATCHES(a, 'x')",
+ "presto": "REGEXP_LIKE(a, 'x')",
+ "hive": "a RLIKE 'x'",
+ "spark": "a RLIKE 'x'",
+ },
+ )
+
+ self.validate_all(
+ "a REGEXP 'x'",
+ write={
+ "duckdb": "REGEXP_MATCHES(a, 'x')",
+ "presto": "REGEXP_LIKE(a, 'x')",
+ "hive": "a RLIKE 'x'",
+ "spark": "a RLIKE 'x'",
+ },
+ )
+
+ def test_time(self):
+ self.validate_all(
+ "DATEDIFF(a, b)",
+ write={
+ "duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))",
+ "presto": "DATE_DIFF('day', CAST(SUBSTR(CAST(b AS VARCHAR), 1, 10) AS DATE), CAST(SUBSTR(CAST(a AS VARCHAR), 1, 10) AS DATE))",
+ "hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
+ "spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
+ "": "DATE_DIFF(TS_OR_DS_TO_DATE(a), TS_OR_DS_TO_DATE(b))",
+ },
+ )
+ self.validate_all(
+ """from_unixtime(x, "yyyy-MM-dd'T'HH")""",
+ write={
+ "duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), '%Y-%m-%d''T''%H')",
+ "presto": "DATE_FORMAT(FROM_UNIXTIME(x), '%Y-%m-%d''T''%H')",
+ "hive": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')",
+ "spark": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')",
+ },
+ )
+ self.validate_all(
+ "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
+ write={
+ "duckdb": "STRFTIME('2020-01-01', '%Y-%m-%d %H:%M:%S')",
+ "presto": "DATE_FORMAT('2020-01-01', '%Y-%m-%d %H:%i:%S')",
+ "hive": "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
+ "spark": "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
+ },
+ )
+ self.validate_all(
+ "DATE_ADD('2020-01-01', 1)",
+ write={
+ "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY",
+ "presto": "DATE_ADD('DAY', 1, DATE_PARSE(SUBSTR('2020-01-01', 1, 10), '%Y-%m-%d'))",
+ "hive": "DATE_ADD('2020-01-01', 1)",
+ "spark": "DATE_ADD('2020-01-01', 1)",
+ "": "TS_OR_DS_ADD('2020-01-01', 1, 'DAY')",
+ },
+ )
+ self.validate_all(
+ "DATE_SUB('2020-01-01', 1)",
+ write={
+ "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 * -1 DAY",
+ "presto": "DATE_ADD('DAY', 1 * -1, DATE_PARSE(SUBSTR('2020-01-01', 1, 10), '%Y-%m-%d'))",
+ "hive": "DATE_ADD('2020-01-01', 1 * -1)",
+ "spark": "DATE_ADD('2020-01-01', 1 * -1)",
+ "": "TS_OR_DS_ADD('2020-01-01', 1 * -1, 'DAY')",
+ },
+ )
+ self.validate_all(
+ "DATEDIFF(TO_DATE(y), x)",
+ write={
+ "duckdb": "DATE_DIFF('day', CAST(x AS DATE), CAST(CAST(y AS DATE) AS DATE))",
+ "presto": "DATE_DIFF('day', CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE), CAST(SUBSTR(CAST(CAST(SUBSTR(CAST(y AS VARCHAR), 1, 10) AS DATE) AS VARCHAR), 1, 10) AS DATE))",
+ "hive": "DATEDIFF(TO_DATE(TO_DATE(y)), TO_DATE(x))",
+ "spark": "DATEDIFF(TO_DATE(TO_DATE(y)), TO_DATE(x))",
+ "": "DATE_DIFF(TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE(y)), TS_OR_DS_TO_DATE(x))",
+ },
+ )
+ self.validate_all(
+ "UNIX_TIMESTAMP(x)",
+ write={
+ "duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))",
+ "presto": "TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %H:%i:%S'))",
+ "hive": "UNIX_TIMESTAMP(x)",
+ "spark": "UNIX_TIMESTAMP(x)",
+ "": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')",
+ },
+ )
+
+ for unit in ("DAY", "MONTH", "YEAR"):
+ self.validate_all(
+ f"{unit}(x)",
+ write={
+ "duckdb": f"{unit}(CAST(x AS DATE))",
+ "presto": f"{unit}(CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE))",
+ "hive": f"{unit}(TO_DATE(x))",
+ "spark": f"{unit}(TO_DATE(x))",
+ },
+ )
+
+ def test_order_by(self):
+ self.validate_all(
+ "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
+ write={
+ "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
+ "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
+ "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
+ "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
+ },
+ )
+
+ def test_hive(self):
+ self.validate_all(
+ "PERCENTILE(x, 0.5)",
+ write={
+ "duckdb": "QUANTILE(x, 0.5)",
+ "presto": "APPROX_PERCENTILE(x, 0.5)",
+ "hive": "PERCENTILE(x, 0.5)",
+ "spark": "PERCENTILE(x, 0.5)",
+ },
+ )
+ self.validate_all(
+ "APPROX_COUNT_DISTINCT(a)",
+ write={
+ "duckdb": "APPROX_COUNT_DISTINCT(a)",
+ "presto": "APPROX_DISTINCT(a)",
+ "hive": "APPROX_COUNT_DISTINCT(a)",
+ "spark": "APPROX_COUNT_DISTINCT(a)",
+ },
+ )
+ self.validate_all(
+ "ARRAY_CONTAINS(x, 1)",
+ write={
+ "duckdb": "ARRAY_CONTAINS(x, 1)",
+ "presto": "CONTAINS(x, 1)",
+ "hive": "ARRAY_CONTAINS(x, 1)",
+ "spark": "ARRAY_CONTAINS(x, 1)",
+ },
+ )
+ self.validate_all(
+ "SIZE(x)",
+ write={
+ "duckdb": "ARRAY_LENGTH(x)",
+ "presto": "CARDINALITY(x)",
+ "hive": "SIZE(x)",
+ "spark": "SIZE(x)",
+ },
+ )
+ self.validate_all(
+ "LOCATE('a', x)",
+ write={
+ "duckdb": "STRPOS(x, 'a')",
+ "presto": "STRPOS(x, 'a')",
+ "hive": "LOCATE('a', x)",
+ "spark": "LOCATE('a', x)",
+ },
+ )
+ self.validate_all(
+ "LOCATE('a', x, 3)",
+ write={
+ "duckdb": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
+ "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
+ "hive": "LOCATE('a', x, 3)",
+ "spark": "LOCATE('a', x, 3)",
+ },
+ )
+ self.validate_all(
+ "INITCAP('new york')",
+ write={
+ "duckdb": "INITCAP('new york')",
+ "presto": "REGEXP_REPLACE('new york', '(\w)(\w*)', x -> UPPER(x[1]) || LOWER(x[2]))",
+ "hive": "INITCAP('new york')",
+ "spark": "INITCAP('new york')",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM x TABLESAMPLE(10) y",
+ write={
+ "presto": "SELECT * FROM x AS y TABLESAMPLE(10)",
+ "hive": "SELECT * FROM x TABLESAMPLE(10) AS y",
+ "spark": "SELECT * FROM x TABLESAMPLE(10) AS y",
+ },
+ )
+ self.validate_all(
+ "SELECT SORT_ARRAY(x)",
+ write={
+ "duckdb": "SELECT ARRAY_SORT(x)",
+ "presto": "SELECT ARRAY_SORT(x)",
+ "hive": "SELECT SORT_ARRAY(x)",
+ "spark": "SELECT SORT_ARRAY(x)",
+ },
+ )
+ self.validate_all(
+ "SELECT SORT_ARRAY(x, FALSE)",
+ read={
+ "duckdb": "SELECT ARRAY_REVERSE_SORT(x)",
+ "spark": "SELECT SORT_ARRAY(x, FALSE)",
+ },
+ write={
+ "duckdb": "SELECT ARRAY_REVERSE_SORT(x)",
+ "presto": "SELECT ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)",
+ "hive": "SELECT SORT_ARRAY(x, FALSE)",
+ "spark": "SELECT SORT_ARRAY(x, FALSE)",
+ },
+ )
+ self.validate_all(
+ "GET_JSON_OBJECT(x, '$.name')",
+ write={
+ "presto": "JSON_EXTRACT_SCALAR(x, '$.name')",
+ "hive": "GET_JSON_OBJECT(x, '$.name')",
+ "spark": "GET_JSON_OBJECT(x, '$.name')",
+ },
+ )
+ self.validate_all(
+ "MAP(a, b, c, d)",
+ write={
+ "duckdb": "MAP(LIST_VALUE(a, c), LIST_VALUE(b, d))",
+ "presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
+ "hive": "MAP(a, b, c, d)",
+ "spark": "MAP_FROM_ARRAYS(ARRAY(a, c), ARRAY(b, d))",
+ },
+ )
+ self.validate_all(
+ "MAP(a, b)",
+ write={
+ "duckdb": "MAP(LIST_VALUE(a), LIST_VALUE(b))",
+ "presto": "MAP(ARRAY[a], ARRAY[b])",
+ "hive": "MAP(a, b)",
+ "spark": "MAP_FROM_ARRAYS(ARRAY(a), ARRAY(b))",
+ },
+ )
+ self.validate_all(
+ "LOG(10)",
+ write={
+ "duckdb": "LN(10)",
+ "presto": "LN(10)",
+ "hive": "LN(10)",
+ "spark": "LN(10)",
+ },
+ )
+ self.validate_all(
+ "LOG(10, 2)",
+ write={
+ "duckdb": "LOG(10, 2)",
+ "presto": "LOG(10, 2)",
+ "hive": "LOG(10, 2)",
+ "spark": "LOG(10, 2)",
+ },
+ )
+ self.validate_all(
+ 'ds = "2020-01-01"',
+ write={
+ "duckdb": "ds = '2020-01-01'",
+ "presto": "ds = '2020-01-01'",
+ "hive": "ds = '2020-01-01'",
+ "spark": "ds = '2020-01-01'",
+ },
+ )
+ self.validate_all(
+ "ds = \"1''2\"",
+ write={
+ "duckdb": "ds = '1''''2'",
+ "presto": "ds = '1''''2'",
+ "hive": "ds = '1\\'\\'2'",
+ "spark": "ds = '1\\'\\'2'",
+ },
+ )
+ self.validate_all(
+ "x == 1",
+ write={
+ "duckdb": "x = 1",
+ "presto": "x = 1",
+ "hive": "x = 1",
+ "spark": "x = 1",
+ },
+ )
+ self.validate_all(
+ "x div y",
+ write={
+ "duckdb": "CAST(x / y AS INT)",
+ "presto": "CAST(x / y AS INTEGER)",
+ "hive": "CAST(x / y AS INT)",
+ "spark": "CAST(x / y AS INT)",
+ },
+ )
+ self.validate_all(
+ "COLLECT_LIST(x)",
+ read={
+ "presto": "ARRAY_AGG(x)",
+ },
+ write={
+ "duckdb": "ARRAY_AGG(x)",
+ "presto": "ARRAY_AGG(x)",
+ "hive": "COLLECT_LIST(x)",
+ "spark": "COLLECT_LIST(x)",
+ },
+ )
+ self.validate_all(
+ "COLLECT_SET(x)",
+ read={
+ "presto": "SET_AGG(x)",
+ },
+ write={
+ "presto": "SET_AGG(x)",
+ "hive": "COLLECT_SET(x)",
+ "spark": "COLLECT_SET(x)",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM x TABLESAMPLE(1) AS foo",
+ read={
+ "presto": "SELECT * FROM x AS foo TABLESAMPLE(1)",
+ },
+ write={
+ "presto": "SELECT * FROM x AS foo TABLESAMPLE(1)",
+ "hive": "SELECT * FROM x TABLESAMPLE(1) AS foo",
+ "spark": "SELECT * FROM x TABLESAMPLE(1) AS foo",
+ },
+ )