1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
from tests.dialects.test_dialect import Validator
class TestDatabricks(Validator):
    """Validator cases run with ``dialect`` set to ``"databricks"``.

    Every test hands SQL text to the inherited ``validate_identity`` and
    ``validate_all`` helpers. What those helpers actually assert is defined
    on ``Validator`` (not visible in this file); the ``write`` mappings are
    presumably expected output keyed by target dialect -- confirm there.
    """

    dialect = "databricks"

    def test_databricks(self):
        """Feed general statements to ``validate_identity``, then one generated-column case."""
        identity_statements = (
            "INSERT INTO a REPLACE WHERE cond VALUES (1), (2)",
            "SELECT c1 : price",
            "CREATE FUNCTION a.b(x INT) RETURNS INT RETURN x + 1",
            "CREATE FUNCTION a AS b",
            "SELECT ${x} FROM ${y} WHERE ${z} > 1",
            "CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE)))",
            "SELECT * FROM sales UNPIVOT INCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))",
            "SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))",
        )
        for statement in identity_statements:
            self.validate_identity(statement)

        # The "databricks" entry differs from the input: YEAR(y) becomes YEAR(TO_DATE(y)).
        generated_column_write = {
            "databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(TO_DATE(y))))",
        }
        self.validate_all(
            "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
            write=generated_column_write,
        )

    # https://docs.databricks.com/sql/language-manual/functions/colonsign.html
    def test_json(self):
        """Exercise the colon-sign (``:``) JSON path syntax described at the URL above."""
        self.validate_identity("""SELECT c1 : price FROM VALUES ('{ "price": 5 }') AS T(c1)""")

        # (input SQL, "databricks" entry of ``write``) pairs, checked in order.
        json_cases = [
            (
                """SELECT c1:['price'] FROM VALUES('{ "price": 5 }') AS T(c1)""",
                """SELECT c1 : ARRAY('price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
            ),
            (
                """SELECT c1:item[1].price FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
                """SELECT c1 : item[1].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
            ),
            (
                """SELECT c1:item[*].price FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
                """SELECT c1 : item[*].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
            ),
            (
                """SELECT from_json(c1:item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
                """SELECT FROM_JSON(c1 : item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
            ),
            (
                """SELECT inline(from_json(c1:item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
                """SELECT INLINE(FROM_JSON(c1 : item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
            ),
        ]
        for source_sql, databricks_sql in json_cases:
            self.validate_all(source_sql, write={"databricks": databricks_sql})

    def test_datediff(self):
        """Check three-argument ``DATEDIFF`` for each unit against tsql / postgres entries."""
        # For the tsql pairing, both ``write`` entries equal the input text.
        year_diff = "SELECT DATEDIFF(year, 'start', 'end')"
        self.validate_all(year_diff, write={"tsql": year_diff, "databricks": year_diff})

        # (input SQL, "postgres" entry) pairs; the "databricks" entry is the
        # input itself in every one of these cases.
        postgres_cases = [
            (
                "SELECT DATEDIFF(microsecond, 'start', 'end')",
                "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000000 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(millisecond, 'start', 'end')",
                "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(second, 'start', 'end')",
                "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(minute, 'start', 'end')",
                "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 60 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(hour, 'start', 'end')",
                "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 3600 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(day, 'start', 'end')",
                "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 86400 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(week, 'start', 'end')",
                "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 48 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 4 + EXTRACT(day FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) / 7 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(month, 'start', 'end')",
                "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 12 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(quarter, 'start', 'end')",
                "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 4 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) / 3 AS BIGINT)",
            ),
            (
                "SELECT DATEDIFF(year, 'start', 'end')",
                "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)",
            ),
        ]
        for source_sql, postgres_sql in postgres_cases:
            self.validate_all(
                source_sql,
                write={"databricks": source_sql, "postgres": postgres_sql},
            )

    def test_add_date(self):
        """Check ``DATEADD`` / ``DATE_ADD`` plus the two-argument ``DATEDIFF`` form."""
        # Three-argument DATEADD: both entries equal the input text.
        dateadd_year = "SELECT DATEADD(year, 1, '2020-01-01')"
        self.validate_all(
            dateadd_year,
            write={"tsql": dateadd_year, "databricks": dateadd_year},
        )

        # Two-argument DATEDIFF: the expected text has a DAY unit and the
        # arguments in the opposite order.
        self.validate_all(
            "SELECT DATEDIFF('end', 'start')",
            write={"databricks": "SELECT DATEDIFF(DAY, 'start', 'end')"},
        )

        # Two-argument DATE_ADD: both entries are the DATEADD(DAY, ...) form.
        dateadd_day = "SELECT DATEADD(DAY, 1, '2020-01-01')"
        self.validate_all(
            "SELECT DATE_ADD('2020-01-01', 1)",
            write={"tsql": dateadd_day, "databricks": dateadd_day},
        )

    def test_without_as(self):
        """Check statements written without ``AS`` before a parenthesised query."""
        # (input SQL, "databricks" entry) pairs; each expected text contains AS.
        missing_as_cases = (
            ("CREATE TABLE x (SELECT 1)", "CREATE TABLE x AS (SELECT 1)"),
            ("WITH x (select 1) SELECT * FROM x", "WITH x AS (SELECT 1) SELECT * FROM x"),
        )
        for source_sql, databricks_sql in missing_as_cases:
            self.validate_all(source_sql, write={"databricks": databricks_sql})
|