1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
import time
from sqlglot.optimizer import optimize
INPUT = "/home/toby/dev/tpch/{i}.sql"
OUTPUT = "/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-h/tpc-h.sql"
NUM = 22
SCHEMA = {
"lineitem": {
"l_orderkey": "bigint",
"l_partkey": "bigint",
"l_suppkey": "bigint",
"l_linenumber": "bigint",
"l_quantity": "double",
"l_extendedprice": "double",
"l_discount": "double",
"l_tax": "double",
"l_returnflag": "string",
"l_linestatus": "string",
"l_shipdate": "string",
"l_commitdate": "string",
"l_receiptdate": "string",
"l_shipinstruct": "string",
"l_shipmode": "string",
"l_comment": "string",
},
"orders": {
"o_orderkey": "bigint",
"o_custkey": "bigint",
"o_orderstatus": "string",
"o_totalprice": "double",
"o_orderdate": "string",
"o_orderpriority": "string",
"o_clerk": "string",
"o_shippriority": "int",
"o_comment": "string",
},
"customer": {
"c_custkey": "bigint",
"c_name": "string",
"c_address": "string",
"c_nationkey": "bigint",
"c_phone": "string",
"c_acctbal": "double",
"c_mktsegment": "string",
"c_comment": "string",
},
"part": {
"p_partkey": "bigint",
"p_name": "string",
"p_mfgr": "string",
"p_brand": "string",
"p_type": "string",
"p_size": "int",
"p_container": "string",
"p_retailprice": "double",
"p_comment": "string",
},
"supplier": {
"s_suppkey": "bigint",
"s_name": "string",
"s_address": "string",
"s_nationkey": "bigint",
"s_phone": "string",
"s_acctbal": "double",
"s_comment": "string",
},
"partsupp": {
"ps_partkey": "bigint",
"ps_suppkey": "bigint",
"ps_availqty": "int",
"ps_supplycost": "double",
"ps_comment": "string",
},
"nation": {
"n_nationkey": "bigint",
"n_name": "string",
"n_regionkey": "bigint",
"n_comment": "string",
},
"region": {
"r_regionkey": "bigint",
"r_name": "string",
"r_comment": "string",
},
}
KIND = "H"
with open(OUTPUT, "w", encoding="UTF-8") as fixture:
for i in range(NUM):
i = i + 1
with open(INPUT.format(i=i), encoding="UTF-8") as file:
original = "\n".join(
line.rstrip()
for line in file.read().split(";")[0].split("\n")
if not line.startswith("--")
)
original = original.replace("`", '"').strip()
now = time.time()
try:
optimized = optimize(original, schema=SCHEMA)
except Exception as e:
print("****", i, e, "****")
continue
fixture.write(
f"""--------------------------------------
-- TPC-{KIND} {i}
--------------------------------------
{original};
{optimized.sql(pretty=True)};
"""
)
print(i, time.time() - now)
|