summaryrefslogtreecommitdiffstats
path: root/tests/tpch.py
blob: ef2b6665f6063208dfb24c9fee17ade6d5c9c6bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import time

from sqlglot.optimizer import optimize

INPUT = "/home/toby/dev/tpch/{i}.sql"
OUTPUT = "/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-h/tpc-h.sql"
NUM = 22
SCHEMA = {
    "lineitem": {
        "l_orderkey": "bigint",
        "l_partkey": "bigint",
        "l_suppkey": "bigint",
        "l_linenumber": "bigint",
        "l_quantity": "double",
        "l_extendedprice": "double",
        "l_discount": "double",
        "l_tax": "double",
        "l_returnflag": "string",
        "l_linestatus": "string",
        "l_shipdate": "string",
        "l_commitdate": "string",
        "l_receiptdate": "string",
        "l_shipinstruct": "string",
        "l_shipmode": "string",
        "l_comment": "string",
    },
    "orders": {
        "o_orderkey": "bigint",
        "o_custkey": "bigint",
        "o_orderstatus": "string",
        "o_totalprice": "double",
        "o_orderdate": "string",
        "o_orderpriority": "string",
        "o_clerk": "string",
        "o_shippriority": "int",
        "o_comment": "string",
    },
    "customer": {
        "c_custkey": "bigint",
        "c_name": "string",
        "c_address": "string",
        "c_nationkey": "bigint",
        "c_phone": "string",
        "c_acctbal": "double",
        "c_mktsegment": "string",
        "c_comment": "string",
    },
    "part": {
        "p_partkey": "bigint",
        "p_name": "string",
        "p_mfgr": "string",
        "p_brand": "string",
        "p_type": "string",
        "p_size": "int",
        "p_container": "string",
        "p_retailprice": "double",
        "p_comment": "string",
    },
    "supplier": {
        "s_suppkey": "bigint",
        "s_name": "string",
        "s_address": "string",
        "s_nationkey": "bigint",
        "s_phone": "string",
        "s_acctbal": "double",
        "s_comment": "string",
    },
    "partsupp": {
        "ps_partkey": "bigint",
        "ps_suppkey": "bigint",
        "ps_availqty": "int",
        "ps_supplycost": "double",
        "ps_comment": "string",
    },
    "nation": {
        "n_nationkey": "bigint",
        "n_name": "string",
        "n_regionkey": "bigint",
        "n_comment": "string",
    },
    "region": {
        "r_regionkey": "bigint",
        "r_name": "string",
        "r_comment": "string",
    },
}
KIND = "H"

with open(OUTPUT, "w", encoding="UTF-8") as fixture:
    for i in range(NUM):
        i = i + 1
        with open(INPUT.format(i=i), encoding="UTF-8") as file:
            original = "\n".join(
                line.rstrip()
                for line in file.read().split(";")[0].split("\n")
                if not line.startswith("--")
            )
            original = original.replace("`", '"').strip()
            now = time.time()
            try:
                optimized = optimize(original, schema=SCHEMA)
            except Exception as e:
                print("****", i, e, "****")
                continue

            fixture.write(
                f"""--------------------------------------
-- TPC-{KIND} {i}
--------------------------------------
{original};
{optimized.sql(pretty=True)};

"""
            )
            print(i, time.time() - now)