summary refs log tree commit diff stats
path: root/testing/web-platform/tests/tools/third_party/websockets/experiments/compression/benchmark.py
blob: c5b13c8fa343d2dae8a96a796ed3237905eb9c54 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python

import collections
import getpass
import json
import pickle
import subprocess
import sys
import time
import zlib


CORPUS_FILE = "corpus.pkl"

REPEAT = 10

WB, ML = 12, 5  # defaults used as a reference


def _corpus():
    """Download every commit reachable from the local HEAD via the GitHub API.

    Prompts for an OAuth token, resolves HEAD with ``git rev-parse`` in the
    current working directory, then follows the ``parents`` links of each
    commit breadth-first, fetching every commit exactly once with ``curl``.

    Returns:
        A list of raw response bodies (``bytes``), one per commit, in the
        order the commits were visited.

    Raises:
        subprocess.CalledProcessError: if ``git`` or ``curl`` exits with a
            non-zero status; ``--fail`` makes curl do so on HTTP errors
            instead of returning an error document without ``parents``.
    """
    token = getpass.getpass("OAuth Token? ")
    commit_url = (
        "https://api.github.com/repos/python-websockets/websockets/git/commits/{sha}"
    )

    # Commands are passed as argument lists (no shell), so neither the token
    # nor a sha is ever interpreted by a shell.
    head = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()

    commits = []
    todo = collections.deque([head])
    # A sha is marked as seen when it is queued, which keeps the "already
    # scheduled?" test O(1) and guarantees each commit is fetched once.
    seen = {head}

    while todo:
        sha = todo.popleft()
        commit = subprocess.check_output(
            [
                "curl",
                "--fail",
                "-H",
                f"Authorization: token {token}",
                commit_url.format(sha=sha),
            ]
        )
        commits.append(commit)
        for parent in json.loads(commit)["parents"]:
            parent_sha = parent["sha"]
            if parent_sha not in seen:
                seen.add(parent_sha)
                todo.append(parent_sha)
        time.sleep(1)  # rate throttling

    return commits


def corpus():
    """Fetch the commit corpus with ``_corpus()`` and pickle it to CORPUS_FILE."""
    commits = _corpus()
    with open(CORPUS_FILE, "wb") as output:
        pickle.dump(commits, output)


def _print_table(title, cell):
    """Print one result table: a row per wbits, a column per memLevel.

    ``cell`` is called as ``cell(wbits, memLevel)`` and must return the text
    to display for that combination.
    """
    mem_levels = range(1, 10)
    print("=" * 79)
    print(title)
    print("=" * 79)
    print("\t".join(["wb \\ ml"] + [str(memLevel) for memLevel in mem_levels]))
    for wbits in range(9, 16):
        row = [str(wbits)] + [cell(wbits, memLevel) for memLevel in mem_levels]
        print("\t".join(row))
    print("=" * 79)
    print()


def _run(data):
    """Benchmark raw-deflate compression of ``data`` and print four tables.

    For every ``wbits`` in 9..15 and ``memLevel`` in 1..9, one compressor is
    created and every item of ``data`` is compressed REPEAT times through it,
    each item followed by a ``Z_SYNC_FLUSH``. The compressed size and the
    elapsed time are both averaged per pass over the corpus.

    Args:
        data: a re-iterable collection of ``bytes`` (or ``str``, which is
            encoded as UTF-8) messages.

    Raises:
        ValueError: if ``data`` contains no bytes at all, since every ratio
            would otherwise divide by zero.
    """
    # Encode once up front: sizes are then measured in bytes for str items
    # too, and the timed loop below only measures compression.
    messages = [
        item.encode("utf-8") if isinstance(item, str) else item for item in data
    ]
    raw_size = sum(len(message) for message in messages)
    if raw_size == 0:
        raise ValueError("corpus is empty; nothing to benchmark")

    size = {}
    duration = {}

    for wbits in range(9, 16):
        size[wbits] = {}
        duration[wbits] = {}

        for memLevel in range(1, 10):
            encoder = zlib.compressobj(wbits=-wbits, memLevel=memLevel)
            compressed_total = 0

            t0 = time.perf_counter()

            for _ in range(REPEAT):
                for message in messages:
                    # Taken from PerMessageDeflate.encode
                    frame = encoder.compress(message)
                    frame += encoder.flush(zlib.Z_SYNC_FLUSH)
                    if frame.endswith(b"\x00\x00\xff\xff"):
                        frame = frame[:-4]
                    compressed_total += len(frame)

            t1 = time.perf_counter()

            # Both metrics are per pass over the corpus; raw_size covers a
            # single pass, so the size must not be summed over all REPEAT
            # passes or the compression ratio is meaningless.
            size[wbits][memLevel] = compressed_total / REPEAT
            duration[wbits][memLevel] = (t1 - t0) / REPEAT

    _print_table(
        "Compression ratio",
        lambda wb, ml: f"{100 * (1 - size[wb][ml] / raw_size):.1f}%",
    )
    _print_table(
        "CPU time",
        lambda wb, ml: f"{1000 * duration[wb][ml]:.1f}ms",
    )
    # The last two tables are relative to the reference settings WB / ML.
    _print_table(
        f"Size vs. {WB} \\ {ML}",
        lambda wb, ml: f"{100 * (size[wb][ml] / size[WB][ML] - 1):.1f}%",
    )
    _print_table(
        f"Time vs. {WB} \\ {ML}",
        lambda wb, ml: f"{100 * (duration[wb][ml] / duration[WB][ML] - 1):.1f}%",
    )


def run():
    """Load the pickled corpus from CORPUS_FILE and benchmark it with ``_run()``."""
    # NOTE: pickle.load executes whatever the file describes; only use a
    # corpus file you trust.
    with open(CORPUS_FILE, "rb") as source:
        messages = pickle.load(source)
    _run(messages)


if __name__ == "__main__":
    # Dispatch only to the documented sub-commands. Looking the name up in
    # globals() would accept any module-level name (e.g. "sys" or "_run") and
    # fail with a confusing TypeError, and rebinding ``run`` would shadow the
    # function of the same name.
    commands = {"corpus": corpus, "run": run}
    try:
        command = commands[sys.argv[1]]
    except (KeyError, IndexError):
        print(f"Usage: {sys.argv[0]} [corpus|run]")
    else:
        command()