summaryrefslogtreecommitdiffstats
path: root/lib/compression/tests/scripts/three-byte-hash
blob: 100d0bc39d8338e274775a9eed15788df4760abd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/python3
"""Print statistics about a certain three byte hash.

USAGE: three_byte_hash
"""
import sys

if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) > 1:
    print(__doc__)
    exit(not ('--help' in sys.argv or '-h' in sys.argv))


from statistics import mean, pstdev, median


def h(*args, bits=12):
    a = args[0]
    b = args[1] ^ 0x2e
    c = args[2] ^ 0x55
    d = ((a + b) << 8) ^ (((c - a) & 0xffff) << 5) ^ (c + b) ^ (0xcab + a)
    return d & ((1 << bits) - 1)


def count(fn, bits, filter=None):
    counts = [0] * (1 << bits)
    for i in range(256 ** 3):
        a, b, c = i & 255, (i >> 8) & 255, i >> 16
        if filter and not (filter(a) and filter(b) and filter(c)):
            continue

        h = fn(a, b, c, bits=bits)
        counts[h] += 1

    print(f" {bits} bits; {len(counts)} buckets, "
          f"expected {(1<<24) / len(counts)}")
    print(f"median {median(counts)}")
    print(f"mean {mean(counts)}")
    print(f"min {min(counts)}")
    print(f"max {max(counts)}")
    print(f"stddev {pstdev(counts)}")


for b in (12, 13, 14):
    count(h, b)

    print("With ASCII filter")
    letters = set(range(32, 127))
    letters |= set(b'\r\n\t\0')
    count(h, b, filter=letters.__contains__)