diff options
Diffstat (limited to 'lib/compression/tests/scripts/three-byte-hash')
-rwxr-xr-x | lib/compression/tests/scripts/three-byte-hash | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/lib/compression/tests/scripts/three-byte-hash b/lib/compression/tests/scripts/three-byte-hash new file mode 100755 index 0000000..100d0bc --- /dev/null +++ b/lib/compression/tests/scripts/three-byte-hash @@ -0,0 +1,49 @@ +#!/usr/bin/python3 +"""Print statistics about a certain three byte hash. + +USAGE: three_byte_hash +""" +import sys + +if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) > 1: + print(__doc__) + exit(not ('--help' in sys.argv or '-h' in sys.argv)) + + +from statistics import mean, pstdev, median + + +def h(*args, bits=12): + a = args[0] + b = args[1] ^ 0x2e + c = args[2] ^ 0x55 + d = ((a + b) << 8) ^ (((c - a) & 0xffff) << 5) ^ (c + b) ^ (0xcab + a) + return d & ((1 << bits) - 1) + + +def count(fn, bits, filter=None): + counts = [0] * (1 << bits) + for i in range(256 ** 3): + a, b, c = i & 255, (i >> 8) & 255, i >> 16 + if filter and not (filter(a) and filter(b) and filter(c)): + continue + + h = fn(a, b, c, bits=bits) + counts[h] += 1 + + print(f" {bits} bits; {len(counts)} buckets, " + f"expected {(1<<24) / len(counts)}") + print(f"median {median(counts)}") + print(f"mean {mean(counts)}") + print(f"min {min(counts)}") + print(f"max {max(counts)}") + print(f"stddev {pstdev(counts)}") + + +for b in (12, 13, 14): + count(h, b) + + print("With ASCII filter") + letters = set(range(32, 127)) + letters |= set(b'\r\n\t\0') + count(h, b, filter=letters.__contains__) |