summaryrefslogtreecommitdiffstats
path: root/lib/compression/tests/scripts/three-byte-hash
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compression/tests/scripts/three-byte-hash')
-rwxr-xr-xlib/compression/tests/scripts/three-byte-hash49
1 files changed, 49 insertions, 0 deletions
diff --git a/lib/compression/tests/scripts/three-byte-hash b/lib/compression/tests/scripts/three-byte-hash
new file mode 100755
index 0000000..100d0bc
--- /dev/null
+++ b/lib/compression/tests/scripts/three-byte-hash
@@ -0,0 +1,49 @@
+#!/usr/bin/python3
+"""Print statistics about a certain three byte hash.
+
+USAGE: three_byte_hash
+"""
+import sys
+
+if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) > 1:
+ print(__doc__)
+ exit(not ('--help' in sys.argv or '-h' in sys.argv))
+
+
+from statistics import mean, pstdev, median
+
+
+def h(*args, bits=12):
+ a = args[0]
+ b = args[1] ^ 0x2e
+ c = args[2] ^ 0x55
+ d = ((a + b) << 8) ^ (((c - a) & 0xffff) << 5) ^ (c + b) ^ (0xcab + a)
+ return d & ((1 << bits) - 1)
+
+
+def count(fn, bits, filter=None):
+ counts = [0] * (1 << bits)
+ for i in range(256 ** 3):
+ a, b, c = i & 255, (i >> 8) & 255, i >> 16
+ if filter and not (filter(a) and filter(b) and filter(c)):
+ continue
+
+ h = fn(a, b, c, bits=bits)
+ counts[h] += 1
+
+ print(f" {bits} bits; {len(counts)} buckets, "
+ f"expected {(1<<24) / len(counts)}")
+ print(f"median {median(counts)}")
+ print(f"mean {mean(counts)}")
+ print(f"min {min(counts)}")
+ print(f"max {max(counts)}")
+ print(f"stddev {pstdev(counts)}")
+
+
+for b in (12, 13, 14):
+ count(h, b)
+
+ print("With ASCII filter")
+ letters = set(range(32, 127))
+ letters |= set(b'\r\n\t\0')
+ count(h, b, filter=letters.__contains__)