summaryrefslogtreecommitdiffstats
path: root/lib/compression/tests/scripts/decode-huffman-header
blob: 9c1279b641b54f37636b508d756cab6934e94bb5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python3
"""Print the codes in the first Huffman tree header in the given file.

USAGE: decode-huffman-header FILE

The number of codes of different length is printed first, followed by
the implied total frequency of codes, followed by the deduced codes.

If the total is not 1.0, the header is invalid.
"""

import sys


if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) != 2:
    print(__doc__)
    exit(len(sys.argv) != 2)


def read_table(data):
    lengths = [[] for x in range(16)]
    for i, b in enumerate(data):
        even = b & 15
        odd = b >> 4
        lengths[even].append(i * 2)
        lengths[odd].append(i * 2 + 1)

    code = 0

    total = 0.0
    for i, bucket in enumerate(lengths):
        if bucket and i:
            portion = 1.0 / (1 << i) * len(bucket)
            total += portion
            print(f"length {i:2}: {len(bucket):4} ({portion})")
    print(f"total {total}")

    for i, bucket in enumerate(lengths):
        if i == 0:
            continue
        code <<= 1
        for c in bucket:
            print(f'{c:03x}   {code:0{i}b}')
            code += 1


def main():
    fn = sys.argv[1]
    with open(fn, 'rb') as f:
        data = f.read(256)
        read_table(data)


main()