summaryrefslogtreecommitdiffstats
path: root/mkhufftbl.py
diff options
context:
space:
mode:
Diffstat (limited to 'mkhufftbl.py')
-rwxr-xr-xmkhufftbl.py468
1 files changed, 468 insertions, 0 deletions
diff --git a/mkhufftbl.py b/mkhufftbl.py
new file mode 100755
index 0000000..a9f10d7
--- /dev/null
+++ b/mkhufftbl.py
@@ -0,0 +1,468 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# This script reads Huffman Code table [1] and generates symbol table
+# and decoding tables in C language. The resulting code is used in
+# lib/nghttp2_hd_huffman.h and lib/nghttp2_hd_huffman_data.c
+#
+# [1] https://httpwg.org/specs/rfc7541.html
+
+import re
+import sys
+from io import StringIO
+
+# From [1]
+HUFFMAN_CODE_TABLE = """\
+ ( 0) |11111111|11000 1ff8 [13]
+ ( 1) |11111111|11111111|1011000 7fffd8 [23]
+ ( 2) |11111111|11111111|11111110|0010 fffffe2 [28]
+ ( 3) |11111111|11111111|11111110|0011 fffffe3 [28]
+ ( 4) |11111111|11111111|11111110|0100 fffffe4 [28]
+ ( 5) |11111111|11111111|11111110|0101 fffffe5 [28]
+ ( 6) |11111111|11111111|11111110|0110 fffffe6 [28]
+ ( 7) |11111111|11111111|11111110|0111 fffffe7 [28]
+ ( 8) |11111111|11111111|11111110|1000 fffffe8 [28]
+ ( 9) |11111111|11111111|11101010 ffffea [24]
+ ( 10) |11111111|11111111|11111111|111100 3ffffffc [30]
+ ( 11) |11111111|11111111|11111110|1001 fffffe9 [28]
+ ( 12) |11111111|11111111|11111110|1010 fffffea [28]
+ ( 13) |11111111|11111111|11111111|111101 3ffffffd [30]
+ ( 14) |11111111|11111111|11111110|1011 fffffeb [28]
+ ( 15) |11111111|11111111|11111110|1100 fffffec [28]
+ ( 16) |11111111|11111111|11111110|1101 fffffed [28]
+ ( 17) |11111111|11111111|11111110|1110 fffffee [28]
+ ( 18) |11111111|11111111|11111110|1111 fffffef [28]
+ ( 19) |11111111|11111111|11111111|0000 ffffff0 [28]
+ ( 20) |11111111|11111111|11111111|0001 ffffff1 [28]
+ ( 21) |11111111|11111111|11111111|0010 ffffff2 [28]
+ ( 22) |11111111|11111111|11111111|111110 3ffffffe [30]
+ ( 23) |11111111|11111111|11111111|0011 ffffff3 [28]
+ ( 24) |11111111|11111111|11111111|0100 ffffff4 [28]
+ ( 25) |11111111|11111111|11111111|0101 ffffff5 [28]
+ ( 26) |11111111|11111111|11111111|0110 ffffff6 [28]
+ ( 27) |11111111|11111111|11111111|0111 ffffff7 [28]
+ ( 28) |11111111|11111111|11111111|1000 ffffff8 [28]
+ ( 29) |11111111|11111111|11111111|1001 ffffff9 [28]
+ ( 30) |11111111|11111111|11111111|1010 ffffffa [28]
+ ( 31) |11111111|11111111|11111111|1011 ffffffb [28]
+' ' ( 32) |010100 14 [ 6]
+'!' ( 33) |11111110|00 3f8 [10]
+'"' ( 34) |11111110|01 3f9 [10]
+'#' ( 35) |11111111|1010 ffa [12]
+'$' ( 36) |11111111|11001 1ff9 [13]
+'%' ( 37) |010101 15 [ 6]
+'&' ( 38) |11111000 f8 [ 8]
+''' ( 39) |11111111|010 7fa [11]
+'(' ( 40) |11111110|10 3fa [10]
+')' ( 41) |11111110|11 3fb [10]
+'*' ( 42) |11111001 f9 [ 8]
+'+' ( 43) |11111111|011 7fb [11]
+',' ( 44) |11111010 fa [ 8]
+'-' ( 45) |010110 16 [ 6]
+'.' ( 46) |010111 17 [ 6]
+'/' ( 47) |011000 18 [ 6]
+'0' ( 48) |00000 0 [ 5]
+'1' ( 49) |00001 1 [ 5]
+'2' ( 50) |00010 2 [ 5]
+'3' ( 51) |011001 19 [ 6]
+'4' ( 52) |011010 1a [ 6]
+'5' ( 53) |011011 1b [ 6]
+'6' ( 54) |011100 1c [ 6]
+'7' ( 55) |011101 1d [ 6]
+'8' ( 56) |011110 1e [ 6]
+'9' ( 57) |011111 1f [ 6]
+':' ( 58) |1011100 5c [ 7]
+';' ( 59) |11111011 fb [ 8]
+'<' ( 60) |11111111|1111100 7ffc [15]
+'=' ( 61) |100000 20 [ 6]
+'>' ( 62) |11111111|1011 ffb [12]
+'?' ( 63) |11111111|00 3fc [10]
+'@' ( 64) |11111111|11010 1ffa [13]
+'A' ( 65) |100001 21 [ 6]
+'B' ( 66) |1011101 5d [ 7]
+'C' ( 67) |1011110 5e [ 7]
+'D' ( 68) |1011111 5f [ 7]
+'E' ( 69) |1100000 60 [ 7]
+'F' ( 70) |1100001 61 [ 7]
+'G' ( 71) |1100010 62 [ 7]
+'H' ( 72) |1100011 63 [ 7]
+'I' ( 73) |1100100 64 [ 7]
+'J' ( 74) |1100101 65 [ 7]
+'K' ( 75) |1100110 66 [ 7]
+'L' ( 76) |1100111 67 [ 7]
+'M' ( 77) |1101000 68 [ 7]
+'N' ( 78) |1101001 69 [ 7]
+'O' ( 79) |1101010 6a [ 7]
+'P' ( 80) |1101011 6b [ 7]
+'Q' ( 81) |1101100 6c [ 7]
+'R' ( 82) |1101101 6d [ 7]
+'S' ( 83) |1101110 6e [ 7]
+'T' ( 84) |1101111 6f [ 7]
+'U' ( 85) |1110000 70 [ 7]
+'V' ( 86) |1110001 71 [ 7]
+'W' ( 87) |1110010 72 [ 7]
+'X' ( 88) |11111100 fc [ 8]
+'Y' ( 89) |1110011 73 [ 7]
+'Z' ( 90) |11111101 fd [ 8]
+'[' ( 91) |11111111|11011 1ffb [13]
+'\' ( 92) |11111111|11111110|000 7fff0 [19]
+']' ( 93) |11111111|11100 1ffc [13]
+'^' ( 94) |11111111|111100 3ffc [14]
+'_' ( 95) |100010 22 [ 6]
+'`' ( 96) |11111111|1111101 7ffd [15]
+'a' ( 97) |00011 3 [ 5]
+'b' ( 98) |100011 23 [ 6]
+'c' ( 99) |00100 4 [ 5]
+'d' (100) |100100 24 [ 6]
+'e' (101) |00101 5 [ 5]
+'f' (102) |100101 25 [ 6]
+'g' (103) |100110 26 [ 6]
+'h' (104) |100111 27 [ 6]
+'i' (105) |00110 6 [ 5]
+'j' (106) |1110100 74 [ 7]
+'k' (107) |1110101 75 [ 7]
+'l' (108) |101000 28 [ 6]
+'m' (109) |101001 29 [ 6]
+'n' (110) |101010 2a [ 6]
+'o' (111) |00111 7 [ 5]
+'p' (112) |101011 2b [ 6]
+'q' (113) |1110110 76 [ 7]
+'r' (114) |101100 2c [ 6]
+'s' (115) |01000 8 [ 5]
+'t' (116) |01001 9 [ 5]
+'u' (117) |101101 2d [ 6]
+'v' (118) |1110111 77 [ 7]
+'w' (119) |1111000 78 [ 7]
+'x' (120) |1111001 79 [ 7]
+'y' (121) |1111010 7a [ 7]
+'z' (122) |1111011 7b [ 7]
+'{' (123) |11111111|1111110 7ffe [15]
+'|' (124) |11111111|100 7fc [11]
+'}' (125) |11111111|111101 3ffd [14]
+'~' (126) |11111111|11101 1ffd [13]
+ (127) |11111111|11111111|11111111|1100 ffffffc [28]
+ (128) |11111111|11111110|0110 fffe6 [20]
+ (129) |11111111|11111111|010010 3fffd2 [22]
+ (130) |11111111|11111110|0111 fffe7 [20]
+ (131) |11111111|11111110|1000 fffe8 [20]
+ (132) |11111111|11111111|010011 3fffd3 [22]
+ (133) |11111111|11111111|010100 3fffd4 [22]
+ (134) |11111111|11111111|010101 3fffd5 [22]
+ (135) |11111111|11111111|1011001 7fffd9 [23]
+ (136) |11111111|11111111|010110 3fffd6 [22]
+ (137) |11111111|11111111|1011010 7fffda [23]
+ (138) |11111111|11111111|1011011 7fffdb [23]
+ (139) |11111111|11111111|1011100 7fffdc [23]
+ (140) |11111111|11111111|1011101 7fffdd [23]
+ (141) |11111111|11111111|1011110 7fffde [23]
+ (142) |11111111|11111111|11101011 ffffeb [24]
+ (143) |11111111|11111111|1011111 7fffdf [23]
+ (144) |11111111|11111111|11101100 ffffec [24]
+ (145) |11111111|11111111|11101101 ffffed [24]
+ (146) |11111111|11111111|010111 3fffd7 [22]
+ (147) |11111111|11111111|1100000 7fffe0 [23]
+ (148) |11111111|11111111|11101110 ffffee [24]
+ (149) |11111111|11111111|1100001 7fffe1 [23]
+ (150) |11111111|11111111|1100010 7fffe2 [23]
+ (151) |11111111|11111111|1100011 7fffe3 [23]
+ (152) |11111111|11111111|1100100 7fffe4 [23]
+ (153) |11111111|11111110|11100 1fffdc [21]
+ (154) |11111111|11111111|011000 3fffd8 [22]
+ (155) |11111111|11111111|1100101 7fffe5 [23]
+ (156) |11111111|11111111|011001 3fffd9 [22]
+ (157) |11111111|11111111|1100110 7fffe6 [23]
+ (158) |11111111|11111111|1100111 7fffe7 [23]
+ (159) |11111111|11111111|11101111 ffffef [24]
+ (160) |11111111|11111111|011010 3fffda [22]
+ (161) |11111111|11111110|11101 1fffdd [21]
+ (162) |11111111|11111110|1001 fffe9 [20]
+ (163) |11111111|11111111|011011 3fffdb [22]
+ (164) |11111111|11111111|011100 3fffdc [22]
+ (165) |11111111|11111111|1101000 7fffe8 [23]
+ (166) |11111111|11111111|1101001 7fffe9 [23]
+ (167) |11111111|11111110|11110 1fffde [21]
+ (168) |11111111|11111111|1101010 7fffea [23]
+ (169) |11111111|11111111|011101 3fffdd [22]
+ (170) |11111111|11111111|011110 3fffde [22]
+ (171) |11111111|11111111|11110000 fffff0 [24]
+ (172) |11111111|11111110|11111 1fffdf [21]
+ (173) |11111111|11111111|011111 3fffdf [22]
+ (174) |11111111|11111111|1101011 7fffeb [23]
+ (175) |11111111|11111111|1101100 7fffec [23]
+ (176) |11111111|11111111|00000 1fffe0 [21]
+ (177) |11111111|11111111|00001 1fffe1 [21]
+ (178) |11111111|11111111|100000 3fffe0 [22]
+ (179) |11111111|11111111|00010 1fffe2 [21]
+ (180) |11111111|11111111|1101101 7fffed [23]
+ (181) |11111111|11111111|100001 3fffe1 [22]
+ (182) |11111111|11111111|1101110 7fffee [23]
+ (183) |11111111|11111111|1101111 7fffef [23]
+ (184) |11111111|11111110|1010 fffea [20]
+ (185) |11111111|11111111|100010 3fffe2 [22]
+ (186) |11111111|11111111|100011 3fffe3 [22]
+ (187) |11111111|11111111|100100 3fffe4 [22]
+ (188) |11111111|11111111|1110000 7ffff0 [23]
+ (189) |11111111|11111111|100101 3fffe5 [22]
+ (190) |11111111|11111111|100110 3fffe6 [22]
+ (191) |11111111|11111111|1110001 7ffff1 [23]
+ (192) |11111111|11111111|11111000|00 3ffffe0 [26]
+ (193) |11111111|11111111|11111000|01 3ffffe1 [26]
+ (194) |11111111|11111110|1011 fffeb [20]
+ (195) |11111111|11111110|001 7fff1 [19]
+ (196) |11111111|11111111|100111 3fffe7 [22]
+ (197) |11111111|11111111|1110010 7ffff2 [23]
+ (198) |11111111|11111111|101000 3fffe8 [22]
+ (199) |11111111|11111111|11110110|0 1ffffec [25]
+ (200) |11111111|11111111|11111000|10 3ffffe2 [26]
+ (201) |11111111|11111111|11111000|11 3ffffe3 [26]
+ (202) |11111111|11111111|11111001|00 3ffffe4 [26]
+ (203) |11111111|11111111|11111011|110 7ffffde [27]
+ (204) |11111111|11111111|11111011|111 7ffffdf [27]
+ (205) |11111111|11111111|11111001|01 3ffffe5 [26]
+ (206) |11111111|11111111|11110001 fffff1 [24]
+ (207) |11111111|11111111|11110110|1 1ffffed [25]
+ (208) |11111111|11111110|010 7fff2 [19]
+ (209) |11111111|11111111|00011 1fffe3 [21]
+ (210) |11111111|11111111|11111001|10 3ffffe6 [26]
+ (211) |11111111|11111111|11111100|000 7ffffe0 [27]
+ (212) |11111111|11111111|11111100|001 7ffffe1 [27]
+ (213) |11111111|11111111|11111001|11 3ffffe7 [26]
+ (214) |11111111|11111111|11111100|010 7ffffe2 [27]
+ (215) |11111111|11111111|11110010 fffff2 [24]
+ (216) |11111111|11111111|00100 1fffe4 [21]
+ (217) |11111111|11111111|00101 1fffe5 [21]
+ (218) |11111111|11111111|11111010|00 3ffffe8 [26]
+ (219) |11111111|11111111|11111010|01 3ffffe9 [26]
+ (220) |11111111|11111111|11111111|1101 ffffffd [28]
+ (221) |11111111|11111111|11111100|011 7ffffe3 [27]
+ (222) |11111111|11111111|11111100|100 7ffffe4 [27]
+ (223) |11111111|11111111|11111100|101 7ffffe5 [27]
+ (224) |11111111|11111110|1100 fffec [20]
+ (225) |11111111|11111111|11110011 fffff3 [24]
+ (226) |11111111|11111110|1101 fffed [20]
+ (227) |11111111|11111111|00110 1fffe6 [21]
+ (228) |11111111|11111111|101001 3fffe9 [22]
+ (229) |11111111|11111111|00111 1fffe7 [21]
+ (230) |11111111|11111111|01000 1fffe8 [21]
+ (231) |11111111|11111111|1110011 7ffff3 [23]
+ (232) |11111111|11111111|101010 3fffea [22]
+ (233) |11111111|11111111|101011 3fffeb [22]
+ (234) |11111111|11111111|11110111|0 1ffffee [25]
+ (235) |11111111|11111111|11110111|1 1ffffef [25]
+ (236) |11111111|11111111|11110100 fffff4 [24]
+ (237) |11111111|11111111|11110101 fffff5 [24]
+ (238) |11111111|11111111|11111010|10 3ffffea [26]
+ (239) |11111111|11111111|1110100 7ffff4 [23]
+ (240) |11111111|11111111|11111010|11 3ffffeb [26]
+ (241) |11111111|11111111|11111100|110 7ffffe6 [27]
+ (242) |11111111|11111111|11111011|00 3ffffec [26]
+ (243) |11111111|11111111|11111011|01 3ffffed [26]
+ (244) |11111111|11111111|11111100|111 7ffffe7 [27]
+ (245) |11111111|11111111|11111101|000 7ffffe8 [27]
+ (246) |11111111|11111111|11111101|001 7ffffe9 [27]
+ (247) |11111111|11111111|11111101|010 7ffffea [27]
+ (248) |11111111|11111111|11111101|011 7ffffeb [27]
+ (249) |11111111|11111111|11111111|1110 ffffffe [28]
+ (250) |11111111|11111111|11111101|100 7ffffec [27]
+ (251) |11111111|11111111|11111101|101 7ffffed [27]
+ (252) |11111111|11111111|11111101|110 7ffffee [27]
+ (253) |11111111|11111111|11111101|111 7ffffef [27]
+ (254) |11111111|11111111|11111110|000 7fffff0 [27]
+ (255) |11111111|11111111|11111011|10 3ffffee [26]
+EOS (256) |11111111|11111111|11111111|111111 3fffffff [30]
+"""
+
+class Node:
+
+ def __init__(self, term = None):
+ self.term = term
+ self.left = None
+ self.right = None
+ self.trans = []
+ self.id = None
+ self.accept = False
+
+class Context:
+
+ def __init__(self):
+ self.next_id_ = 0
+ self.root = Node()
+
+ def next_id(self):
+ id = self.next_id_
+ self.next_id_ += 1
+ return id
+
+def _add(node, sym, bits):
+ if len(bits) == 0:
+ node.term = sym
+ return
+ else:
+ if bits[0] == '0':
+ if node.left is None:
+ node.left = Node()
+ child = node.left
+ else:
+ if node.right is None:
+ node.right = Node()
+ child = node.right
+ _add(child, sym, bits[1:])
+
+def huffman_tree_add(ctx, sym, bits):
+ _add(ctx.root, sym, bits)
+
+def _set_node_id(ctx, node, prefix):
+ if node.term is not None:
+ return
+ if len(prefix) <= 7 and [1] * len(prefix) == prefix:
+ node.accept = True
+ node.id = ctx.next_id()
+ _set_node_id(ctx, node.left, prefix + [0])
+ _set_node_id(ctx, node.right, prefix + [1])
+
+def huffman_tree_set_node_id(ctx):
+ _set_node_id(ctx, ctx.root, [])
+
+def _traverse(node, sym, start_node, root, left):
+ if left == 0:
+ if sym == 256:
+ sym = None
+ node = None
+ start_node.trans.append((node, sym))
+ return
+
+ if node.term is not None:
+ node = root
+
+ def go(node):
+ if node.term is not None:
+ assert sym is None
+ nsym = node.term
+ else:
+ nsym = sym
+
+ _traverse(node, nsym, start_node, root, left - 1)
+
+ go(node.left)
+ go(node.right)
+
+def _build_transition_table(ctx, node):
+ if node is None:
+ return
+ _traverse(node, None, node, ctx.root, 4)
+ _build_transition_table(ctx, node.left)
+ _build_transition_table(ctx, node.right)
+
+def huffman_tree_build_transition_table(ctx):
+ _build_transition_table(ctx, ctx.root)
+
+NGHTTP2_HUFF_ACCEPTED = 1 << 14
+NGHTTP2_HUFF_SYM = 1 << 15
+
+def _print_transition_table(node):
+ if node.term is not None:
+ return
+ print('/* {} */'.format(node.id))
+ print('{')
+ for nd, sym in node.trans:
+ flags = 0
+ if sym is None:
+ out = 0
+ else:
+ out = sym
+ flags |= NGHTTP2_HUFF_SYM
+ if nd is None:
+ id = 256
+ else:
+ id = nd.id
+ if id is None:
+ # if nd.id is None, it is a leaf node
+ id = 0
+ flags |= NGHTTP2_HUFF_ACCEPTED
+ elif nd.accept:
+ flags |= NGHTTP2_HUFF_ACCEPTED
+ print(' {{0x{:02x}, {}}},'.format(id | flags, out))
+ print('},')
+ _print_transition_table(node.left)
+ _print_transition_table(node.right)
+
+def huffman_tree_print_transition_table(ctx):
+ _print_transition_table(ctx.root)
+ print('/* 256 */')
+ print('{')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print(' {0x100, 0},')
+ print('},')
+
+if __name__ == '__main__':
+ ctx = Context()
+ symbol_tbl = [(None, 0) for i in range(257)]
+
+ for line in StringIO(HUFFMAN_CODE_TABLE):
+ m = re.match(
+ r'.*\(\s*(\d+)\)\s+([|01]+)\s+(\S+)\s+\[\s*(\d+)\].*', line)
+ if m:
+ sym = int(m.group(1))
+ bits = re.sub(r'\|', '', m.group(2))
+ code = m.group(3)
+ nbits = int(m.group(4))
+ if len(code) > 8:
+ raise Error('Code is more than 4 bytes long')
+ assert(len(bits) == nbits)
+ symbol_tbl[sym] = (nbits, code)
+ huffman_tree_add(ctx, sym, bits)
+
+ huffman_tree_set_node_id(ctx)
+ huffman_tree_build_transition_table(ctx)
+
+ print('''\
+typedef struct {
+ uint32_t nbits;
+ uint32_t code;
+} nghttp2_huff_sym;
+''')
+
+ print('''\
+const nghttp2_huff_sym huff_sym_table[] = {''')
+ for i in range(257):
+ nbits = symbol_tbl[i][0]
+ k = int(symbol_tbl[i][1], 16)
+ k = k << (32 - nbits)
+ print('''\
+ {{ {}, 0x{}u }}{}\
+'''.format(symbol_tbl[i][0], hex(k)[2:], ',' if i < 256 else ''))
+ print('};')
+ print()
+
+ print('''\
+enum {{
+ NGHTTP2_HUFF_ACCEPTED = {},
+ NGHTTP2_HUFF_SYM = {},
+}} nghttp2_huff_decode_flag;
+'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM))
+
+ print('''\
+typedef struct {
+ uint16_t fstate;
+ uint8_t sym;
+} nghttp2_huff_decode;
+''')
+
+ print('''\
+const nghttp2_huff_decode huff_decode_table[][16] = {''')
+ huffman_tree_print_transition_table(ctx)
+ print('};')