summaryrefslogtreecommitdiffstats
path: root/gfx/harfbuzz/src/gen-emoji-table.py
blob: 42a3fb8de59744fcacd554c380dc568791c9446f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3

"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt

Input files:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
"""

import sys
from collections import OrderedDict
import packTab

# Both input files are mandatory; otherwise exit with the usage text.
if len (sys.argv) != 3:
	sys.exit (__doc__)

# Read emoji-data.txt.  Unicode publishes its data files as UTF-8, so decode
# explicitly instead of relying on the locale default, and use a context
# manager so the handle is closed as soon as parsing is finished.
with open (sys.argv[1], encoding='utf-8') as f:
	# The first ten lines are echoed later into the generated file's banner.
	header = [f.readline () for _ in range(10)]

	# Maps each property name to a list of inclusive (start, end) code point
	# ranges, in the order the properties first appear in the file.
	ranges = OrderedDict()
	for line in f:
		line = line.strip()
		if not line or line[0] == '#':
			continue
		# Data lines look like "<range> ; <property> # comment"; keep only
		# the first two ';'-separated fields that precede the comment.
		rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

		# "XXXX..YYYY" is a range, a lone "XXXX" is a single code point.
		rang = [int(s, 16) for s in rang.split('..')]
		if len(rang) > 1:
			start, end = rang
		else:
			start = end = rang[0]

		spans = ranges.setdefault(typ, [])
		if spans and spans[-1][1] == start - 1:
			# Directly follows the previous range: extend it in place.
			spans[-1] = (spans[-1][0], end)
		else:
			spans.append((start, end))



# Emit the top of the generated header on stdout: the provenance comment
# (which quotes the first lines of the input data file), the include guard
# and the #include line.
# NOTE(review): the command line echoed below names only emoji-data.txt while
# the usage text of this script requires two arguments; it is reproduced
# unchanged here because it is part of the generated output.
for text in (
	"/* == Start of generated table == */",
	"/*",
	" * The following tables are generated by running:",
	" *",
	" *   ./gen-emoji-table.py emoji-data.txt",
	" *",
	" * on file with this header:",
	" *",
):
	print (text)

# Quote the captured header lines of the data file inside the comment.
for header_line in header:
	print (" * %s" % (header_line.strip()))

for text in (
	" */",
	"",
	"#ifndef HB_UNICODE_EMOJI_TABLE_HH",
	"#define HB_UNICODE_EMOJI_TABLE_HH",
	"",
	'#include "hb-unicode.hh"',
	"",
):
	print (text)

# Only the Extended_Pictographic property gets a lookup table; every other
# property collected from the data file is ignored.
wanted = "Extended_Pictographic"
if wanted in ranges:
	# One entry (value 1) per code point covered by an inclusive span.
	arr = {}
	for first, last in ranges[wanted]:
		arr.update(dict.fromkeys(range(first, last + 1), 1))

	# NOTE(review): the second positional argument (0) is presumably the
	# value for code points absent from arr — confirm against packTab.
	sol = packTab.pack_table(arr, 0, compression=9)
	code = packTab.Code('_hb_emoji')
	sol.genCode(code, 'is_' + wanted)
	code.print_c(linkage='static inline')
	print()

# Close the include guard and mark the end of the generated section.
for text in (
	"",
	"#endif /* HB_UNICODE_EMOJI_TABLE_HH */",
	"",
	"/* == End of generated table == */",
):
	print (text)


# Generate test file.
# Every emoji-test.txt entry made of two or more code points becomes one line
# of the shaping test file written below.
sequences = []
# emoji-test.txt carries literal emoji in its comments, so decode it as UTF-8
# explicitly rather than depending on the locale's default encoding.
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the code point field: drop the trailing '#' comment
        # first, then everything from the first ';' onwards.
        line = line.partition("#")[0].partition(";")[0]
        # split() with no argument tolerates tabs and repeated spaces and
        # never yields empty strings.
        codepoints = line.split()
        # Blank lines and single code point entries are not sequences.
        if len(codepoints) < 2:
            continue
        sequences.append(codepoints)

with open("../test/shape/data/in-house/tests/emoji-clusters.tests", "w") as f:
    for sequence in sequences:
        f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
        f.write(";" + ",".join(sequence))
        # The expected-output field is "1=0" repeated once per code point.
        f.write(";[" + "|".join("1=0" for c in sequence) + "]\n")