summaryrefslogtreecommitdiffstats
path: root/regexp/parse-unidata.awk
blob: b9ed351ee63b071ee6c21540cd60ffec7a4aa036 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#
# parse-unidata.awk - generate a table (unicode_case_mapping_upper)
#
# Copyright (C) 2020 g10 Code GmbH
#
# This file is part of GnuPG.
#
# GnuPG is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GnuPG is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#

# Parse the Unicode character data from:
#   https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
# to generate the uppercase case-mapping table.

# Runs once before any input is read: configure parsing of the
# ";"-separated input, reset the counter of emitted entries, and
# print the opening of the generated C table.
BEGIN {
    FS = ";"
    count = 0

    print "/* Generated from UnicodeData.txt */"
    print ""
    print "static const struct casemap unicode_case_mapping_upper[] = {"
}

# Process one record of UnicodeData.txt.  Fields used (";"-separated):
#   $1  - code point in hexadecimal, without a "0x" prefix
#   $3  - general category
#   $13 - simple uppercase mapping (hexadecimal code point, or empty)
# For every record whose category starts with "L", whose code point is
# in the range 0x80..0xFFFF and which has an uppercase mapping, emit one
# "{ 0xCODE, 0xUPPER }," table entry, four entries per output line.
{
    # Convert the hex field by hand.  int("0x" $1) only yields the code
    # point on awk implementations whose string-to-number conversion
    # understands hexadecimal; others (e.g. gawk in its default mode)
    # return 0, so every record would be skipped as ASCII.
    code = hex_to_num($1)
    class = $3
    upper = $13

    # Skip ASCII (and records whose code field is not valid hex).
    if (code <= 127) {
	next
    }
    # Skip everything that does not fit into 16 bits.
    if (code > 65535) {
	next
    }
    # Only categories starting with "L" are considered.
    if (class !~ /^L/) {
	next
    }
    if (upper != "") {
	# Pass the data as arguments, never as part of the format string.
	printf("\t{ 0x%s, 0x%s },", tolower($1), tolower(upper))
	count++
	# Break the output line after every fourth entry.
	if ((count % 4) == 0) {
	    print("")
	}
    }
}

# Return the numeric value of the hexadecimal string HEX (given without
# a "0x" prefix, upper or lower case).  Returns 0 for an empty string
# and -1 if HEX contains a character that is not a hex digit.
function hex_to_num(hex,    i, len, digit, value)
{
    hex = tolower(hex)
    len = length(hex)
    value = 0
    for (i = 1; i <= len; i++) {
	# index() is 1-based and returns 0 when the character is absent.
	digit = index("0123456789abcdef", substr(hex, i, 1))
	if (digit == 0) {
	    return -1
	}
	value = value * 16 + (digit - 1)
    }
    return value
}

# Runs once after all input has been read: terminate the current output
# line (if any) and close the C array initializer.
END {
    print "\n};"
}