diff options
Diffstat (limited to 'src/lib/unicodemap.pl')
-rwxr-xr-x | src/lib/unicodemap.pl | 162 |
1 files changed, 162 insertions, 0 deletions
#!/usr/bin/env perl
#
# unicodemap.pl - generate C lookup tables from UnicodeData.txt.
#
# Reads semicolon-separated records from the files named on the command
# line (or from stdin when none are given) and prints C array definitions
# to stdout.  Three fields of each record are used:
#
#   field 0   the character's code point (hex)
#   field 5   its decomposition mapping, optionally prefixed by a <tag>
#   field 14  its titlecase mapping (hex)
#
# A record contributes EITHER a titlecase mapping OR a decomposition, never
# both: a record with a non-empty titlecase field is never looked at for
# its decomposition.
use strict;
use warnings;

# Titlecase mappings, split by the width of the key code point.
my (%titlecase8, %uni8_decomp);
my (@titlecase16_keys, @titlecase16_values);
my (@titlecase32_keys, @titlecase32_values);
# Single-character decompositions, split by the width of the key code point.
my (@uni16_decomp_keys, @uni16_decomp_values);
my (@uni32_decomp_keys, @uni32_decomp_values);
# Multi-character decompositions: keys[i] decomposes to the 0-terminated
# run of code points starting at values[offsets[i]].
my (@multidecomp_keys, @multidecomp_offsets, @multidecomp_values);

# Report a violated size assumption on stderr and stop with exit status 1.
sub fatal {
    my ($message) = @_;
    print STDERR "Error: $message\n";
    exit 1;
}

# Record the titlecase mapping $code -> $value.
# Dies when a key of at most 16 bits maps to a value wider than 16 bits,
# because the generated tables for those keys hold uint16_t values.
sub add_titlecase {
    my ($code, $value) = @_;

    # the same character, ignore
    return if $value == $code;

    if ($code <= 0xff) {
        die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
        $titlecase8{$code} = $value;
    } elsif ($code <= 0xffff) {
        die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
        push @titlecase16_keys, $code;
        push @titlecase16_values, $value;
    } else {
        push @titlecase32_keys, $code;
        push @titlecase32_values, $value;
    }
    return;
}

# Record the decomposition of $code.  $decomp_codes is the decomposition
# field with any leading "<tag> " already removed: one hex code point, or
# several separated by spaces.
sub add_decomposition {
    my ($code, $decomp_codes) = @_;

    if ($decomp_codes =~ /^([0-9A-Z]*)$/i) {
        # unicharacter decomposition. use separate lists for this
        my $value = hex($1);
        fatal("We've assumed decomposition codes are max. 32bit")
            if $value > 0xffffffff;

        if ($code <= 0xff) {
            # uni8_decomp_map is emitted as uint16_t, so a wider value would
            # be silently truncated in the generated C code.
            die "Error: We've assumed 8bit decomposition keys have max. 16bit values"
                if ($value > 0xffff);
            $uni8_decomp{$code} = $value;
        } elsif ($code <= 0xffff) {
            push @uni16_decomp_keys, $code;
            push @uni16_decomp_values, $value;
        } else {
            push @uni32_decomp_keys, $code;
            push @uni32_decomp_values, $value;
        }
        return;
    }

    # multicharacter decomposition.
    fatal("We've assumed multi-decomposition key codes are max. 32bit")
        if $code > 0xffffffff;

    # multidecomp_offsets is emitted as uint16_t; refuse to overflow it.
    my $offset = scalar(@multidecomp_values);
    die "Error: We've assumed multi-decomposition offsets are max. 16bit"
        if ($offset > 0xffff);

    push @multidecomp_keys, $code;
    push @multidecomp_offsets, $offset;

    foreach my $dcode (split(" ", $decomp_codes)) {
        my $value = hex($dcode);
        fatal("We've assumed decomposition codes are max. 32bit")
            if $value > 0xffffffff;
        push @multidecomp_values, $value;
    }
    # Each run of decomposition values is terminated by a 0 entry.
    push @multidecomp_values, 0;
    return;
}

# Parse one input record and store the mapping it describes, if any.
# Lines whose first field is not a hex number (blank lines, for instance)
# are ignored.
sub process_line {
    my ($line) = @_;

    # Strip the line terminator; tolerate CRLF input as well.
    $line =~ s/\r?\n\z//;

    # split() drops trailing empty fields, so high-numbered fields may be
    # undefined rather than empty.
    my @fields = split(/;/, $line);
    return unless @fields && $fields[0] =~ /^[0-9A-Fa-f]+$/;

    my $code      = hex($fields[0]);
    my $decomp    = $fields[5];
    my $titlecode = $fields[14];

    if (defined($titlecode) && $titlecode ne "") {
        # titlecase mapping
        add_titlecase($code, hex($titlecode));
    } elsif (defined($decomp) && $decomp =~ /(?:\<[^>]*> )?(.+)/) {
        # decompositions
        my $decomp_codes = $1;
        add_decomposition($code, $decomp_codes);
    }
    return;
}

# Print the numbers in the referenced array as "0x%05x" literals separated
# by commas, eight per line, continuation lines starting with a tab.
# Nothing is printed after the last element, or for an empty array.
sub print_list {
    my ($list) = @_;

    my $last = $#{$list};
    foreach my $i (0 .. $last) {
        printf("0x%05x", $list->[$i]);
        last if ($i == $last);
        print ",";
        print(($i + 1) % 8 == 0 ? "\n\t" : " ");
    }
    return;
}

# Print a full 256-entry table for the referenced hash: entry $i is the
# hash value stored under key $i, or $i itself when the key has no value.
sub print_map8 {
    my ($map) = @_;

    my @list = map { defined($map->{$_}) ? $map->{$_} : $_ } (0 .. 0xff);
    print_list(\@list);
    return;
}

# Print one C array definition.  $decl is the element type plus the
# declarator (e.g. 'uint16_t foo[]'); $data is either an array reference
# (printed as-is) or a hash reference (expanded through print_map8).
sub print_table {
    my ($decl, $data) = @_;

    print "static const $decl = {\n\t";
    if (ref($data) eq 'HASH') {
        print_map8($data);
    } else {
        print_list($data);
    }
    print "\n};\n";
    return;
}

# Print the generated-file banner followed by every collected table.
sub print_tables {
    print "/* This file is automatically generated by unicodemap.pl from UnicodeData.txt

   NOTE: decompositions for characters having titlecase characters
   are not included, because we first translate everything to titlecase */\n";

    print_table('uint16_t titlecase8_map[256]', \%titlecase8);
    print_table('uint16_t titlecase16_keys[]', \@titlecase16_keys);
    print_table('uint16_t titlecase16_values[]', \@titlecase16_values);
    print_table('uint32_t titlecase32_keys[]', \@titlecase32_keys);
    print_table('uint32_t titlecase32_values[]', \@titlecase32_values);

    print_table('uint16_t uni8_decomp_map[256]', \%uni8_decomp);
    print_table('uint16_t uni16_decomp_keys[]', \@uni16_decomp_keys);
    print_table('uint32_t uni16_decomp_values[]', \@uni16_decomp_values);
    print_table('uint32_t uni32_decomp_keys[]', \@uni32_decomp_keys);
    print_table('uint32_t uni32_decomp_values[]', \@uni32_decomp_values);

    print_table('uint32_t multidecomp_keys[]', \@multidecomp_keys);
    print_table('uint16_t multidecomp_offsets[]', \@multidecomp_offsets);
    print_table('uint32_t multidecomp_values[]', \@multidecomp_values);
    return;
}

# Read every input record, then emit the tables.
sub main {
    while (my $line = <>) {
        process_line($line);
    }
    print_tables();
    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised on their own.
main() unless caller;

1;