diff options
Diffstat (limited to 'src/common/unicode/generate-unicode_combining_table.pl')
-rw-r--r-- | src/common/unicode/generate-unicode_combining_table.pl | 51 |
1 files changed, 51 insertions, 0 deletions
#!/usr/bin/perl
#
# Generate sorted list of non-overlapping intervals of non-spacing
# characters, using Unicode data files as input.  Pass UnicodeData.txt
# as argument.  The output is on stdout.
#
# Each input line is expected to be a ';'-separated record whose first
# field is the codepoint in hexadecimal and whose third field is the
# general category.  A range starts at the first line whose category is
# Me or Mn and ends at the codepoint of the last line read before a line
# with any other category (or before end of input).  Codepoints that do
# not appear in the input but lie between two such lines therefore fall
# inside the emitted range.
#
# Copyright (c) 2019-2022, PostgreSQL Global Development Group

use strict;
use warnings;

# First codepoint of the range currently being collected, or undef when
# no range is open.
my $range_start = undef;

# Codepoint of the current line and of the previously processed line.
# Both are file-scoped because the continue block below needs them.
my $codepoint;
my $prev_codepoint;

print
  "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";

print "static const struct mbinterval combining[] = {\n";

# Read line by line; a foreach over <ARGV> would load the whole input
# into memory before processing the first line.
while (my $line = <ARGV>)
{
	chomp $line;
	my @fields = split ';', $line;

	# Skip blank or truncated lines that carry no general category,
	# rather than treating them as codepoint 0 with an undefined category.
	next if @fields < 3;

	$codepoint = hex $fields[0];

	if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
	{
		# combining character, save for start of range
		if (!defined($range_start))
		{
			$range_start = $codepoint;
		}
	}
	else
	{
		# not a combining character, print out previous range if any
		if (defined($range_start))
		{
			printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
			$range_start = undef;
		}
	}
}
continue
{
	# Runs after every iteration, including skipped ones; on a skipped
	# line $codepoint is unchanged, so this is a no-op there.
	$prev_codepoint = $codepoint;
}

# If the input ended while a range was still open, emit it now.  At this
# point $prev_codepoint is the codepoint of the last record processed.
if (defined($range_start))
{
	printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
}

print "};\n";