summaryrefslogtreecommitdiffstats
path: root/src/common/unicode/generate-unicode_combining_table.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/unicode/generate-unicode_combining_table.pl')
-rw-r--r--src/common/unicode/generate-unicode_combining_table.pl51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_combining_table.pl
new file mode 100644
index 0000000..8177c20
--- /dev/null
+++ b/src/common/unicode/generate-unicode_combining_table.pl
@@ -0,0 +1,51 @@
+#!/usr/bin/perl
+#
+# Generate sorted list of non-overlapping intervals of non-spacing
+# characters, using Unicode data files as input. Pass UnicodeData.txt
+# as argument. The output is on stdout.
+#
+# Copyright (c) 2019-2022, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+my $range_start = undef;
+my $codepoint;
+my $prev_codepoint;
+my $count = 0;
+
+print
+ "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";
+
+print "static const struct mbinterval combining[] = {\n";
+
+foreach my $line (<ARGV>)
+{
+ chomp $line;
+ my @fields = split ';', $line;
+ $codepoint = hex $fields[0];
+
+ if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
+ {
+ # combining character, save for start of range
+ if (!defined($range_start))
+ {
+ $range_start = $codepoint;
+ }
+ }
+ else
+ {
+ # not a combining character, print out previous range if any
+ if (defined($range_start))
+ {
+ printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
+ $range_start = undef;
+ }
+ }
+}
+continue
+{
+ $prev_codepoint = $codepoint;
+}
+
+print "};\n";