#!/usr/bin/perl
#
# Generate table of Unicode normalization "quick check" properties
# (see UAX #15).  Pass DerivedNormalizationProps.txt as argument.  The
# output is on stdout.
#
# Copyright (c) 2020, PostgreSQL Global Development Group
#
# Provenance: added as src/common/unicode/generate-unicode_normprops_table.pl
# by commit 6eb9c5a5657d1fe77b55cc261450f3538d35a94d ("Adding upstream
# version 13.4.", Daniel Baumann, 2024-05-04).

use strict;
use warnings;

# Don't build the tables for the "D" forms because they are too big.
# See also unicode_is_normalized_quickcheck().  Entries for these
# properties are dropped while parsing so they are never held in memory.
my %skip_form = map { $_ => 1 } qw(NFD_QC NFKD_QC);

# Maps the quick-check value letter found in the input file to the C
# symbol written into the generated table.
my %qc_symbol_for = (
	N => 'UNICODE_NORM_QC_NO',
	M => 'UNICODE_NORM_QC_MAYBE');

# Run only when executed as a script, so the helpers below can be loaded
# without reading any input.
main() unless caller;

# Entry point: reads the file(s) named on the command line (or stdin if
# none are given) and prints the generated C source to stdout.
sub main
{
	# Per property name, a hash of code point (integer) => value letter.
	my %data;

	print
	  "/* generated by src/common/unicode/generate-unicode_normprops_table.pl, do not edit */\n\n";

	# NOTE(review): the text of this prologue was missing from the copy
	# of the script this file was restored from (only "print <" survived).
	# It follows the upstream PostgreSQL 13 generator -- confirm against
	# upstream before regenerating headers.
	print <<'EOT';
#include "common/unicode_norm.h"

/*
 * We use a bit field here to save space.
 */
typedef struct
{
	unsigned int codepoint:21;
	signed int	quickcheck:4;	/* really UnicodeNormalizationQC */
} pg_unicode_normprops;
EOT

	# Read line by line instead of slurping the whole input into a list.
	while (my $line = <ARGV>)
	{
		my @fields = parse_line($line);
		next unless @fields;

		my ($first, $last, $prop, $value) = @fields;
		foreach my $cp ($first .. $last)
		{
			$data{$prop}{$cp} = $value;
		}
	}

	# We create a separate array for each normalization form rather than,
	# say, a two-dimensional array, because that array would be very
	# sparse and would create unnecessary overhead especially for the NFC
	# lookup.
	foreach my $prop (sort keys %data)
	{
		print format_table($prop, $data{$prop});
	}

	return;
}

# Parse one input line.
#
# Returns the list (first code point, last code point, property, value),
# with the code points converted from hex to integers, or an empty list
# when the line carries nothing to record: a blank or comment-only line,
# a property whose name does not contain "_QC", or one of the skipped
# "D" forms.
sub parse_line
{
	my ($line) = @_;

	chomp $line;
	$line =~ s/\s*#.*$//;    # drop the trailing comment
	$line =~ s/\s+$//;       # and any stray trailing whitespace / CR
	return if $line eq '';

	my ($codepoint, $prop, $value) = split /\s*;\s*/, $line;

	# A line without a property field has nothing for us; skip it quietly
	# rather than matching against undef.
	return if !defined $prop || $prop !~ /_QC/;
	return if $skip_form{$prop};

	# The first field is either a single code point or a "first..last"
	# range, both in hex.
	my ($first, $last) = split /\.\./, $codepoint;
	$last = $first unless defined $last;

	return (hex($first), hex($last), $prop, $value);
}

# Build the C array definition for one property.
#
# $prop is the property name (used in the array name); $qc_of is a
# reference to a hash of code point => value letter.  Entries are written
# in ascending code point order.  Dies if a value letter is neither
# 'N' nor 'M'.
sub format_table
{
	my ($prop, $qc_of) = @_;

	my $out = "\n";
	$out .=
	  "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";

	foreach my $cp (sort { $a <=> $b } keys %$qc_of)
	{
		my $value = $qc_of->{$cp};
		my $qc = defined $value ? $qc_symbol_for{$value} : undef;

		die sprintf(
			"unexpected %s value \"%s\" for code point U+%04X\n",
			$prop, (defined $value ? $value : '(none)'), $cp)
		  unless defined $qc;

		$out .= sprintf "\t{0x%04X, %s},\n", $cp, $qc;
	}

	$out .= "};\n";
	return $out;
}