Adding upstream version 13.4. (refs: upstream/13.4, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:19:15 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:19:15 +0000
commit: 6eb9c5a5657d1fe77b55cc261450f3538d35a94d (patch)
tree: 657d8194422a5daccecfd42d654b8a245ef7b4c8 /src/common/unicode/generate-unicode_normprops_table.pl
parent: Initial commit. (diff)
download: postgresql-13-upstream.tar.xz
postgresql-13-upstream.zip
1 files changed, 88 insertions, 0 deletions
diff --git a/src/common/unicode/generate-unicode_normprops_table.pl b/src/common/unicode/generate-unicode_normprops_table.pl
new file mode 100644
index 0000000..e8e5097
--- /dev/null
+++ b/src/common/unicode/generate-unicode_normprops_table.pl
@@ -0,0 +1,88 @@
+#!/usr/bin/perl
+#
+# Generate table of Unicode normalization "quick check" properties
+# (see UAX #15).  Pass DerivedNormalizationProps.txt as argument.  The
+# output is on stdout.
+#
+# Copyright (c) 2020, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+my %data;    # $data{property name}{code point} = property value
+
+# Emit the fixed preamble of the generated C file in one piece.
+print <<"END_OF_PREAMBLE";
+/* generated by src/common/unicode/generate-unicode_normprops_table.pl, do not edit */
+
+#include "common/unicode_norm.h"
+
+/*
+ * We use a bit field here to save space.
+ */
+typedef struct
+{
+	unsigned int codepoint:21;
+	signed int	quickcheck:4;	/* really UnicodeNormalizationQC */
+}			pg_unicode_normprops;
+END_OF_PREAMBLE
+
+# Read the input one line at a time (no need to slurp the whole file).
+# After comments are stripped, a data line is split on ";" into a code
+# point (or "first..last" range), a property name and a value.
+while (my $line = <ARGV>)
+{
+	chomp $line;
+	$line =~ s/\s*#.*$//;
+	next if $line eq '';
+
+	my ($codepoint, $prop, $value) = split /\s*;\s*/, $line;
+
+	# Skip lines without a property field, and non-quick-check properties.
+	next unless defined $prop && $prop =~ /_QC/;
+
+	# A lone code point is treated as a range of length one.
+	my ($first, $last) = split /\.\./, $codepoint;
+	$last = $first unless defined $last;
+
+	foreach my $cp (hex($first) .. hex($last))
+	{
+		$data{$prop}{$cp} = $value;
+	}
+}
+
+# Property values from the data file and the C enum names they map to.
+my %qc_name = (
+	N => 'UNICODE_NORM_QC_NO',
+	M => 'UNICODE_NORM_QC_MAYBE',);
+
+# We create a separate array for each normalization form rather than,
+# say, a two-dimensional array, because that array would be very
+# sparse and would create unnecessary overhead especially for the NFC
+# lookup.
+foreach my $prop (sort keys %data)
+{
+	# Don't build the tables for the "D" forms because they are too
+	# big.  See also unicode_is_normalized_quickcheck().
+	next if $prop eq "NFD_QC" || $prop eq "NFKD_QC";
+
+	print "\n";
+	print
+	  "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";
+
+	# Work through a reference; copying the per-property hash is wasteful.
+	my $subdata = $data{$prop};
+	foreach my $cp (sort { $a <=> $b } keys %$subdata)
+	{
+		my $value = $subdata->{$cp};
+		my $qc = defined $value ? $qc_name{$value} : undef;
+
+		# Only "N" and "M" can be translated; anything else is fatal.
+		die sprintf("unexpected value \"%s\" of %s for code point U+%04X",
+			(defined $value ? $value : '(undef)'), $prop, $cp)
+		  unless defined $qc;
+		printf "\t{0x%04X, %s},\n", $cp, $qc;
+	}
+
+	print "};\n";
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:19:15 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:19:15 +0000
commit	6eb9c5a5657d1fe77b55cc261450f3538d35a94d (patch)
tree	657d8194422a5daccecfd42d654b8a245ef7b4c8 /src/common/unicode/generate-unicode_normprops_table.pl
parent	Initial commit. (diff)
download	postgresql-13-upstream.tar.xz postgresql-13-upstream.zip