summaryrefslogtreecommitdiffstats
path: root/sal/textenc/generate/cns116431992.pl
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 16:51:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 16:51:28 +0000
commit940b4d1848e8c70ab7642901a68594e8016caffc (patch)
treeeb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /sal/textenc/generate/cns116431992.pl
parentInitial commit. (diff)
downloadlibreoffice-940b4d1848e8c70ab7642901a68594e8016caffc.tar.xz
libreoffice-940b4d1848e8c70ab7642901a68594e8016caffc.zip
Adding upstream version 1:7.0.4.upstream/1%7.0.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sal/textenc/generate/cns116431992.pl')
-rw-r--r--sal/textenc/generate/cns116431992.pl845
1 files changed, 845 insertions, 0 deletions
diff --git a/sal/textenc/generate/cns116431992.pl b/sal/textenc/generate/cns116431992.pl
new file mode 100644
index 000000000..29bbefabf
--- /dev/null
+++ b/sal/textenc/generate/cns116431992.pl
@@ -0,0 +1,845 @@
+#!/usr/bin/env perl
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+
+# The following files must be available in a ./input subdir:
+
+# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
+# "Unicode version: 3.1.1 Table version: 1.1 Date: 28 June 2001"
+# contains descriptions for:
+# U+3400..4DFF CJK Unified Ideographs Extension A
+# U+4E00..9FFF CJK Unified Ideographs
+# U+F900..FAFF CJK Compatibility Ideographs
+# U+20000..2F7FF CJK Unified Ideographs Extension B
+# U+2F800..2FFFF CJK Compatibility Ideographs Supplement
+
+# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
+# "Unicode version: 1.1 Table version: 0.0d1 Date: 21 October 1994"
+# contains mappings for CNS 11643-1986
+
+# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
+# "Unicode version: 1.1 Table version: 0.49 Date: 26 March 1998"
+# contains mappings for CNS 11643-1992 that are incompatible with
+# CNS11643.TXT
+
+# Base name of the encoding: it is spliced into the generated C identifiers
+# ("aImpl" . $id . "ToUnicodeData" etc.) and, lower-cased, forms the names of
+# the output file (<id>.tab) and of the script whose header is copied (<id>.pl).
+$id = "Cns116431992";
+
+# Tell whether the given number is a Unicode code point that may appear in a
+# mapping: inside 0..0x10FFFF, not a surrogate (U+D800..DFFF), not in
+# U+FDD0..FDEF, and not one of the xxFFFE/xxFFFF values of any plane.
+# Returns a true value if so, a false value otherwise.
+sub isValidUtf32
+{
+    my ($code_point) = @_;
+
+    # Outside the Unicode code space.
+    return !1 if $code_point < 0 || $code_point > 0x10FFFF;
+    # Surrogate code points.
+    return !1 if $code_point >= 0xD800 && $code_point <= 0xDFFF;
+    # The U+FDD0..FDEF block is rejected as well.
+    return !1 if $code_point >= 0xFDD0 && $code_point <= 0xFDEF;
+    # Finally exclude the last two code points of every plane.
+    return ($code_point & 0xFFFF) < 0xFFFE;
+}
+
+# Format a code point the usual Unicode way: "U+" followed by at least four
+# upper-case hexadecimal digits.
+sub printUtf32
+{
+    my ($code_point) = @_;
+    my $text = sprintf("U+%04X", $code_point);
+    return $text;
+}
+
+# Tell whether (plane, row, column) names a cell of the CNS 11643-1992 code
+# space as accepted by this script: plane 1..16, row 1..94, column 1..94.
+# Returns a true value if so, a false value otherwise.
+sub isValidCns116431992
+{
+    my ($plane, $row, $column) = @_;
+
+    return !1 unless $plane >= 1 && $plane <= 16;
+    return !1 unless $row >= 1 && $row <= 94;
+    return $column >= 1 && $column <= 94;
+}
+
+# Format a CNS 11643 cell as "<plane>-<row>/<column>", with row and column
+# zero-padded to two decimal digits.
+sub printCns116431992
+{
+    my ($plane, $row, $column) = @_;
+    my @fields = ($plane, $row, $column);
+    return sprintf("%d-%02d/%02d", @fields);
+}
+
+# Describe how much of a table is occupied: "<used>/<space> bytes (<p>%)",
+# where <p> is used/space as a percentage with one decimal place.
+# $space must be non-zero.
+sub printStats
+{
+    my ($used, $space) = @_;
+    my $percentage = $used * 100 / $space;
+    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percentage);
+}
+
+# Return the blank padding needed so that the first entry of a table line
+# lands in its proper slot.
+#
+#   $column_width     - width, in characters, of one printed table entry
+#   $columns_per_line - number of entries printed per output line
+#   $end              - index of the first entry that will actually be printed
+#
+# The result holds $column_width blanks for every slot between the start of
+# the line containing $end and $end itself; it is empty when $end sits at the
+# start of a line. All arguments are expected to be integers.
+#
+# Only lexical variables are used, so calling this function no longer
+# overwrites the package variables $output, $i and $j.
+sub printSpaces
+{
+    my ($column_width, $columns_per_line, $end) = @_;
+
+    # Number of slots skipped at the beginning of the line.
+    my $line_start = int($end / $columns_per_line) * $columns_per_line;
+    my $skipped = $end - $line_start;
+
+    # Guard against a negative count on either side (two negatives would
+    # otherwise multiply to a positive repeat count).
+    return "" if $skipped <= 0 || $column_width <= 0;
+    return " " x ($column_width * $skipped);
+}
+
+# Number of CNS 11643 cells first filled from each input file; the three
+# values and their sum are printed once the tables have been written.
+$count_Unihan_txt = 0;
+$count_CNS11643_TXT = 0;
+$count_Uni2CNS = 0;
+
+# Pass 1: read input/Unihan.txt and record the CNS 11643 -> Unicode mappings
+# found in its kCNS1992 and kIRG_TSource fields in @cns_map, marking the
+# plane in @cns_plane_used and counting new cells in $count_Unihan_txt.
+#
+# Both fields have the form "U+<hex>\t<field>\t<P>-<RRCC>"; row and column
+# are stored with an offset of 0x20 and are converted to 1..94 here.
+# A cell that is already mapped to a different code point is fatal for
+# kCNS1992 but only reported as a warning for kIRG_TSource (the mapping
+# recorded first is kept). A kCNS1992 line that does not have the expected
+# shape aborts the run.
+if (1)
+{
+    $filename = "Unihan.txt";
+    open my $unihan, "<", "input/" . $filename
+        or die "Cannot read " . $filename;
+    while (<$unihan>)
+    {
+        if (/^U\+([0-9A-F]+)\t(kCNS1992|kIRG_TSource)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
+        {
+            my $field = $2;
+            my $utf32 = oct("0x" . $1);
+            my $cns_plane = oct("0x" . $3);
+            my $cns_row = oct("0x" . $4) - 0x20;
+            my $cns_column = oct("0x" . $5) - 0x20;
+
+            # printUtf32 already supplies the "U+" prefix.
+            isValidUtf32($utf32)
+                or die "Bad UTF32 char " . printUtf32($utf32);
+            isValidCns116431992($cns_plane, $cns_row, $cns_column)
+                or die "Bad CNS11643-1992 char "
+                    . printCns116431992($cns_plane, $cns_row, $cns_column);
+
+            my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
+            if (!defined($known))
+            {
+                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
+                $cns_plane_used[$cns_plane] = 1;
+                ++$count_Unihan_txt;
+            }
+            elsif ($known != $utf32)
+            {
+                my $conflict = "Mapping "
+                    . printCns116431992($cns_plane, $cns_row, $cns_column)
+                    . " to "
+                    . printUtf32($known)
+                    . ", NOT "
+                    . printUtf32($utf32);
+                # Contradicting kCNS1992 data is an error; kIRG_TSource is
+                # merely reported.
+                die $conflict if $field eq "kCNS1992";
+                print "WARNING! ", $conflict, "\n";
+            }
+        }
+        elsif (/^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
+        {
+            die "Bad format";
+        }
+    }
+    close $unihan;
+}
+
+# Pass 2: read input/CNS11643.TXT and add its mappings for planes 1 and 2 to
+# @cns_map (entries for higher planes are validated but otherwise ignored).
+#
+# Lines look like "0x<P><RR><CC>\t0x<hex>\t#..."; row and column are stored
+# with an offset of 0x20 and are converted to 1..94 here. Cells that are
+# still empty are filled and counted in $count_CNS11643_TXT; a cell already
+# mapped to a different code point aborts the run.
+if (1)
+{
+    $filename = "CNS11643.TXT";
+    open my $cns_txt, "<", "input/" . $filename
+        or die "Cannot read " . $filename;
+    while (<$cns_txt>)
+    {
+        if (/0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/)
+        {
+            my $utf32 = oct("0x" . $4);
+            my $cns_plane = oct("0x" . $1);
+            my $cns_row = oct("0x" . $2) - 0x20;
+            my $cns_column = oct("0x" . $3) - 0x20;
+
+            # printUtf32 already supplies the "U+" prefix.
+            isValidUtf32($utf32)
+                or die "Bad UTF32 char " . printUtf32($utf32);
+            isValidCns116431992($cns_plane, $cns_row, $cns_column)
+                or die "Bad CNS11643-1992 char "
+                    . printCns116431992($cns_plane, $cns_row, $cns_column);
+
+            # Only planes 1 and 2 are taken from this file.
+            next if $cns_plane > 2;
+
+            my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
+            if (!defined($known))
+            {
+                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
+                $cns_plane_used[$cns_plane] = 1;
+                ++$count_CNS11643_TXT;
+            }
+            else
+            {
+                ($known == $utf32)
+                    or die "Mapping "
+                        . printCns116431992($cns_plane, $cns_row, $cns_column)
+                        . " to "
+                        . printUtf32($known)
+                        . ", NOT "
+                        . printUtf32($utf32);
+            }
+        }
+    }
+    close $cns_txt;
+}
+
+# Pass 3 (disabled by the constant condition): read input/Uni2CNS and add its
+# mappings to cells of @cns_map that are still empty, counting them in
+# $count_Uni2CNS. Mappings that contradict an earlier pass are silently
+# skipped (the consistency check is commented out). Every plane-1 cell
+# found in the file is echoed to standard output.
+if (0)
+{
+    $filename = "Uni2CNS";
+    open my $uni2cns, "<", "input/" . $filename
+        or die "Cannot read " . $filename;
+    while (<$uni2cns>)
+    {
+        if (/([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/)
+        {
+            my $utf32 = oct("0x" . $1);
+            my $cns_plane = oct("0x" . $2);
+            my $cns_row = oct("0x" . $3) - 0x20;
+            my $cns_column = oct("0x" . $4) - 0x20;
+
+            # printUtf32 already supplies the "U+" prefix.
+            isValidUtf32($utf32)
+                or die "Bad UTF32 char " . printUtf32($utf32);
+            isValidCns116431992($cns_plane, $cns_row, $cns_column)
+                or die "Bad CNS11643-1992 char "
+                    . printCns116431992($cns_plane, $cns_row, $cns_column);
+
+            if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
+            {
+                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
+                $cns_plane_used[$cns_plane] = 1;
+                ++$count_Uni2CNS;
+            }
+            else
+            {
+#               ($cns_map[$cns_plane][$cns_row][$cns_column] == $utf32)
+#                   or die "Mapping "
+#                       . printCns116431992($cns_plane,
+#                                           $cns_row,
+#                                           $cns_column)
+#                       . " to "
+#                       . printUtf32($cns_map[$cns_plane]
+#                                            [$cns_row]
+#                                            [$cns_column])
+#                       . ", NOT "
+#                       . printUtf32($utf32);
+            }
+
+            if ($cns_plane == 1)
+            {
+                print printCns116431992($cns_plane, $cns_row, $cns_column),
+                    "\n";
+            }
+        }
+    }
+    close $uni2cns;
+}
+
+# Build the reverse (Unicode -> CNS 11643) table @uni_map from @cns_map.
+# Entries are indexed by Unicode plane (bits 16 and up), page (bits 8..15)
+# and index (bits 0..7) and hold the CNS cell packed as
+# (plane << 16) | (row << 8) | column. @uni_plane_used and @uni_page_used
+# flag the planes and pages that received at least one entry.
+#
+# CNS cells are visited in ascending plane/row/column order. When a code
+# point is reached a second time the entry stored first is kept, except that
+# cells in plane 3 from row 66, column 39 onwards ("fictitious" extensions)
+# always lose against any other cell; every such collision is reported on
+# standard output.
+for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
+{
+ if (defined($cns_plane_used[$cns_plane]))
+ {
+ for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
+ {
+ for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
+ {
+ if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
+ {
+ # Split the code point into plane / page / index.
+ $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
+ $uni_plane = $utf32 >> 16;
+ $uni_page = ($utf32 >> 8) & 0xFF;
+ $uni_index = $utf32 & 0xFF;
+ # First time this code point is seen: store the cell.
+ if (!defined($uni_plane_used[$uni_plane])
+ || !defined($uni_page_used[$uni_plane][$uni_page])
+ || !defined($uni_map[$uni_plane]
+ [$uni_page]
+ [$uni_index]))
+ {
+ $uni_map[$uni_plane][$uni_page][$uni_index]
+ = ($cns_plane << 16)
+ | ($cns_row << 8)
+ | $cns_column;
+ $uni_plane_used[$uni_plane] = 1;
+ $uni_page_used[$uni_plane][$uni_page] = 1;
+ }
+ else
+ {
+ # Unpack the cell that was stored earlier for this
+ # code point.
+ $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
+ $cns1_plane = $cns1 >> 16;
+ $cns1_row = ($cns1 >> 8) & 0xFF;
+ $cns1_column = $cns1 & 0xFF;
+
+ # Do not map from Unicode to Fictitious Character Set
+ # Extensions (Lunde, p. 131), if possible:
+ if ($cns_plane == 3
+ && ($cns_row == 66 && $cns_column > 38
+ || $cns_row > 66))
+ {
+ # The new cell is fictitious: keep the stored one.
+ print " (",
+ printUtf32($utf32),
+ " to fictitious ",
+ printCns116431992($cns_plane,
+ $cns_row,
+ $cns_column),
+ " ignored, favouring ",
+ printCns116431992($cns1_plane,
+ $cns1_row,
+ $cns1_column),
+ ")\n";
+ }
+ elsif ($cns1_plane == 3
+ && ($cns1_row == 66 && $cns1_column > 38
+ || $cns1_row > 66))
+ {
+ # The stored cell is fictitious: replace it with
+ # the new one.
+ $uni_map[$uni_plane][$uni_page][$uni_index]
+ = ($cns_plane << 16)
+ | ($cns_row << 8)
+ | $cns_column;
+ print " (",
+ printUtf32($utf32),
+ " to fictitious ",
+ printCns116431992($cns1_plane,
+ $cns1_row,
+ $cns1_column),
+ " ignored, favouring ",
+ printCns116431992($cns_plane,
+ $cns_row,
+ $cns_column),
+ ")\n";
+ }
+ else
+ {
+ # Genuine ambiguity: keep the stored cell and warn.
+ print "WARNING! Mapping ",
+ printUtf32($utf32),
+ " to ",
+ printCns116431992($cns1_plane,
+ $cns1_row,
+ $cns1_column),
+ ", NOT ",
+ printCns116431992($cns_plane,
+ $cns_row,
+ $cns_column),
+ "\n";
+ }
+ }
+ }
+ }
+ }
+ }
+}
+# Sanity check: no code point in U+0000..007F may have received a CNS 11643
+# mapping. The run is aborted with the offending pair otherwise.
+#
+# Each slot is tested with the loop's own code point; testing a variable
+# left over from the table construction would examine one unrelated slot
+# 128 times and could report an undefined entry.
+if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
+{
+    for my $ascii (0 .. 0x7F)
+    {
+        my $cns = $uni_map[0][0][$ascii];
+        next if !defined($cns);
+        die "Mapping "
+            . printUtf32($ascii)
+            . " to "
+            . printCns116431992($cns >> 16,
+                                ($cns >> 8) & 0xFF,
+                                $cns & 0xFF);
+    }
+}
+
+# Create the output file <id>.tab in the current directory; the OUT handle
+# stays open for all table emitters that follow.
+$filename = lc($id) . ".tab";
+open OUT, ("> " . $filename) or die "Cannot write " . $filename;
+
+# Copy the leading comment block of <id>.pl (looked up in the current
+# directory; presumably this very script) into the output, turning the '#'
+# comment lines into the body of a C comment. Copying stops at the first
+# line that is not a '#' comment.
+# NOTE(review): the comment opener and closer are only produced for lines of
+# the form "#*..."; confirm that the header being copied still has them.
+{
+    $filename = lc($id). ".pl";
+    open my $script, "<", $filename or die "Cannot read ". $filename;
+    my $first = 1;
+    while (<$script>)
+    {
+        if (/^\#!.*$/)
+        {
+            # The interpreter line is not copied.
+        }
+        elsif (/^\#(\*.*)$/)
+        {
+            if ($first == 1)
+            {
+                # First rule line: "#***" becomes "/***".
+                print OUT "/", $1, "\n";
+                $first = 0;
+            }
+            else
+            {
+                # Later rule lines: replace the last character by "/".
+                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
+            }
+        }
+        elsif (/^\# (.*)$/)
+        {
+            print OUT " *", $1, "\n";
+        }
+        elsif (/^\#(.*)$/)
+        {
+            print OUT " *", $1, "\n";
+        }
+        else
+        {
+            # End of the header comment.
+            last;
+        }
+    }
+    # Release the handle instead of leaving it open until the script exits.
+    close $script;
+}
+
+print OUT "\n",
+    "#include \"sal/types.h\"\n",
+    "\n";
+
+# Emit aImpl<id>ToUnicodeData[], the CNS 11643 -> Unicode table, as an array
+# of 16-bit values. For every row of a used plane that holds at least one
+# mapping the following is written:
+#   - one value: first mapped column | (last mapped column << 8)
+#   - one value per column first..last: the code point if it fits in 16 bits,
+#     otherwise its UTF-16 high surrogate; 0xffff for unmapped columns
+#   - and, only when the row contains code points above U+FFFF:
+#       - one value: the first column holding such a code point
+#       - one value per column from there to the last such column: the UTF-16
+#         low surrogate, or 0 for columns without such a code point
+# $cns_data_offsets[plane][row] records the array index where a row starts
+# (-1 for rows without mappings). $cns_data_space and $cns_data_used record,
+# in bytes, the size of each row's data and the part of it that carries
+# information; both are used for the statistics in the row offset table.
+# A per-plane summary (rows, characters) is printed to standard output.
+print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
+$cns_data_index = 0;
+for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
+{
+ if (defined($cns_plane_used[$cns_plane]))
+ {
+ $cns_rows = 0;
+ $cns_chars = 0;
+ for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
+ {
+ # Find the first and last mapped column of this row.
+ $cns_row_first = -1;
+ for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
+ {
+ if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
+ {
+ if ($cns_row_first == -1)
+ {
+ $cns_row_first = $cns_column;
+ }
+ $cns_row_last = $cns_column;
+ }
+ }
+ if ($cns_row_first != -1)
+ {
+ $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
+ ++$cns_rows;
+ print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
+ " */\n";
+
+ $cns_row_surrogates_first = -1;
+ $cns_row_chars = 0;
+ $cns_row_surrogates = 0;
+
+ # Header value: first and last column packed into 16 bits.
+ print OUT " ", $cns_row_first, " | (", $cns_row_last,
+ " << 8), /* first, last */\n";
+ ++$cns_data_index;
+
+ # One value per column; ten values per output line, with the
+ # first line padded so that columns line up.
+ print OUT " ", printSpaces(7, 10, $cns_row_first);
+ $bol = 0;
+ for ($cns_column = $cns_row_first;
+ $cns_column <= $cns_row_last;
+ ++$cns_column)
+ {
+ if ($bol == 1)
+ {
+ print OUT " ";
+ $bol = 0;
+ }
+ if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
+ {
+ $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
+ ++$cns_row_chars;
+ if ($utf32 <= 0xFFFF)
+ {
+ printf OUT "0x%04X,", $utf32;
+ }
+ else
+ {
+ # Code point above U+FFFF: write the high surrogate
+ # here and remember the column range that needs a
+ # low surrogate.
+ ++$cns_row_surrogates;
+ printf OUT "0x%04X,",
+ (0xD800 | (($utf32 - 0x10000) >> 10));
+ if ($cns_row_surrogates_first == -1)
+ {
+ $cns_row_surrogates_first = $cns_column;
+ }
+ $cns_row_surrogates_last = $cns_column;
+ }
+ }
+ else
+ {
+ # Unmapped column inside the first..last range.
+ printf OUT "0xffff,";
+ }
+ ++$cns_data_index;
+ if ($cns_column % 10 == 9)
+ {
+ print OUT "\n";
+ $bol = 1;
+ }
+ }
+ if ($bol == 0)
+ {
+ print OUT "\n";
+ }
+
+ # Optional second part: the low surrogates of the row.
+ if ($cns_row_surrogates_first != -1)
+ {
+ print OUT " ", $cns_row_surrogates_first,
+ ", /* first low-surrogate */\n";
+ ++$cns_data_index;
+
+ print OUT " ",
+ printSpaces(7, 10, $cns_row_surrogates_first);
+ $bol = 0;
+ for ($cns_column = $cns_row_surrogates_first;
+ $cns_column <= $cns_row_surrogates_last;
+ ++$cns_column)
+ {
+ if ($bol == 1)
+ {
+ print OUT " ";
+ $bol = 0;
+ }
+ # Unmapped columns are treated like code point 0 and
+ # therefore get a 0 entry.
+ $utf32 = 0;
+ if (defined($cns_map[$cns_plane]
+ [$cns_row]
+ [$cns_column]))
+ {
+ $utf32
+ = $cns_map[$cns_plane][$cns_row][$cns_column];
+ }
+ if ($utf32 <= 0xFFFF)
+ {
+ printf OUT " 0,";
+ }
+ else
+ {
+ printf OUT "0x%04X,",
+ (0xDC00
+ | (($utf32 - 0x10000) & 0x3FF));
+ }
+ ++$cns_data_index;
+ if ($cns_column % 10 == 9)
+ {
+ print OUT "\n";
+ $bol = 1;
+ }
+ }
+ if ($bol == 0)
+ {
+ print OUT "\n";
+ }
+ }
+
+ # Statistics in bytes (every array element is two bytes):
+ # space = everything written for the row, used = header(s)
+ # plus the entries that carry an actual mapping.
+ $cns_chars += $cns_row_chars;
+ $cns_data_space[$cns_plane][$cns_row]
+ = ($cns_data_index
+ - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
+ $cns_data_used[$cns_plane][$cns_row]
+ = (1 + $cns_row_chars
+ + ($cns_row_surrogates == 0 ?
+ 0 : 1 + $cns_row_surrogates)) * 2;
+ }
+ else
+ {
+ # Row without any mapping: no data, offset -1.
+ print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
+ ": --- */\n";
+ $cns_data_offsets[$cns_plane][$cns_row] = -1;
+ }
+ }
+ print "cns plane ",
+ $cns_plane,
+ ": rows = ",
+ $cns_rows,
+ ", chars = ",
+ $cns_chars,
+ "\n";
+ }
+}
+print OUT "};\n\n";
+
+# Emit aImpl<id>ToUnicodeRowOffsets[]: for every used plane, 94 entries giving
+# the start of each row in the ToUnicodeData array, or -1 for rows without
+# data. Unused planes contribute only a comment. Each real offset is
+# annotated with the row's fill statistics, and $cns_rowoffsets_used[plane]
+# accumulates four bytes per row that has data.
+print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
+foreach my $plane (1 .. 16)
+{
+    if (!defined ($cns_plane_used[$plane]))
+    {
+        print OUT " /* plane ", $plane, ": --- */\n";
+        next;
+    }
+
+    $cns_rowoffsets_used[$plane] = 0;
+    foreach my $row (1 .. 94)
+    {
+        my $data_offset = $cns_data_offsets[$plane][$row];
+        if ($data_offset == -1)
+        {
+            print OUT " -1, /* plane ", $plane, ", row ", $row, " */\n";
+            next;
+        }
+
+        my $row_stats = printStats($cns_data_used[$plane][$row],
+                                   $cns_data_space[$plane][$row]);
+        print OUT " ", $data_offset,
+            ", /* plane ", $plane,
+            ", row ", $row,
+            "; ", $row_stats,
+            " */\n";
+        $cns_rowoffsets_used[$plane] += 4;
+    }
+}
+print OUT "};\n\n";
+
+# Emit aImpl<id>ToUnicodePlaneOffsets[]: one entry per CNS plane 1..16. Used
+# planes get "<n> * 94", where <n> counts the used planes written so far;
+# unused planes get -1. The comment on a used plane shows how many of its
+# 94 four-byte row offsets point at data.
+print OUT "static sal_Int32 const aImpl",
+    $id,
+    "ToUnicodePlaneOffsets[] = {\n";
+$cns_row_offset = 0;
+foreach my $plane (1 .. 16)
+{
+    if (!defined ($cns_plane_used[$plane]))
+    {
+        print OUT " -1, /* plane ", $plane, " */\n";
+        next;
+    }
+
+    my $offset_stats = printStats($cns_rowoffsets_used[$plane], 94 * 4);
+    print OUT " ", $cns_row_offset,
+        " * 94, /* plane ", $plane,
+        "; ", $offset_stats,
+        " */\n";
+    ++$cns_row_offset;
+}
+print OUT "};\n\n";
+
+# Emit aImplUnicodeTo<id>Data[], the Unicode -> CNS 11643 table, as an array
+# of bytes. For every used page (256 code points) of a used Unicode plane
+# the following is written:
+#   - two bytes: the first and the last index within the page that is mapped
+#   - three bytes per index first..last: CNS plane, row and column of the
+#     mapping, or 0, 0, 0 where the code point is unmapped
+# $uni_data_offsets[plane][page] records the array index where a page starts
+# (-1 for unused pages of a used plane). $uni_data_space and $uni_data_used
+# record, in bytes, the size of each page's data and the part of it that
+# carries information.
+print OUT "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
+$uni_data_index = 0;
+for ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane)
+{
+ if (defined($uni_plane_used[$uni_plane]))
+ {
+ for ($uni_page = 0; $uni_page <= 255; ++$uni_page)
+ {
+ if (defined($uni_page_used[$uni_plane][$uni_page]))
+ {
+ $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
+ print OUT " /* plane ", $uni_plane, ", page ", $uni_page,
+ " */\n";
+
+ # Find the first and last mapped index of this page.
+ $uni_page_first = -1;
+ for ($uni_index = 0; $uni_index <= 255; ++$uni_index)
+ {
+ if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
+ {
+ if ($uni_page_first == -1)
+ {
+ $uni_page_first = $uni_index;
+ }
+ $uni_page_last = $uni_index;
+ }
+ }
+
+ $uni_data_used[$uni_plane][$uni_page] = 0;
+
+ # Header: two bytes holding the first and last index.
+ print OUT " ", $uni_page_first, ", ", $uni_page_last,
+ ", /* first, last */\n";
+ $uni_data_index += 2;
+ $uni_data_used[$uni_plane][$uni_page] += 2;
+
+ # Three bytes per index; eight entries per output line, with
+ # the first line padded so that entries line up.
+ print OUT " ", printSpaces(9, 8, $uni_page_first);
+ $bol = 0;
+ for ($uni_index = $uni_page_first;
+ $uni_index <= $uni_page_last;
+ ++$uni_index)
+ {
+ if ($bol == 1)
+ {
+ print OUT " ";
+ $bol = 0;
+ }
+ if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
+ {
+ # Unpack (plane << 16) | (row << 8) | column.
+ $cns = $uni_map[$uni_plane][$uni_page][$uni_index];
+ printf OUT "%2d,%2d,%2d,",
+ $cns >> 16,
+ $cns >> 8 & 0xFF,
+ $cns & 0xFF;
+ $uni_data_used[$uni_plane][$uni_page] += 3;
+ }
+ else
+ {
+ # Unmapped code point inside the first..last range.
+ print OUT " 0, 0, 0,";
+ }
+ $uni_data_index += 3;
+ if ($uni_index % 8 == 7)
+ {
+ print OUT "\n";
+ $bol = 1;
+ }
+ }
+ if ($bol == 0)
+ {
+ print OUT "\n";
+ }
+
+ $uni_data_space[$uni_plane][$uni_page]
+ = $uni_data_index
+ - $uni_data_offsets[$uni_plane][$uni_page];
+ }
+ else
+ {
+ # Page without any mapping: no data, offset -1.
+ $uni_data_offsets[$uni_plane][$uni_page] = -1;
+ print OUT " /* plane ", $uni_plane, ", page ", $uni_page,
+ ": --- */\n";
+ }
+ }
+ }
+ else
+ {
+ print OUT " /* plane ", $uni_plane, ": --- */\n";
+ }
+}
+print OUT "};\n\n";
+
+# Emit aImplUnicodeTo<id>PageOffsets[]: for every used Unicode plane, 256
+# entries giving the start of each page in the UnicodeTo...Data array, or -1
+# for pages without data. Unused planes contribute only a comment. Per
+# plane, the bytes of page offsets in use and the summed used/available data
+# bytes are accumulated for the plane offset table.
+print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
+foreach my $plane (0 .. 16)
+{
+    if (!defined($uni_plane_used[$plane]))
+    {
+        print OUT " /* plane ", $plane, ": --- */\n";
+        next;
+    }
+
+    $uni_pageoffsets_used[$plane] = 0;
+    $uni_data_used_sum[$plane] = 0;
+    $uni_data_space_sum[$plane] = 0;
+    foreach my $page (0 .. 255)
+    {
+        $offset = $uni_data_offsets[$plane][$page];
+        if ($offset == -1)
+        {
+            print OUT " -1, /* plane ", $plane, ", page ", $page, " */\n";
+            next;
+        }
+
+        my $used_bytes = $uni_data_used[$plane][$page];
+        my $space_bytes = $uni_data_space[$plane][$page];
+        print OUT " ", $offset,
+            ", /* plane ", $plane,
+            ", page ", $page,
+            "; ", printStats($used_bytes, $space_bytes),
+            " */\n";
+        $uni_pageoffsets_used[$plane] += 4;
+        $uni_data_used_sum[$plane] += $used_bytes;
+        $uni_data_space_sum[$plane] += $space_bytes;
+    }
+}
+print OUT "};\n\n";
+
+# Emit aImplUnicodeTo<id>PlaneOffsets[]: one entry per Unicode plane 0..16.
+# Used planes get "<n> * 256", where <n> counts the used planes written so
+# far; unused planes get -1. Each used plane is annotated with its page
+# offset and data statistics, and a closing comment gives the totals for the
+# plane offsets, the page offsets and the data.
+print OUT "static sal_Int32 const aImplUnicodeTo",
+    $id,
+    "PlaneOffsets[] = {\n";
+$uni_page_offset = 0;
+$uni_planeoffsets_used = 0;
+$uni_pageoffsets_used_sum = 0;
+$uni_pageoffsets_space_sum = 0;
+$uni_data_used_sum2 = 0;
+$uni_data_space_sum2 = 0;
+foreach my $plane (0 .. 16)
+{
+    if (!defined ($uni_plane_used[$plane]))
+    {
+        print OUT " -1, /* plane ", $plane, " */\n";
+        next;
+    }
+
+    my $page_stats = printStats($uni_pageoffsets_used[$plane], 256 * 4);
+    my $data_stats = printStats($uni_data_used_sum[$plane],
+                                $uni_data_space_sum[$plane]);
+    print OUT " ", $uni_page_offset,
+        " * 256, /* plane ", $plane,
+        "; ", $page_stats,
+        ", ", $data_stats,
+        " */\n";
+    ++$uni_page_offset;
+
+    # Running totals for the closing comment.
+    $uni_planeoffsets_used += 4;
+    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
+    $uni_pageoffsets_space_sum += 256 * 4;
+    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
+    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
+}
+
+my $plane_totals = printStats($uni_planeoffsets_used, 17 * 4);
+my $page_totals = printStats($uni_pageoffsets_used_sum,
+                             $uni_pageoffsets_space_sum);
+my $data_totals = printStats($uni_data_used_sum2, $uni_data_space_sum2);
+print OUT " /* ", $plane_totals, ", ", $page_totals, ", ", $data_totals,
+    " */\n};\n";
+
+# Finish the output file and report on standard output how many mappings
+# each input file contributed, followed by their sum.
+close OUT;
+
+my $total_mappings
+    = $count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS;
+print "Unihan.txt = " . $count_Unihan_txt
+    . ", CNS11643.TXT = " . $count_CNS11643_TXT
+    . ", Uni2CNS = " . $count_Uni2CNS
+    . ", total = "
+    . $total_mappings
+    . "\n";