summaryrefslogtreecommitdiffstats
path: root/intl/lwbrk/tools
diff options
context:
space:
mode:
Diffstat (limited to 'intl/lwbrk/tools')
-rw-r--r--intl/lwbrk/tools/anzx4051.html709
-rw-r--r--intl/lwbrk/tools/anzx4051.pl356
-rw-r--r--intl/lwbrk/tools/jisx4051class.txt159
-rw-r--r--intl/lwbrk/tools/jisx4051simp.txt24
-rw-r--r--intl/lwbrk/tools/spec_table.html664
5 files changed, 1912 insertions, 0 deletions
diff --git a/intl/lwbrk/tools/anzx4051.html b/intl/lwbrk/tools/anzx4051.html
new file mode 100644
index 0000000000..9f3461a285
--- /dev/null
+++ b/intl/lwbrk/tools/anzx4051.html
@@ -0,0 +1,709 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<html>
+ <head>
+ <title>Analysis of JIS X 4051 to Unicode General Category Mapping</title>
+ </head>
+ <body>
+ <h1>Analysis of JIS X 4051 to Unicode General Category Mapping</h1>
+ <table border="3">
+ <tr bgcolor="blue">
+ <th></th>
+ <th></th>
+ <td bgcolor="red">C</td>
+ <td bgcolor="red">L</td>
+ <td bgcolor="red">M</td>
+ <td bgcolor="red">N</td>
+ <td bgcolor="red">P</td>
+ <td bgcolor="red">S</td>
+ <td bgcolor="red">Z</td>
+ <td bgcolor="white">Total</td>
+ <td bgcolor="yellow">Cc</td>
+ <td bgcolor="yellow">Cf</td>
+ <td bgcolor="yellow">Co</td>
+ <td bgcolor="yellow">Cs</td>
+ <td bgcolor="yellow">Ll</td>
+ <td bgcolor="yellow">Lm</td>
+ <td bgcolor="yellow">Lo</td>
+ <td bgcolor="yellow">Lt</td>
+ <td bgcolor="yellow">Lu</td>
+ <td bgcolor="yellow">Mc</td>
+ <td bgcolor="yellow">Me</td>
+ <td bgcolor="yellow">Mn</td>
+ <td bgcolor="yellow">Nd</td>
+ <td bgcolor="yellow">Nl</td>
+ <td bgcolor="yellow">No</td>
+ <td bgcolor="yellow">Pc</td>
+ <td bgcolor="yellow">Pd</td>
+ <td bgcolor="yellow">Pe</td>
+ <td bgcolor="yellow">Pf</td>
+ <td bgcolor="yellow">Pi</td>
+ <td bgcolor="yellow">Po</td>
+ <td bgcolor="yellow">Ps</td>
+ <td bgcolor="yellow">Sc</td>
+ <td bgcolor="yellow">Sk</td>
+ <td bgcolor="yellow">Sm</td>
+ <td bgcolor="yellow">So</td>
+ <td bgcolor="yellow">Zl</td>
+ <td bgcolor="yellow">Zp</td>
+ <td bgcolor="yellow">Zs</td>
+ </tr>
+ <tr>
+ <th>00_1</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>14</td>
+ <td>1</td>
+ <td></td>
+ <td bgcolor="white">15</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td>2</td>
+ <td>11</td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>01_[a]</th>
+ <th></th>
+ <td></td>
+ <td>32</td>
+ <td>2</td>
+ <td></td>
+ <td>31</td>
+ <td>3</td>
+ <td></td>
+ <td bgcolor="white">68</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>8</td>
+ <td>24</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>2</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td>12</td>
+ <td>1</td>
+ <td></td>
+ <td>17</td>
+ <td></td>
+ <td></td>
+ <td>2</td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>02_7</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td bgcolor="white">1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>03_8</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td bgcolor="white">1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>04_9</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>5</td>
+ <td></td>
+ <td></td>
+ <td bgcolor="white">5</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>5</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>05_[b]</th>
+ <th></th>
+ <td>33</td>
+ <td>153</td>
+ <td></td>
+ <td>33</td>
+ <td>2</td>
+ <td>5</td>
+ <td>13</td>
+ <td bgcolor="white">239</td>
+ <td>32</td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>153</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>33</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>2</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>5</td>
+ <td></td>
+ <td></td>
+ <td>13</td>
+ </tr>
+ <tr>
+ <th>06_15</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>30</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td bgcolor="white">30</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>30</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>07_18</th>
+ <th></th>
+ <td>18</td>
+ <td>157</td>
+ <td></td>
+ <td>33</td>
+ <td>56</td>
+ <td>125</td>
+ <td>2</td>
+ <td bgcolor="white">391</td>
+ <td></td>
+ <td>18</td>
+ <td></td>
+ <td></td>
+ <td>64</td>
+ <td>7</td>
+ <td>5</td>
+ <td></td>
+ <td>81</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>3</td>
+ <td>30</td>
+ <td>4</td>
+ <td>5</td>
+ <td>2</td>
+ <td></td>
+ <td>5</td>
+ <td>36</td>
+ <td>4</td>
+ <td></td>
+ <td>3</td>
+ <td>24</td>
+ <td>98</td>
+ <td>1</td>
+ <td>1</td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>08_COMPLEX</th>
+ <th></th>
+ <td></td>
+ <td>54</td>
+ <td>33</td>
+ <td>20</td>
+ <td>2</td>
+ <td>1</td>
+ <td></td>
+ <td bgcolor="white">110</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td>53</td>
+ <td></td>
+ <td></td>
+ <td>11</td>
+ <td></td>
+ <td>22</td>
+ <td>10</td>
+ <td></td>
+ <td>10</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>2</td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>09_[c]</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>3</td>
+ <td>4</td>
+ <td></td>
+ <td bgcolor="white">7</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>3</td>
+ <td>2</td>
+ <td></td>
+ <td>2</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0A_[d]</th>
+ <th></th>
+ <td>1</td>
+ <td>2</td>
+ <td></td>
+ <td>6</td>
+ <td>25</td>
+ <td>14</td>
+ <td></td>
+ <td bgcolor="white">48</td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>6</td>
+ <td></td>
+ <td></td>
+ <td>3</td>
+ <td>3</td>
+ <td></td>
+ <td>19</td>
+ <td></td>
+ <td>2</td>
+ <td>3</td>
+ <td>7</td>
+ <td>2</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0B_[e]</th>
+ <th></th>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td>1</td>
+ <td>3</td>
+ <td bgcolor="white">6</td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <th>X</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td bgcolor="white">0</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </table>
+ <table border="3">
+ <tr bgcolor="blue">
+ <th></th>
+ <th></th>
+ <td bgcolor="red">00_1</td>
+ <td bgcolor="red">01_[a]</td>
+ <td bgcolor="red">02_7</td>
+ <td bgcolor="red">03_8</td>
+ <td bgcolor="red">04_9</td>
+ <td bgcolor="red">05_[b]</td>
+ <td bgcolor="red">06_15</td>
+ <td bgcolor="red">07_18</td>
+ <td bgcolor="red">08_COMPLEX</td>
+ <td bgcolor="red">09_[c]</td>
+ <td bgcolor="red">0A_[d]</td>
+ <td bgcolor="red">0B_[e]</td>
+ <td bgcolor="red">X</td>
+ </tr>
+ <tr>
+ <th>00</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>33</td>
+ <td>10</td>
+ <td>127</td>
+ <td></td>
+ <td>7</td>
+ <td>44</td>
+ <td>2</td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0E</th>
+ <th></th>
+ <td>1</td>
+ <td>6</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>20</td>
+ <td>1</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>17</th>
+ <th></th>
+ <td>2</td>
+ <td>4</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>110</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>20</th>
+ <th></th>
+ <td>2</td>
+ <td>11</td>
+ <td>1</td>
+ <td></td>
+ <td>5</td>
+ <td>13</td>
+ <td></td>
+ <td>100</td>
+ <td></td>
+ <td></td>
+ <td>4</td>
+ <td>4</td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>21</th>
+ <th></th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>1</td>
+ <td></td>
+ <td>32</td>
+ <td></td>
+ <td>163</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>30</th>
+ <th></th>
+ <td>10</td>
+ <td>47</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>161</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </table>
+ </body>
+</html>
diff --git a/intl/lwbrk/tools/anzx4051.pl b/intl/lwbrk/tools/anzx4051.pl
new file mode 100644
index 0000000000..e76eac6207
--- /dev/null
+++ b/intl/lwbrk/tools/anzx4051.pl
@@ -0,0 +1,356 @@
+#!/usr/bin/perl
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+######################################################################
+#
+# Initialize global variables.
+#
+# None of these three is read anywhere else in this script; they are
+# only reset here.
+#
+######################################################################
+($ui, $li) = (0, 0);
+%utot = ();
+
+######################################################################
+#
+# Open the Unicode database file (input)
+#
+# Every die message below appends $! so that the real reason for the
+# failure (missing file, permissions, ...) is reported.
+#
+######################################################################
+open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt")
+  || die "cannot find UnicodeData-Latest.txt: $!";
+
+######################################################################
+#
+# Open the JIS X 4051 Class file (input)
+#
+######################################################################
+open ( CLASS , "< jisx4051class.txt")
+  || die "cannot find jisx4051class.txt: $!";
+
+######################################################################
+#
+# Open the JIS X 4051 Class simplified mapping (input)
+#
+######################################################################
+open ( SIMP , "< jisx4051simp.txt")
+  || die "cannot find jisx4051simp.txt: $!";
+
+######################################################################
+#
+# Open the HTML report output file
+#
+######################################################################
+open ( OUT , "> anzx4051.html")
+  || die "cannot open output anzx4051.html file: $!";
+
+######################################################################
+#
+# Open the generated C header output file
+#
+######################################################################
+open ( HEADER , "> ../jisx4051class.h")
+  || die "cannot open output ../jisx4051class.h file: $!";
+
+######################################################################
+#
+# Write the license comment and the opening markup of the HTML report
+#
+######################################################################
+print OUT <<END_OF_HTML;
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<HTML>
+<HEAD>
+<TITLE>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</TITLE>
+</HEAD>
+<BODY>
+<H1>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</H1>
+END_OF_HTML
+
+######################################################################
+#
+# Write the license comment and the "generated file" notice of the
+# C header
+#
+######################################################################
+print HEADER <<END_OF_NPL;
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
+ mozilla/intl/lwbrk/tools/anzx4051.pl
+ */
+END_OF_NPL
+
+%occ = ();
+%gcat = ();
+%dcat = ();
+%simp = ();
+%gcount = ();
+%dcount = ();
+%sccount = ();
+%rangecount = ();
+
+######################################################################
+#
+# Read the Unicode database line by line.
+#
+# For every listed code point remember its general category (%gcat)
+# and the first letter of that category (%dcat), and count how many
+# entries each category (%gcount) and each first letter (%dcount) has.
+#
+######################################################################
+while(<UNICODATA>) {
+  # chomp only strips a trailing newline; chop would eat a data
+  # character if the last line is not newline-terminated.
+  chomp;
+
+  my @f = split(/;/ , $_);
+  # Skip blank or truncated lines so they do not create an empty
+  # category.
+  next unless @f > 2;
+
+  my $c = $f[0];            # The unicode value (hex string, as in the file)
+  my $g = $f[2];            # The general category
+  my $d = substr($g, 0, 1); # First letter of the general category
+
+  $gcat{$c} = $g;
+  $dcat{$c} = $d;
+  $gcount{$g}++;
+  $dcount{$d}++;
+}
+# The handle opened above is UNICODATA; the original closed the
+# non-existent handle UNIDATA and left this one open.
+close(UNICODATA);
+
+######################################################################
+#
+# Read the simplified class mapping.  Each line has the form
+# "<class>;<simplified class>".  %simp maps the former to the latter
+# and %sccount counts how many classes share each simplified class.
+#
+######################################################################
+while(<SIMP>) {
+  # chomp (not chop) so that a last line without a trailing newline
+  # does not lose the final character of the simplified class name.
+  chomp;
+  # A blank line would otherwise register an empty-named class.
+  next if $_ eq "";
+
+  my ($class, $simple) = split(/;/ , $_);
+
+  $simp{$class} = $simple;
+  $sccount{$simple}++;
+}
+close(SIMP);
+
+# Return the general category of the code point $u (a number).
+#
+# The category recorded in %gcat is used when there is one.  Otherwise
+# a fixed value is returned for a few ranges that are looked up by
+# range instead: "Han" for U+3400-U+9FA5, "Lo" for U+AC00-U+D7A3,
+# "Cs" for U+D800-U+DFFF and "Co" for U+E000-U+F8FF.
+#
+# For any other code point a warning is printed to STDOUT and the
+# string "Unknown" is returned.  The original fell off the end here and
+# returned printf's status; "Unknown" is not a category value found in
+# the Unicode database either, but it makes the result explicit.
+sub GetClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  # Lexical on purpose: the original assigned to the global $g.
+  my $g = $gcat{$hex};
+
+  return $g    if defined($g) && $g ne "";
+  return "Han" if (0x3400 <= $u) && ($u <= 0x9fa5);
+  return "Lo"  if (0xac00 <= $u) && ($u <= 0xd7a3);
+  # The original tested D800-DB7F, DB80-DBFF and DC00-DFFF separately;
+  # all three returned "Cs".
+  return "Cs"  if (0xd800 <= $u) && ($u <= 0xdfff);
+  return "Co"  if (0xe000 <= $u) && ($u <= 0xf8ff);
+
+  printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
+  return "Unknown";
+}
+# Return the first letter of the general category of the code point $u
+# (a number), i.e. the value stored in %dcat.
+#
+# When %dcat has no entry, a fixed value is returned for a few ranges
+# that are looked up by range instead: "Han" for U+3400-U+9FA5, "L" for
+# U+AC00-U+D7A3 and "C" for U+D800-U+F8FF.
+#
+# For any other code point a warning is printed to STDOUT and the
+# string "Unknown" is returned.  The original fell off the end here and
+# returned printf's status; "Unknown" is not a value found in %dcat
+# either, but it makes the result explicit.
+sub GetDClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  # Lexical on purpose: the original assigned to the global $g.
+  my $g = $dcat{$hex};
+
+  return $g    if defined($g) && $g ne "";
+  return "Han" if (0x3400 <= $u) && ($u <= 0x9fa5);
+  return "L"   if (0xac00 <= $u) && ($u <= 0xd7a3);
+  # The original tested D800-DB7F, DB80-DBFF, DC00-DFFF and E000-F8FF
+  # separately; all four returned "C".
+  return "C"   if (0xd800 <= $u) && ($u <= 0xf8ff);
+
+  printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
+  return "Unknown";
+}
+# Format a number as upper-case hexadecimal, zero-padded to at least
+# four digits (e.g. 255 -> "00FF").
+sub DecToHex{
+  my $value = shift;
+  my $hex = sprintf("%04X", $value);
+  return $hex;
+}
+%gtotal = ();
+%dtotal = ();
+######################################################################
+#
+# Read the JIS X 4051 class file.  Each line has the form
+# "<first>;<last>;<class>" with hexadecimal code points; an empty
+# <last> means the entry covers the single code point <first>.
+#
+# For every code point not seen before, record its class in %occ and
+# update the per-category (%gtotal, %dtotal) and per-high-byte
+# (%rangecount) counters.  The first entry covering a code point wins;
+# later entries for the same code point are ignored.
+#
+######################################################################
+while(<CLASS>) {
+  # chomp (not chop) so that a last line without a trailing newline
+  # keeps the final digit of its class.
+  chomp;
+  # A blank line would otherwise be processed as an entry for U+0000.
+  next if $_ eq "";
+
+  my ($first, $last, $class) = split(/;/ , $_);
+
+  # Entries whose class starts with "a" are skipped.
+  next if substr($class, 0, 1) eq "a";
+
+  my $sc = $simp{$class};
+  my $lo = hex($first);
+  my $hi = ($last eq "") ? $lo : hex($last);
+
+  for(my $k = $lo; $k <= $hi ; $k++)
+  {
+    if( exists($occ{$k}))
+    {
+      # Already classified by an earlier entry; keep that one.
+      # printf "WARNING !! Conflict defination!!! U+%s -> [%s] [%s | %s]\n",
+      # DecToHex($k), $occ{$k} , $class , $sc;
+      next;
+    }
+
+    $occ{$k} = $sc . " | " . $class;
+    $gtotal{$sc . GetClass($k)}++;
+    $dtotal{$sc . GetDClass($k)}++;
+
+    # Key is " <first two hex digits>:<simplified class>".
+    my $u = DecToHex($k);
+    $rangecount{" " . substr($u,0,2) . ":" . $sc}++;
+  }
+}
+
+#print %gtotal;
+#print %dtotal;
+
+# Write the two summary tables of the HTML report to OUT.
+#
+# Table 1 has one row per simplified class (keys of %sccount).  Each
+# row lists the counts from %dtotal for every key of %dcount, the sum
+# of those counts, and then the counts from %gtotal for every key of
+# %gcount.
+#
+# Table 2 has one row per two-digit hex prefix 00-4E and one column per
+# simplified class, filled from %rangecount.  Rows whose counts sum to
+# zero are not written.
+#
+# Missing counts are printed as empty cells.  Takes no arguments.
+sub printreport
+{
+  # The key sets do not change while printing, so sort them only once.
+  my @majors  = sort(keys %dcount);
+  my @cats    = sort(keys %gcount);
+  my @classes = sort(keys %sccount);
+
+  print OUT "<TABLE BORDER=3>\n";
+  print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+
+  foreach my $d (@majors) {
+    print OUT "<TD BGCOLOR=red>$d</TD>\n";
+  }
+
+  print OUT "<TD BGCOLOR=white>Total</TD>\n";
+  foreach my $g (@cats) {
+    print OUT "<TD BGCOLOR=yellow>$g</TD>\n";
+  }
+  print OUT "</TR>\n";
+
+  foreach my $sc (@classes) {
+    print OUT "<TR><TH>$sc<TH>\n";
+
+    my $total = 0;
+    foreach my $d (@majors) {
+      my $count = $dtotal{$sc . $d};
+      $total += $count;
+      print OUT "<TD>$count</TD>\n";
+    }
+
+    print OUT "<TD BGCOLOR=white>$total</TD>\n";
+
+    foreach my $g (@cats) {
+      my $count = $gtotal{$sc . $g};
+      print OUT "<TD>$count</TD>\n";
+    }
+
+    print OUT "</TR>\n";
+  }
+  print OUT "</TABLE>\n";
+
+
+  print OUT "<TABLE BORDER=3>\n";
+  print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+
+  foreach my $sc (@classes) {
+    print OUT "<TD BGCOLOR=red>$sc</TD>\n";
+  }
+
+  print OUT "</TR>\n";
+
+  for(my $rr = 0; $rr < 0x4f; $rr++)
+  {
+    my $r = sprintf("%02X" , $rr);
+    # Build the row first; it is only written if it has any count.
+    my $row = "<TR><TH>" . $r . "<TH>\n";
+    my $rowtotal = 0;
+
+    foreach my $sc (@classes) {
+      my $count = $rangecount{ " " . $r . ":" . $sc};
+      $row .= sprintf("<TD>%s</TD>\n", $count);
+      $rowtotal += $count;
+    }
+
+    $row .= "</TR>\n";
+
+    # Numeric comparison: $rowtotal is a sum, not a string.
+    if($rowtotal != 0)
+    {
+      print OUT $row;
+    }
+  }
+  print OUT "</TABLE>\n";
+
+}
+printreport();
+
+# Write one lookup table to HEADER as a C array named gLBClass<r>.
+#
+#   $r   - two hex digits; the table covers the 256 code points
+#          starting at hex($r) * 256
+#   $def - the hex digit to use for code points that have no entry
+#          in %occ
+#
+# The array has 32 words.  Each word is written as "0x" followed by
+# eight characters, one per code point, with the highest of the eight
+# code points first.  The character for a code point is the second
+# character of its %occ entry (e.g. "0" for "00_1 | ...", "A" for
+# "0A_[d] | ...").
+#
+# Also prints "[<r> || <def>]" to STDOUT as a progress message.
+sub printarray
+{
+  my($r, $def) = @_;
+  printf "[%s || %s]\n", $r, $def;
+  my $base = hex($r) * 256;
+  printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;
+  for(my $i = 0 ; $i < 256; $i += 8)
+  {
+    my $word = "0x";
+    for(my $j = 7 ; $j >= 0; $j-- )
+    {
+      my $v = $base + $i + $j;
+      $word .= exists($occ{$v}) ? substr($occ{$v}, 1, 1) : $def;
+    }
+    printf HEADER "%s, // U+%04X - U+%04X\n", $word, $base + $i, $base + $i + 7;
+  }
+  print HEADER "};\n\n";
+}
+printarray("00", "7");
+printarray("20", "7");
+printarray("21", "7");
+printarray("30", "5");
+printarray("0E", "8");
+printarray("17", "7");
+
+#print %rangecount;
+
+######################################################################
+#
+# Close files
+#
+# Buffered output is flushed on close, so a failed close on a write
+# handle means the generated file may be incomplete; report it instead
+# of ignoring it.
+#
+######################################################################
+close(HEADER) || die "cannot close output ../jisx4051class.h file: $!";
+close(CLASS);
+close(OUT) || die "cannot close output anzx4051.html file: $!";
+
diff --git a/intl/lwbrk/tools/jisx4051class.txt b/intl/lwbrk/tools/jisx4051class.txt
new file mode 100644
index 0000000000..c435c1ae55
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051class.txt
@@ -0,0 +1,159 @@
+0000;001f;17
+0020;;17
+0024;;24
+0027;;18
+0028;;22
+002D;;18
+002F;;18
+0021;002F;23
+0030;0039;15
+003C;;22
+003A;003F;23
+0040;;18
+0041;005A;18
+005B;;22
+005E;;18
+005F;;18
+005B;005F;23
+0060;;18
+0061;007A;18
+007B;;22
+007B;007E;23
+00A0;;24
+00A3;;22
+00A5;;22
+00A9;;18
+00AA;;18
+00AB;;18
+00AC;;22
+00AE;;18
+00AF;;18
+00A1;00BF;23
+00B0;;18
+00F7;;23
+00C0;00FF;18
+0E3F;;1
+0E2F;;4
+0E46;;4
+0E5A;0E5B;4
+0E50;0E59;15
+0E4F;;18
+0EAF;;4
+0EC6;;4
+0ED0;0ED9;15
+1735;1736;1
+17D4;17D5;4
+17D8;;4
+17DA;;4
+1780;17DD;21
+17E0;17E9;21
+17F0;17F9;21
+2007;;24
+2000;200B;17
+200C;200F;18
+2010;;18
+2011;;24
+2012;2013;18
+2014;;7
+2015;;18
+2016;2017;18
+2019;;23
+201D;;23
+2018;201F;18
+2020;2023;18
+2024;2026;2
+2027;;23
+2028;202E;18
+202F;;24
+2030;2034;9
+2035;2038;18
+2039;;1
+203A;;2
+203B;;12
+203C;203D;3
+203E;;23
+203F;2043;18
+2044;;3
+2045;;1
+2046;;2
+2047;2049;3
+204A;205E;18
+205F;;17
+2060;;24
+2061;2063;18
+206A;206F;18
+2070;2071;18
+2074;208E;18
+2090;2094;18
+2116;;8
+2160;217F;12
+2190;21EA;a12
+2126;;18
+2100;2138;18
+2153;2182;18
+2190;21EA;18
+3008;;1
+300A;;1
+300C;;1
+300E;;1
+3010;;1
+3014;;1
+3016;;1
+3018;;1
+301A;;1
+301D;;1
+3001;;2
+3009;;2
+300B;;2
+300D;;2
+300F;;2
+3011;;2
+3015;;2
+3017;;2
+3019;;2
+301B;;2
+301E;;2
+301F;;2
+3005;;3
+301C;;3
+3041;;3
+3043;;3
+3045;;3
+3047;;3
+3049;;3
+3063;;3
+3083;;3
+3085;;3
+3087;;3
+308E;;3
+309D;;3
+309E;;3
+30A1;;3
+30A3;;3
+30A5;;3
+30A7;;3
+30A9;;3
+30C3;;3
+30E3;;3
+30E5;;3
+30E7;;3
+30EE;;3
+30F5;;3
+30F6;;3
+30FC;;3
+30FD;;3
+30FE;;3
+30FB;;5
+3002;;6
+3000;;10
+3042;3094;11
+3099;309E;3
+3003;;12
+3004;;12
+3006;;12
+3007;;12
+3012;;12
+3013;;12
+3020;;12
+3036;;12
+30A2;30FA;12
diff --git a/intl/lwbrk/tools/jisx4051simp.txt b/intl/lwbrk/tools/jisx4051simp.txt
new file mode 100644
index 0000000000..e12a7fd805
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051simp.txt
@@ -0,0 +1,24 @@
+1;00_1
+2;01_[a]
+3;01_[a]
+4;01_[a]
+5;01_[a]
+6;01_[a]
+7;02_7
+8;03_8
+9;04_9
+10;05_[b]
+11;05_[b]
+12;05_[b]
+13;X
+14;X
+15;06_15
+16;X
+17;05_[b]
+18;07_18
+19;X
+20;X
+21;08_COMPLEX
+22;09_[c]
+23;0A_[d]
+24;0B_[e]
diff --git a/intl/lwbrk/tools/spec_table.html b/intl/lwbrk/tools/spec_table.html
new file mode 100644
index 0000000000..b7a642a332
--- /dev/null
+++ b/intl/lwbrk/tools/spec_table.html
@@ -0,0 +1,664 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ <title>Specification table for line breaking</title>
+ <style type="text/css">
+ table {
+ border: solid 1px;
+ border-collapse: collapse;
+ }
+ tbody,
+ tfoot {
+ border-top: solid 2px;
+ }
+ td,
+ th {
+ border: solid 1px;
+ }
+ td {
+ text-align: center;
+ }
+ </style>
+ </head>
+ <body>
+ <p>This is a specification table for line breaking.</p>
+ <p>
+ In the IE7 and Opera9 columns, 'A' means that the line is breakable After
+ the character, 'B' means that it is breakable Before the character, and
+ 'BA' means that it is breakable both Before and After.
+ </p>
+ <p>
+ The (C) and (N) suffixes on the IE7 and Opera9 column names mean Character
+ and Numeric. They describe what surrounds the tested character in the
+ testcase. E.g., "a$a" is a testcase for (C) and "0$0" is a testcase for (N).
+ </p>
+ <p>
+ Gecko does not break lines in most Western-language contexts. However, in
+ file paths, URLs and very long words joined by hyphens, some characters
+ may be breakable. These are marked 'breakable' in the table. They are not
+ always breakable, though: it <em>depends on the context</em> within the
+ word.
+ </p>
+ <table border="1">
+ <thead>
+ <tr>
+ <th colspan="2">character</th>
+ <th>Gecko</th>
+ <th>IE7(C)</th>
+ <th>IE7(N)</th>
+ <th>Opera9.2(C)</th>
+ <th>Opera9.2(N)</th>
+ </tr>
+ </thead>
+ <tfoot>
+ <tr>
+ <th colspan="2">character</th>
+ <th>Gecko</th>
+ <th>IE7(C)</th>
+ <th>IE7(N)</th>
+ <th>Opera9.2(C)</th>
+ <th>Opera9.2(N)</th>
+ </tr>
+ </tfoot>
+ <tbody>
+ <tr>
+ <th>0x21</th>
+ <th>&#x21;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x22</th>
+ <th>&#x22;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x23</th>
+ <th>&#x23;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x24</th>
+ <th>&#x24;</th>
+ <td></td>
+ <td></td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x25</th>
+ <th>&#x25;</th>
+ <td>breakable</td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x26</th>
+ <th>&#x26;</th>
+ <td>breakable</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x27</th>
+ <th>&#x27;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x28</th>
+ <th>&#x28;</th>
+ <td></td>
+ <td>B</td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x29</th>
+ <th>&#x29;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x2A</th>
+ <th>&#x2A;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x2B</th>
+ <th>&#x2B;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x2C</th>
+ <th>&#x2C;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x2D</th>
+ <th>&#x2D;</th>
+ <td>breakable</td>
+ <td>BA</td>
+ <td>BA</td>
+ <td>A</td>
+ <td>A</td>
+ </tr>
+ <tr>
+ <th>0x2E</th>
+ <th>&#x2E;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x2F</th>
+ <th>&#x2F;</th>
+ <td>breakable</td>
+ <td></td>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0x3A</th>
+ <th>&#x3A;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x3B</th>
+ <th>&#x3B;</th>
+ <td>breakable</td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x3C</th>
+ <th>&#x3C;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x3D</th>
+ <th>&#x3D;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x3E</th>
+ <th>&#x3E;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x3F</th>
+ <th>&#x3F;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0x40</th>
+ <th>&#x40;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0x5B</th>
+ <th>&#x5B;</th>
+ <td></td>
+ <td>B</td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x5C</th>
+ <th>&#x5C;</th>
+ <td>breakable</td>
+ <td></td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x5D</th>
+ <th>&#x5D;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x5E</th>
+ <th>&#x5E;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x5F</th>
+ <th>&#x5F;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0x60</th>
+ <th>&#x60;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0x7B</th>
+ <th>&#x7B;</th>
+ <td></td>
+ <td>B</td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x7C</th>
+ <th>&#x7C;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ </tr>
+ <tr>
+ <th>0x7D</th>
+ <th>&#x7D;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0x7E</th>
+ <th>&#x7E;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0xA1</th>
+ <th>&#xA1;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA2</th>
+ <th>&#xA2;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA3</th>
+ <th>&#xA3;</th>
+ <td></td>
+ <td></td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA4</th>
+ <th>&#xA4;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA5</th>
+ <th>&#xA5;</th>
+ <td></td>
+ <td></td>
+ <td>B</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA6</th>
+ <th>&#xA6;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA7</th>
+ <th>&#xA7;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA8</th>
+ <th>&#xA8;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xA9</th>
+ <th>&#xA9;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xAA</th>
+ <th>&#xAA;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xAB</th>
+ <th>&#xAB;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xAC</th>
+ <th>&#xAC;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xAE</th>
+ <th>&#xAE;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xAF</th>
+ <th>&#xAF;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0xB0</th>
+ <th>&#xB0;</th>
+ <td></td>
+ <td>A</td>
+ <td>A</td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB1</th>
+ <th>&#xB1;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB2</th>
+ <th>&#xB2;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB3</th>
+ <th>&#xB3;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB4</th>
+ <th>&#xB4;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td>B</td>
+ <td>B</td>
+ </tr>
+ <tr>
+ <th>0xB5</th>
+ <th>&#xB5;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB6</th>
+ <th>&#xB6;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB7</th>
+ <th>&#xB7;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB8</th>
+ <th>&#xB8;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xB9</th>
+ <th>&#xB9;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xBA</th>
+ <th>&#xBA;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xBB</th>
+ <th>&#xBB;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xBC</th>
+ <th>&#xBC;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xBD</th>
+ <th>&#xBD;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xBE</th>
+ <th>&#xBE;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <th>0xBF</th>
+ <th>&#xBF;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0xD7</th>
+ <th>&#xD7;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <th>0xF7</th>
+ <th>&#xF7;</th>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ </tbody>
+ </table>
+ </body>
+</html>