diff options
Diffstat (limited to 'intl/lwbrk/tools')
-rw-r--r-- | intl/lwbrk/tools/anzx4051.html | 709 | ||||
-rw-r--r-- | intl/lwbrk/tools/anzx4051.pl | 356 | ||||
-rw-r--r-- | intl/lwbrk/tools/jisx4051class.txt | 159 | ||||
-rw-r--r-- | intl/lwbrk/tools/jisx4051simp.txt | 24 | ||||
-rw-r--r-- | intl/lwbrk/tools/spec_table.html | 664 |
5 files changed, 1912 insertions, 0 deletions
diff --git a/intl/lwbrk/tools/anzx4051.html b/intl/lwbrk/tools/anzx4051.html new file mode 100644 index 0000000000..9f3461a285 --- /dev/null +++ b/intl/lwbrk/tools/anzx4051.html @@ -0,0 +1,709 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<html> + <head> + <title>Analysis of JIS X 4051 to Unicode General Category Mapping</title> + </head> + <body> + <h1>Analysis of JIS X 4051 to Unicode General Category Mapping</h1> + <table border="3"> + <tr bgcolor="blue"> + <th></th> + <th></th> + <td bgcolor="red">C</td> + <td bgcolor="red">L</td> + <td bgcolor="red">M</td> + <td bgcolor="red">N</td> + <td bgcolor="red">P</td> + <td bgcolor="red">S</td> + <td bgcolor="red">Z</td> + <td bgcolor="white">Total</td> + <td bgcolor="yellow">Cc</td> + <td bgcolor="yellow">Cf</td> + <td bgcolor="yellow">Co</td> + <td bgcolor="yellow">Cs</td> + <td bgcolor="yellow">Ll</td> + <td bgcolor="yellow">Lm</td> + <td bgcolor="yellow">Lo</td> + <td bgcolor="yellow">Lt</td> + <td bgcolor="yellow">Lu</td> + <td bgcolor="yellow">Mc</td> + <td bgcolor="yellow">Me</td> + <td bgcolor="yellow">Mn</td> + <td bgcolor="yellow">Nd</td> + <td bgcolor="yellow">Nl</td> + <td bgcolor="yellow">No</td> + <td bgcolor="yellow">Pc</td> + <td bgcolor="yellow">Pd</td> + <td bgcolor="yellow">Pe</td> + <td bgcolor="yellow">Pf</td> + <td bgcolor="yellow">Pi</td> + <td bgcolor="yellow">Po</td> + <td bgcolor="yellow">Ps</td> + <td bgcolor="yellow">Sc</td> + <td bgcolor="yellow">Sk</td> + <td bgcolor="yellow">Sm</td> + <td bgcolor="yellow">So</td> + <td bgcolor="yellow">Zl</td> + <td bgcolor="yellow">Zp</td> + <td bgcolor="yellow">Zs</td> + </tr> + <tr> + <th>00_1</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td>14</td> + <td>1</td> + <td></td> + <td bgcolor="white">15</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> 
+ <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td>2</td> + <td>11</td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>01_[a]</th> + <th></th> + <td></td> + <td>32</td> + <td>2</td> + <td></td> + <td>31</td> + <td>3</td> + <td></td> + <td bgcolor="white">68</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>8</td> + <td>24</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>2</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td>12</td> + <td>1</td> + <td></td> + <td>17</td> + <td></td> + <td></td> + <td>2</td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>02_7</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td bgcolor="white">1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>03_8</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td bgcolor="white">1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>04_9</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td>5</td> + <td></td> + <td></td> + <td bgcolor="white">5</td> + <td></td> 
+ <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>5</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>05_[b]</th> + <th></th> + <td>33</td> + <td>153</td> + <td></td> + <td>33</td> + <td>2</td> + <td>5</td> + <td>13</td> + <td bgcolor="white">239</td> + <td>32</td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>153</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>33</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>2</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>5</td> + <td></td> + <td></td> + <td>13</td> + </tr> + <tr> + <th>06_15</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td>30</td> + <td></td> + <td></td> + <td></td> + <td bgcolor="white">30</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>30</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>07_18</th> + <th></th> + <td>18</td> + <td>157</td> + <td></td> + <td>33</td> + <td>56</td> + <td>125</td> + <td>2</td> + <td bgcolor="white">391</td> + <td></td> + <td>18</td> + <td></td> + <td></td> + <td>64</td> + <td>7</td> + <td>5</td> + <td></td> + <td>81</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>3</td> + <td>30</td> + <td>4</td> + <td>5</td> + <td>2</td> + <td></td> + <td>5</td> + <td>36</td> + <td>4</td> + <td></td> + <td>3</td> + <td>24</td> + <td>98</td> + <td>1</td> + <td>1</td> + <td></td> + </tr> + <tr> + <th>08_COMPLEX</th> + <th></th> + <td></td> + <td>54</td> + 
<td>33</td> + <td>20</td> + <td>2</td> + <td>1</td> + <td></td> + <td bgcolor="white">110</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td>53</td> + <td></td> + <td></td> + <td>11</td> + <td></td> + <td>22</td> + <td>10</td> + <td></td> + <td>10</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>2</td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>09_[c]</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td>3</td> + <td>4</td> + <td></td> + <td bgcolor="white">7</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>3</td> + <td>2</td> + <td></td> + <td>2</td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0A_[d]</th> + <th></th> + <td>1</td> + <td>2</td> + <td></td> + <td>6</td> + <td>25</td> + <td>14</td> + <td></td> + <td bgcolor="white">48</td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>6</td> + <td></td> + <td></td> + <td>3</td> + <td>3</td> + <td></td> + <td>19</td> + <td></td> + <td>2</td> + <td>3</td> + <td>7</td> + <td>2</td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0B_[e]</th> + <th></th> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td>1</td> + <td>3</td> + <td bgcolor="white">6</td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + 
<td>3</td> + </tr> + <tr> + <th>X</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td bgcolor="white">0</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </table> + <table border="3"> + <tr bgcolor="blue"> + <th></th> + <th></th> + <td bgcolor="red">00_1</td> + <td bgcolor="red">01_[a]</td> + <td bgcolor="red">02_7</td> + <td bgcolor="red">03_8</td> + <td bgcolor="red">04_9</td> + <td bgcolor="red">05_[b]</td> + <td bgcolor="red">06_15</td> + <td bgcolor="red">07_18</td> + <td bgcolor="red">08_COMPLEX</td> + <td bgcolor="red">09_[c]</td> + <td bgcolor="red">0A_[d]</td> + <td bgcolor="red">0B_[e]</td> + <td bgcolor="red">X</td> + </tr> + <tr> + <th>00</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>33</td> + <td>10</td> + <td>127</td> + <td></td> + <td>7</td> + <td>44</td> + <td>2</td> + <td></td> + </tr> + <tr> + <th>0E</th> + <th></th> + <td>1</td> + <td>6</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>20</td> + <td>1</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>17</th> + <th></th> + <td>2</td> + <td>4</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td>110</td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>20</th> + <th></th> + <td>2</td> + <td>11</td> + <td>1</td> + <td></td> + <td>5</td> + <td>13</td> + <td></td> + <td>100</td> + <td></td> + <td></td> + <td>4</td> + <td>4</td> + <td></td> + </tr> + <tr> + <th>21</th> + <th></th> + <td></td> + <td></td> + <td></td> + <td>1</td> + <td></td> + <td>32</td> + <td></td> + <td>163</td> + <td></td> + <td></td> + 
<td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>30</th> + <th></th> + <td>10</td> + <td>47</td> + <td></td> + <td></td> + <td></td> + <td>161</td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </table> + </body> +</html> diff --git a/intl/lwbrk/tools/anzx4051.pl b/intl/lwbrk/tools/anzx4051.pl new file mode 100644 index 0000000000..e76eac6207 --- /dev/null +++ b/intl/lwbrk/tools/anzx4051.pl @@ -0,0 +1,356 @@ +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +###################################################################### +# +# Initial global variable +# +###################################################################### +%utot = (); +$ui=0; +$li=0; + +###################################################################### +# +# Open the unicode database file +# +###################################################################### +open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt") + || die "cannot find UnicodeData-Latest.txt"; + +###################################################################### +# +# Open the JIS X 4051 Class file +# +###################################################################### +open ( CLASS , "< jisx4051class.txt") + || die "cannot find jisx4051class.txt"; + +###################################################################### +# +# Open the JIS X 4051 Class simplified mapping +# +###################################################################### +open ( SIMP , "< jisx4051simp.txt") + || die "cannot find jisx4051simp.txt"; + +###################################################################### +# +# Open the output file +# +###################################################################### +open ( OUT , "> anzx4051.html") + || die "cannot open output anzx4051.html 
file"; + +###################################################################### +# +# Open the generated C header output file +# +###################################################################### +open ( HEADER , "> ../jisx4051class.h") + || die "cannot open output ../jisx4051class.h file"; + +###################################################################### +# +# Generate license and HTML page header for the report +# +###################################################################### +$hthmlheader = <<END_OF_HTML; +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<HTML> +<HEAD> +<TITLE> +Analysis of JIS X 4051 to Unicode General Category Mapping +</TITLE> +</HEAD> +<BODY> +<H1> +Analysis of JIS X 4051 to Unicode General Category Mapping +</H1> +END_OF_HTML +print OUT $hthmlheader; + +###################################################################### +# +# Generate license and preamble comment for the C header +# +###################################################################### +$npl = <<END_OF_NPL; +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* + DO NOT EDIT THIS DOCUMENT !!! 
THIS DOCUMENT IS GENERATED BY + mozilla/intl/lwbrk/tools/anzx4051.pl + */ +END_OF_NPL +print HEADER $npl; + +%occ = (); +%gcat = (); +%dcat = (); +%simp = (); +%gcount = (); +%dcount = (); +%sccount = (); +%rangecount = (); + +###################################################################### +# +# Process the file line by line +# +###################################################################### +while(<UNICODATA>) { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + $c = $f[0]; # The unicode value + $g = $f[2]; + $d = substr($g, 0, 1); + + $gcat{$c} = $g; + $dcat{$c} = $d; + $gcount{$g}++; + $dcount{$d}++; +} +close(UNIDATA); + +while(<SIMP>) { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + + $simp{$f[0]} = $f[1]; + $sccount{$f[1]}++; +} +close(SIMP); + +sub GetClass{ + my ($u) = @_; + my $hex = DecToHex($u); + $g = $gcat{$hex}; + if($g ne "") { + return $g; + } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) { + return "Han"; + } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) { + return "Lo"; + } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f ) ) { + return "Cs"; + } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff ) ) { + return "Cs"; + } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff ) ) { + return "Cs"; + } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) { + return "Co"; + } else { + printf "WARNING !!!! 
Cannot find General Category for U+%s \n" , $hex; + } +} +sub GetDClass{ + my ($u) = @_; + my $hex = DecToHex($u); + $g = $dcat{$hex}; + if($g ne "") { + return $g; + } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) { + return "Han"; + } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) { + return "L"; + } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f ) ) { + return "C"; + } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff ) ) { + return "C"; + } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff ) ) { + return "C"; + } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) { + return "C"; + } else { + printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex; + } +} +sub DecToHex{ + my ($d) = @_; + return sprintf("%04X", $d); +} +%gtotal = (); +%dtotal = (); +while(<CLASS>) { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + + if( substr($f[2], 0, 1) ne "a") + { + $sc = $simp{$f[2]}; + $l = hex($f[0]); + if($f[1] eq "") + { + $h = $l; + } else { + $h = hex($f[1]); + } + for($k = $l; $k <= $h ; $k++) + { + if( exists($occ{$k})) + { + # printf "WARNING !! Conflict defination!!! U+%s -> [%s] [%s | %s]\n", + # DecToHex($k), $occ{$k} , $f[2] , $sc; + } + else + { + $occ{$k} = $sc . " | " . $f[2]; + $gclass = GetClass($k); + $dclass = GetDClass($k); + $gtotal{$sc . $gclass}++; + $dtotal{$sc . $dclass}++; + $u = DecToHex($k); + $rk = " " . substr($u,0,2) . ":" . 
$sc; + $rangecount{$rk}++; + } + } + } +} + +#print %gtotal; +#print %dtotal; + +sub printreport +{ + print OUT "<TABLE BORDER=3>\n"; + print OUT "<TR BGCOLOR=blue><TH><TH>\n"; + + foreach $d (sort(keys %dcount)) { + print OUT "<TD BGCOLOR=red>$d</TD>\n"; + } + + print OUT "<TD BGCOLOR=white>Total</TD>\n"; + foreach $g (sort(keys %gcount)) { + print OUT "<TD BGCOLOR=yellow>$g</TD>\n"; + } + print OUT "</TR>\n"; + foreach $sc (sort(keys %sccount)) { + + print OUT "<TR><TH>$sc<TH>\n"; + + $total = 0; + foreach $d (sort (keys %dcount)) { + $count = $dtotal{$sc . $d}; + $total += $count; + print OUT "<TD>$count</TD>\n"; + } + + print OUT "<TD BGCOLOR=white>$total</TD>\n"; + + foreach $g (sort(keys %gcount)) { + $count = $gtotal{$sc . $g}; + print OUT "<TD>$count</TD>\n"; + } + + + print OUT "</TR>\n"; + } + print OUT "</TABLE>\n"; + + + print OUT "<TABLE BORDER=3>\n"; + print OUT "<TR BGCOLOR=blue><TH><TH>\n"; + + foreach $sc (sort(keys %sccount)) + { + print OUT "<TD BGCOLOR=red>$sc</TD>\n"; + } + + print OUT "</TR>\n"; + + + for($rr = 0; $rr < 0x4f; $rr++) + { + $empty = 0; + $r = sprintf("%02X" , $rr) ; + $tmp = "<TR><TH>" . $r . "<TH>\n"; + + foreach $sc (sort(keys %sccount)) { + $count = $rangecount{ " " .$r . 
":" .$sc}; + $tmp .= sprintf("<TD>%s</TD>\n", $count); + $empty += $count; + } + + $tmp .= "</TR>\n"; + + if($empty ne 0) + { + print OUT $tmp; + } + } + print OUT "</TABLE>\n"; + +} +printreport(); + +sub printarray +{ + my($r, $def) = @_; +printf "[%s || %s]\n", $r, $def; + $k = hex($r) * 256; + printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r; + for($i = 0 ; $i < 256; $i+= 8) + { + for($j = 7 ; $j >= 0; $j-- ) + { + $v = $k + $i + $j; + if( exists($occ{$v})) + { + $p = substr($occ{$v}, 1,1); + } else { + $p = $def; + } + + if($j eq 7 ) + { + printf HEADER "0x%s" , $p; + } else { + printf HEADER "%s", $p ; + } + } + printf HEADER ", // U+%04X - U+%04X\n", $k + $i ,( $k + $i + 7); + } + print HEADER "};\n\n"; +} +printarray("00", "7"); +printarray("20", "7"); +printarray("21", "7"); +printarray("30", "5"); +printarray("0E", "8"); +printarray("17", "7"); + +#print %rangecount; + +###################################################################### +# +# Close files +# +###################################################################### +close(HEADER); +close(CLASS); +close(OUT); + diff --git a/intl/lwbrk/tools/jisx4051class.txt b/intl/lwbrk/tools/jisx4051class.txt new file mode 100644 index 0000000000..c435c1ae55 --- /dev/null +++ b/intl/lwbrk/tools/jisx4051class.txt @@ -0,0 +1,159 @@ +0000;001f;17 +0020;;17 +0024;;24 +0027;;18 +0028;;22 +002D;;18 +002F;;18 +0021;002F;23 +0030;0039;15 +003C;;22 +003A;003F;23 +0040;;18 +0041;005A;18 +005B;;22 +005E;;18 +005F;;18 +005B;005F;23 +0060;;18 +0061;007A;18 +007B;;22 +007B;007E;23 +00A0;;24 +00A3;;22 +00A5;;22 +00A9;;18 +00AA;;18 +00AB;;18 +00AC;;22 +00AE;;18 +00AF;;18 +00A1;00BF;23 +00B0;;18 +00F7;;23 +00C0;00FF;18 +0E3F;;1 +0E2F;;4 +0E46;;4 +0E5A;0E5B;4 +0E50;0E59;15 +0E4F;;18 +0EAF;;4 +0EC6;;4 +0ED0;0ED9;15 +1735;1736;1 +17D4;17D5;4 +17D8;;4 +17DA;;4 +1780;17DD;21 +17E0;17E9;21 +17F0;17F9;21 +2007;;24 +2000;200B;17 +200C;200F;18 +2010;;18 +2011;;24 +2012;2013;18 +2014;;7 +2015;;18 +2016;2017;18 
+2019;;23 +201D;;23 +2018;201F;18 +2020;2023;18 +2024;2026;2 +2027;;23 +2028;202E;18 +202F;;24 +2030;2034;9 +2035;2038;18 +2039;;1 +203A;;2 +203B;;12 +203C;203D;3 +203E;;23 +203F;2043;18 +2044;;3 +2045;;1 +2046;;2 +2047;2049;3 +204A;205E;18 +205F;;17 +2060;;24 +2061;2063;18 +206A;206F;18 +2070;2071;18 +2074;208E;18 +2090;2094;18 +2116;;8 +2160;217F;12 +2190;21EA;a12 +2126;;18 +2100;2138;18 +2153;2182;18 +2190;21EA;18 +3008;;1 +300A;;1 +300C;;1 +300E;;1 +3010;;1 +3014;;1 +3016;;1 +3018;;1 +301A;;1 +301D;;1 +3001;;2 +3009;;2 +300B;;2 +300D;;2 +300F;;2 +3011;;2 +3015;;2 +3017;;2 +3019;;2 +301B;;2 +301E;;2 +301F;;2 +3005;;3 +301C;;3 +3041;;3 +3043;;3 +3045;;3 +3047;;3 +3049;;3 +3063;;3 +3083;;3 +3085;;3 +3087;;3 +308E;;3 +309D;;3 +309E;;3 +30A1;;3 +30A3;;3 +30A5;;3 +30A7;;3 +30A9;;3 +30C3;;3 +30E3;;3 +30E5;;3 +30E7;;3 +30EE;;3 +30F5;;3 +30F6;;3 +30FC;;3 +30FD;;3 +30FE;;3 +30FB;;5 +3002;;6 +3000;;10 +3042;3094;11 +3099;309E;3 +3003;;12 +3004;;12 +3006;;12 +3007;;12 +3012;;12 +3013;;12 +3020;;12 +3036;;12 +30A2;30FA;12 diff --git a/intl/lwbrk/tools/jisx4051simp.txt b/intl/lwbrk/tools/jisx4051simp.txt new file mode 100644 index 0000000000..e12a7fd805 --- /dev/null +++ b/intl/lwbrk/tools/jisx4051simp.txt @@ -0,0 +1,24 @@ +1;00_1 +2;01_[a] +3;01_[a] +4;01_[a] +5;01_[a] +6;01_[a] +7;02_7 +8;03_8 +9;04_9 +10;05_[b] +11;05_[b] +12;05_[b] +13;X +14;X +15;06_15 +16;X +17;05_[b] +18;07_18 +19;X +20;X +21;08_COMPLEX +22;09_[c] +23;0A_[d] +24;0B_[e] diff --git a/intl/lwbrk/tools/spec_table.html b/intl/lwbrk/tools/spec_table.html new file mode 100644 index 0000000000..b7a642a332 --- /dev/null +++ b/intl/lwbrk/tools/spec_table.html @@ -0,0 +1,664 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. 
--> + +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + <title></title> + <style type="text/css"> + table { + border: solid 1px; + border-collapse: collapse; + } + tbody, + tfoot { + border-top: solid 2px; + } + td, + th { + border: solid 1px; + } + td { + text-align: center; + } + </style> + </head> + <body> + <p>This is a specification table for line breaking.</p> + <p> + In the IE7 and Opera9 columns, 'A' means that the line is breakable After + the character, and 'B' means Before. 'BA' means Before and After. + </p> + <p> + The (C) suffix on the IE7 and Opera9 column names means Character and (N) means + Numeric: it is the kind of character placed around the tested character. E.g., + "a$a" is a testcase for (C), "0$0" is a testcase for (N). + </p> + <p> + Gecko does not break lines in most Western-language contexts. But for + file paths, URLs and very long words joined by hyphens, some + characters might be breakable. They are marked 'breakable' in the table. However, + they are not always breakable; they <em>depend on the context</em> in the + word. 
+ </p> + <table border="1"> + <thead> + <tr> + <th colspan="2">character</th> + <th>Gecko</th> + <th>IE7(C)</th> + <th>IE7(N)</th> + <th>Opera9.2(C)</th> + <th>Opera9.2(N)</th> + </tr> + </thead> + <tfoot> + <tr> + <th colspan="2">character</th> + <th>Gecko</th> + <th>IE7(C)</th> + <th>IE7(N)</th> + <th>Opera9.2(C)</th> + <th>Opera9.2(N)</th> + </tr> + </tfoot> + <tbody> + <tr> + <th>0x21</th> + <th>!</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x22</th> + <th>"</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x23</th> + <th>#</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x24</th> + <th>$</th> + <td></td> + <td></td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x25</th> + <th>%</th> + <td>breakable</td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x26</th> + <th>&</th> + <td>breakable</td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x27</th> + <th>'</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x28</th> + <th>(</th> + <td></td> + <td>B</td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x29</th> + <th>)</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x2A</th> + <th>*</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x2B</th> + <th>+</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x2C</th> + <th>,</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x2D</th> + <th>-</th> + <td>breakable</td> + <td>BA</td> + <td>BA</td> + <td>A</td> + <td>A</td> + </tr> + <tr> + <th>0x2E</th> + <th>.</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x2F</th> + <th>/</th> + <td>breakable</td> + <td></td> + <td></td> + <td>A</td> + <td>A</td> + </tr> + </tbody> + 
<tbody> + <tr> + <th>0x3A</th> + <th>:</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x3B</th> + <th>;</th> + <td>breakable</td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x3C</th> + <th><</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x3D</th> + <th>=</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x3E</th> + <th>></th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x3F</th> + <th>?</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0x40</th> + <th>@</th> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0x5B</th> + <th>[</th> + <td></td> + <td>B</td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x5C</th> + <th>\</th> + <td>breakable</td> + <td></td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x5D</th> + <th>]</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x5E</th> + <th>^</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x5F</th> + <th>_</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0x60</th> + <th>`</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0x7B</th> + <th>{</th> + <td></td> + <td>B</td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x7C</th> + <th>|</th> + <td></td> + <td></td> + <td></td> + <td>A</td> + <td>A</td> + </tr> + <tr> + <th>0x7D</th> + <th>}</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0x7E</th> + <th>~</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0xA1</th> + <th>¡</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + 
</tr> + <tr> + <th>0xA2</th> + <th>¢</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA3</th> + <th>£</th> + <td></td> + <td></td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA4</th> + <th>¤</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA5</th> + <th>¥</th> + <td></td> + <td></td> + <td>B</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA6</th> + <th>¦</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA7</th> + <th>§</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA8</th> + <th>¨</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xA9</th> + <th>©</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xAA</th> + <th>ª</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xAB</th> + <th>«</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xAC</th> + <th>¬</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xAE</th> + <th>®</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xAF</th> + <th>¯</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0xB0</th> + <th>°</th> + <td></td> + <td>A</td> + <td>A</td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB1</th> + <th>±</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB2</th> + <th>²</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB3</th> + <th>³</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB4</th> + <th>´</th> + <td></td> + <td></td> + <td></td> + <td>B</td> + <td>B</td> + </tr> + <tr> + <th>0xB5</th> + <th>µ</th> + <td></td> + <td></td> + <td></td> + <td></td> + 
<td></td> + </tr> + <tr> + <th>0xB6</th> + <th>¶</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB7</th> + <th>·</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB8</th> + <th>¸</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xB9</th> + <th>¹</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xBA</th> + <th>º</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xBB</th> + <th>»</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xBC</th> + <th>¼</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xBD</th> + <th>½</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xBE</th> + <th>¾</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + <tr> + <th>0xBF</th> + <th>¿</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0xD7</th> + <th>×</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + <tbody> + <tr> + <th>0xF7</th> + <th>÷</th> + <td></td> + <td></td> + <td></td> + <td></td> + <td></td> + </tr> + </tbody> + </table> + </body> +</html> |