From 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:47:29 +0200 Subject: Adding upstream version 115.8.0esr. Signed-off-by: Daniel Baumann --- intl/lwbrk/tools/anzx4051.html | 709 +++++++++++++++++++++++++++++++++++++ intl/lwbrk/tools/anzx4051.pl | 356 +++++++++++++++++++ intl/lwbrk/tools/jisx4051class.txt | 159 +++++++++ intl/lwbrk/tools/jisx4051simp.txt | 24 ++ intl/lwbrk/tools/spec_table.html | 664 ++++++++++++++++++++++++++++++++++ 5 files changed, 1912 insertions(+) create mode 100644 intl/lwbrk/tools/anzx4051.html create mode 100644 intl/lwbrk/tools/anzx4051.pl create mode 100644 intl/lwbrk/tools/jisx4051class.txt create mode 100644 intl/lwbrk/tools/jisx4051simp.txt create mode 100644 intl/lwbrk/tools/spec_table.html (limited to 'intl/lwbrk/tools') diff --git a/intl/lwbrk/tools/anzx4051.html b/intl/lwbrk/tools/anzx4051.html new file mode 100644 index 0000000000..9f3461a285 --- /dev/null +++ b/intl/lwbrk/tools/anzx4051.html @@ -0,0 +1,709 @@ + + + + + Analysis of JIS X 4051 to Unicode General Category Mapping + + +

Analysis of JIS X 4051 to Unicode General Category Mapping

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CLMNPSZTotalCcCfCoCsLlLmLoLtLuMcMeMnNdNlNoPcPdPePfPiPoPsScSkSmSoZlZpZs
00_11411512111
01_[a]32231368824211211721
02_7111
03_8111
04_9555
05_[b]33153332513239321153332513
06_15303030
07_1818157335612523911864758133045253643249811
08_COMPLEX543320211101531122101021
09_[c]347322
0A_[d]126251448111633192372
0B_[e]111361113
X0
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
00_101_[a]02_703_804_905_[b]06_1507_1808_COMPLEX09_[c]0A_[d]0B_[e]X
0033101277442
0E16201
1724110
20211151310044
21132163
301047161
+ + diff --git a/intl/lwbrk/tools/anzx4051.pl b/intl/lwbrk/tools/anzx4051.pl new file mode 100644 index 0000000000..e76eac6207 --- /dev/null +++ b/intl/lwbrk/tools/anzx4051.pl @@ -0,0 +1,356 @@ +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +###################################################################### +# +# Initial global variable +# +###################################################################### +%utot = (); +$ui=0; +$li=0; + +###################################################################### +# +# Open the unicode database file +# +###################################################################### +open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt") + || die "cannot find UnicodeData-Latest.txt"; + +###################################################################### +# +# Open the JIS X 4051 Class file +# +###################################################################### +open ( CLASS , "< jisx4051class.txt") + || die "cannot find jisx4051class.txt"; + +###################################################################### +# +# Open the JIS X 4051 Class simplified mapping +# +###################################################################### +open ( SIMP , "< jisx4051simp.txt") + || die "cannot find jisx4051simp.txt"; + +###################################################################### +# +# Open the output file +# +###################################################################### +open ( OUT , "> anzx4051.html") + || die "cannot open output anzx4051.html file"; + +###################################################################### +# +# Open the output file +# +###################################################################### +open ( HEADER , "> ../jisx4051class.h") + || die "cannot open output ../jisx4051class.h file"; + +###################################################################### +# +# Generate license and header +# +###################################################################### +$hthmlheader = < + + + + +Analysis of JIS X 4051 to Unicode General Category Mapping + + + +

+Analysis of JIS X 4051 to Unicode General Category Mapping +

+END_OF_HTML +print OUT $hthmlheader; + +###################################################################### +# +# Generate license and header +# +###################################################################### +$npl = <) { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + $c = $f[0]; # The unicode value + $g = $f[2]; + $d = substr($g, 0, 1); + + $gcat{$c} = $g; + $dcat{$c} = $d; + $gcount{$g}++; + $dcount{$d}++; +} +close(UNIDATA); + +while() { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + + $simp{$f[0]} = $f[1]; + $sccount{$f[1]}++; +} +close(SIMP); + +sub GetClass{ + my ($u) = @_; + my $hex = DecToHex($u); + $g = $gcat{$hex}; + if($g ne "") { + return $g; + } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) { + return "Han"; + } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) { + return "Lo"; + } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f ) ) { + return "Cs"; + } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff ) ) { + return "Cs"; + } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff ) ) { + return "Cs"; + } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) { + return "Co"; + } else { + printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex; + } +} +sub GetDClass{ + my ($u) = @_; + my $hex = DecToHex($u); + $g = $dcat{$hex}; + if($g ne "") { + return $g; + } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) { + return "Han"; + } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) { + return "L"; + } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f ) ) { + return "C"; + } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff ) ) { + return "C"; + } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff ) ) { + return "C"; + } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) { + return "C"; + } else { + printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex; + } +} +sub DecToHex{ + my ($d) = @_; + return sprintf("%04X", $d); +} +%gtotal = (); +%dtotal = (); +while() { + chop; + ###################################################################### + # + # Get value from fields + # + ###################################################################### + @f = split(/;/ , $_); + + if( substr($f[2], 0, 1) ne "a") + { + $sc = $simp{$f[2]}; + $l = hex($f[0]); + if($f[1] eq "") + { + $h = $l; + } else { + $h = hex($f[1]); + } + for($k = $l; $k <= $h ; $k++) + { + if( exists($occ{$k})) + { + # printf "WARNING !! Conflict defination!!! U+%s -> [%s] [%s | %s]\n", + # DecToHex($k), $occ{$k} , $f[2] , $sc; + } + else + { + $occ{$k} = $sc . " | " . $f[2]; + $gclass = GetClass($k); + $dclass = GetDClass($k); + $gtotal{$sc . $gclass}++; + $dtotal{$sc . $dclass}++; + $u = DecToHex($k); + $rk = " " . substr($u,0,2) . ":" . $sc; + $rangecount{$rk}++; + } + } + } +} + +#print %gtotal; +#print %dtotal; + +sub printreport +{ + print OUT "\n"; + print OUT "\n"; + } + + print OUT "\n"; + foreach $g (sort(keys %gcount)) { + print OUT "\n"; + } + print OUT "\n"; + foreach $sc (sort(keys %sccount)) { + + print OUT "\n"; + } + + print OUT "\n"; + + foreach $g (sort(keys %gcount)) { + $count = $gtotal{$sc . $g}; + print OUT "\n"; + } + + + print OUT "\n"; + } + print OUT "
\n"; + + foreach $d (sort(keys %dcount)) { + print OUT "$dTotal$g
$sc\n"; + + $total = 0; + foreach $d (sort (keys %dcount)) { + $count = $dtotal{$sc . $d}; + $total += $count; + print OUT "$count$total$count
\n"; + + + print OUT "\n"; + print OUT "\n"; + } + + print OUT "\n"; + + + for($rr = 0; $rr < 0x4f; $rr++) + { + $empty = 0; + $r = sprintf("%02X" , $rr) ; + $tmp = "\n", $count); + $empty += $count; + } + + $tmp .= "\n"; + + if($empty ne 0) + { + print OUT $tmp; + } + } + print OUT "
\n"; + + foreach $sc (sort(keys %sccount)) + { + print OUT "$sc
" . $r . "\n"; + + foreach $sc (sort(keys %sccount)) { + $count = $rangecount{ " " .$r . ":" .$sc}; + $tmp .= sprintf("%s
\n"; + +} +printreport(); + +sub printarray +{ + my($r, $def) = @_; +printf "[%s || %s]\n", $r, $def; + $k = hex($r) * 256; + printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r; + for($i = 0 ; $i < 256; $i+= 8) + { + for($j = 7 ; $j >= 0; $j-- ) + { + $v = $k + $i + $j; + if( exists($occ{$v})) + { + $p = substr($occ{$v}, 1,1); + } else { + $p = $def; + } + + if($j eq 7 ) + { + printf HEADER "0x%s" , $p; + } else { + printf HEADER "%s", $p ; + } + } + printf HEADER ", // U+%04X - U+%04X\n", $k + $i ,( $k + $i + 7); + } + print HEADER "};\n\n"; +} +printarray("00", "7"); +printarray("20", "7"); +printarray("21", "7"); +printarray("30", "5"); +printarray("0E", "8"); +printarray("17", "7"); + +#print %rangecount; + +###################################################################### +# +# Close files +# +###################################################################### +close(HEADER); +close(CLASS); +close(OUT); + diff --git a/intl/lwbrk/tools/jisx4051class.txt b/intl/lwbrk/tools/jisx4051class.txt new file mode 100644 index 0000000000..c435c1ae55 --- /dev/null +++ b/intl/lwbrk/tools/jisx4051class.txt @@ -0,0 +1,159 @@ +0000;001f;17 +0020;;17 +0024;;24 +0027;;18 +0028;;22 +002D;;18 +002F;;18 +0021;002F;23 +0030;0039;15 +003C;;22 +003A;003F;23 +0040;;18 +0041;005A;18 +005B;;22 +005E;;18 +005F;;18 +005B;005F;23 +0060;;18 +0061;007A;18 +007B;;22 +007B;007E;23 +00A0;;24 +00A3;;22 +00A5;;22 +00A9;;18 +00AA;;18 +00AB;;18 +00AC;;22 +00AE;;18 +00AF;;18 +00A1;00BF;23 +00B0;;18 +00F7;;23 +00C0;00FF;18 +0E3F;;1 +0E2F;;4 +0E46;;4 +0E5A;0E5B;4 +0E50;0E59;15 +0E4F;;18 +0EAF;;4 +0EC6;;4 +0ED0;0ED9;15 +1735;1736;1 +17D4;17D5;4 +17D8;;4 +17DA;;4 +1780;17DD;21 +17E0;17E9;21 +17F0;17F9;21 +2007;;24 +2000;200B;17 +200C;200F;18 +2010;;18 +2011;;24 +2012;2013;18 +2014;;7 +2015;;18 +2016;2017;18 +2019;;23 +201D;;23 +2018;201F;18 +2020;2023;18 +2024;2026;2 +2027;;23 +2028;202E;18 +202F;;24 +2030;2034;9 +2035;2038;18 +2039;;1 +203A;;2 +203B;;12 +203C;203D;3 +203E;;23 +203F;2043;18 +2044;;3 +2045;;1 +2046;;2 +2047;2049;3 +204A;205E;18 +205F;;17 +2060;;24 +2061;2063;18 +206A;206F;18 +2070;2071;18 +2074;208E;18 +2090;2094;18 +2116;;8 +2160;217F;12 +2190;21EA;a12 +2126;;18 +2100;2138;18 +2153;2182;18 +2190;21EA;18 +3008;;1 +300A;;1 +300C;;1 +300E;;1 +3010;;1 +3014;;1 +3016;;1 +3018;;1 +301A;;1 +301D;;1 +3001;;2 +3009;;2 +300B;;2 +300D;;2 +300F;;2 +3011;;2 +3015;;2 +3017;;2 +3019;;2 +301B;;2 +301E;;2 +301F;;2 +3005;;3 +301C;;3 +3041;;3 +3043;;3 +3045;;3 +3047;;3 +3049;;3 +3063;;3 +3083;;3 +3085;;3 +3087;;3 +308E;;3 +309D;;3 +309E;;3 +30A1;;3 +30A3;;3 +30A5;;3 +30A7;;3 +30A9;;3 +30C3;;3 +30E3;;3 +30E5;;3 +30E7;;3 +30EE;;3 +30F5;;3 +30F6;;3 +30FC;;3 +30FD;;3 +30FE;;3 +30FB;;5 +3002;;6 +3000;;10 +3042;3094;11 +3099;309E;3 +3003;;12 +3004;;12 +3006;;12 +3007;;12 +3012;;12 +3013;;12 +3020;;12 +3036;;12 +30A2;30FA;12 diff --git a/intl/lwbrk/tools/jisx4051simp.txt b/intl/lwbrk/tools/jisx4051simp.txt new file mode 100644 index 0000000000..e12a7fd805 --- /dev/null +++ b/intl/lwbrk/tools/jisx4051simp.txt @@ -0,0 +1,24 @@ +1;00_1 +2;01_[a] +3;01_[a] +4;01_[a] +5;01_[a] +6;01_[a] +7;02_7 +8;03_8 +9;04_9 +10;05_[b] +11;05_[b] +12;05_[b] +13;X +14;X +15;06_15 +16;X +17;05_[b] +18;07_18 +19;X +20;X +21;08_COMPLEX +22;09_[c] +23;0A_[d] +24;0B_[e] diff --git a/intl/lwbrk/tools/spec_table.html b/intl/lwbrk/tools/spec_table.html new file mode 100644 index 0000000000..b7a642a332 --- /dev/null +++ b/intl/lwbrk/tools/spec_table.html @@ -0,0 +1,664 @@ + + + + + + + + + + +

This is a specification table for line breaking.

+

+ The values of IE7 and Opera9: 'A' means that the line is breakable After + the character, and 'B' means Before. 'BA' means Before and After. +

+

+ (C) which is the tail of the IE7 and the Opera9 means Character. (N) means + Numeric. This means that they are around the character at testing. E.g., + "a$a" is a testcase for (C), "0$0" is a testcase for (N). +

+

+ Gecko is not breaking the lines on most western language context. But for + file paths, URLs and very long word which is connected hyphens, some + characters might be breakable. They are 'breakable' in the table. However, + they are not always breakable, they depend on the context in the + word. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
characterGeckoIE7(C)IE7(N)Opera9.2(C)Opera9.2(N)
characterGeckoIE7(C)IE7(N)Opera9.2(C)Opera9.2(N)
0x21!AA
0x22"
0x23#
0x24$B
0x25%breakableAA
0x26&breakable
0x27'
0x28(BB
0x29)AA
0x2A*
0x2B+
0x2C,
0x2D-breakableBABAAA
0x2E.
0x2F/breakableAA
0x3A:
0x3B;breakable
0x3C<
0x3D=
0x3E>
0x3F?AA
0x40@
0x5B[BB
0x5C\breakableB
0x5D]AA
0x5E^
0x5F_
0x60`
0x7B{BB
0x7C|AA
0x7D}AA
0x7E~
0xA1¡
0xA2¢AA
0xA3£B
0xA4¤
0xA5¥B
0xA6¦
0xA7§
0xA8¨
0xA9©
0xAAª
0xAB«
0xAC¬
0xAE®
0xAF¯
0xB0°AA
0xB1±
0xB2²
0xB3³
0xB4´BB
0xB5µ
0xB6
0xB7·
0xB8¸
0xB9¹
0xBAº
0xBB»
0xBC¼
0xBD½
0xBE¾
0xBF¿
0xD7×
0xF7÷
+ + -- cgit v1.2.3