summaryrefslogtreecommitdiffstats
path: root/intl/lwbrk/jisx4051pairtable.txt
diff options
context:
space:
mode:
Diffstat (limited to 'intl/lwbrk/jisx4051pairtable.txt')
-rw-r--r--intl/lwbrk/jisx4051pairtable.txt286
1 files changed, 286 insertions, 0 deletions
diff --git a/intl/lwbrk/jisx4051pairtable.txt b/intl/lwbrk/jisx4051pairtable.txt
new file mode 100644
index 0000000000..2bae1b18fe
--- /dev/null
+++ b/intl/lwbrk/jisx4051pairtable.txt
@@ -0,0 +1,286 @@
+
+
+
+/*
+
+ Simplification of Pair Table in JIS X 4051
+
+ 1. The Original Table - in 4.1.3
+
+ In JIS X 4051, the pair table is defined as below:
+
+ Class of
+ Leading Class of Trailing Char Class
+ Char
+
+ 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
+ * # * #
+ 1 X X X X X X X X X X X X X X X X X X X X X E
+ 2 X X X X X X
+ 3 X X X X X X
+ 4 X X X X X X
+ 5 X X X X X X
+ 6 X X X X X X
+ 7 X X X X X X X
+ 8 X X X X X X E
+ 9 X X X X X X
+ 10 X X X X X X
+ 11 X X X X X X
+ 12 X X X X X X
+ 13 X X X X X X X
+ 14 X X X X X X X
+ 15 X X X X X X X X X
+ 16 X X X X X X X X
+ 17 X X X X X E
+ 18 X X X X X X X X X
+ 19 X E E E E E X X X X X X X X X X X X E X E E
+ 20 X X X X X E
+
+ * Same Char
+ # Other Char
+
+ 2. Simplified by removing the classes which we do not care about
+
+ However, since we do not care about class 13 (Subscript), 14 (Ruby),
+ 19 (split line note begin quote), and 20 (split line note end quote),
+ we can simplify this pair table into the following:
+
+ Class of
+ Leading Class of Trailing Char Class
+ Char
+
+ 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18
+
+ 1 X X X X X X X X X X X X X X X X
+ 2 X X X X X
+ 3 X X X X X
+ 4 X X X X X
+ 5 X X X X X
+ 6 X X X X X
+ 7 X X X X X X
+ 8 X X X X X X
+ 9 X X X X X
+ 10 X X X X X
+ 11 X X X X X
+ 12 X X X X X
+ 15 X X X X X X X X
+ 16 X X X X X X X
+ 17 X X X X X
+ 18 X X X X X X X X
+
+ 3. Simplified by merging classes
+
+ After the second simplification, the pair table has some duplication:
+ a. classes 2, 3, 4, 5 and 6 are the same - we can merge them into [a]
+ b. classes 10, 11, 12 and 17 are the same - we can merge them into [b]
+
+
+ Class of
+ Leading Class of Trailing Char Class
+ Char
+
+ 1 [a] 7 8 9 [b]15 16 18
+
+ 1 X X X X X X X X X
+ [a] X
+ 7 X X
+ 8 X X
+ 9 X
+ [b] X
+ 15 X X X X
+ 16 X X X
+ 18 X X X X
+
+
+ 4. Now we use one bit to encode whether it is breakable, and use 2 bytes
+ for one row; the bit table will then look like:
+
+ 18 <- 1
+
+ 1 0000 0001 1111 1111 = 0x01FF
+ [a] 0000 0000 0000 0010 = 0x0002
+ 7 0000 0000 0000 0110 = 0x0006
+ 8 0000 0000 0100 0010 = 0x0042
+ 9 0000 0000 0000 0010 = 0x0002
+ [b] 0000 0000 0000 0010 = 0x0002
+ 15 0000 0001 0101 0010 = 0x0152
+ 16 0000 0001 1000 0010 = 0x0182
+ 18 0000 0001 1100 0010 = 0x01C2
+
+*/
+
+// Simplified JIS X 4051 pair table: one 16-bit row per merged leading class,
+// indexed by nsJISx4051Cls value. Bit N of a row is set when the pair
+// (leading class = row, trailing class = N) is marked in the merged table.
+// The [b] row has a single mark (trailing class [a]), i.e. 0x0002; the
+// previous value 0x0042 contradicted its own bit pattern
+// (0000 0000 0000 0010) in the derivation.
+static uint16_t gJISx4051SimplifiedPair[9] = {
+  0x01FF,  // 1
+  0x0002,  // [a] = 2, 3, 4, 5, 6
+  0x0006,  // 7
+  0x0042,  // 8
+  0x0002,  // 9
+  0x0002,  // [b] = 10, 11, 12, 17
+  0x0152,  // 15
+  0x0182,  // 16
+  0x01C2   // 18
+};
+
+// Looks up a (leading, trailing) class pair in gJISx4051SimplifiedPair.
+//
+// aCls1 - merged class of the leading character; selects the table row.
+// aCls2 - merged class of the trailing character; selects the bit in the row.
+//
+// Returns true when that bit is set, i.e. when the pair is marked in the
+// simplified pair table. Both classes must be below 9 (the table has nine
+// rows and uses nine bits per row); this is asserted.
+PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls2)
+{
+  NS_ASSERTION( (aCls1 < 9), "invalid class");
+  NS_ASSERTION( (aCls2 < 9), "invalid class");
+  return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
+}
+
+
+// True when u lies in 0x0030..0x0039 inclusive (ASCII '0'..'9').
+// The bounds were previously compared the wrong way round, which made the
+// test false for every value. Note: u is evaluated twice.
+#define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
+
+// Maps a character to its (merged) JIS X 4051 class.
+//
+// aChar   - the character to classify.
+// aBefore - context character, default 0 (presumably the one preceding
+//           aChar -- confirm against callers).
+// aAfter  - context character, default 0 (presumably the one following
+//           aChar -- confirm against callers).
+//
+// A comma (0x2C) or period (0x2E) whose aBefore and aAfter both satisfy
+// X4051_IS_DIGIT is reported as class 15. Otherwise gSingle is consulted,
+// then gRange; when neither lookup succeeds the result falls back to
+// class 15.
+nsJISx4051Cls XXXX::GetClass(
+  PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
+{
+  // take care of the special case in cls 15
+  if( ((0x2C == aChar) || (0x2E == aChar)) &&
+      X4051_IS_DIGIT(aBefore) && X4051_IS_DIGIT(aAfter) )
+  {
+    return kJISx4051Cls_15;
+  }
+
+  nsJISx4051Cls cls;
+  if(gSingle->Lookup(aChar, &cls))
+    return cls;
+
+  if(gRange->Lookup(aChar, &cls))
+    return cls;
+
+  return kJISx4051Cls_15;
+}
+
+
+// Merged JIS X 4051 character classes. Each value is the row (and bit)
+// index used with gJISx4051SimplifiedPair, so classes that were merged
+// share one value.
+typedef enum {
+  kJISx4051Cls_1  = 0,
+
+  // merged group [a]: classes 2, 3, 4, 5, 6
+  kJISx4051Cls_2  = 1,
+  kJISx4051Cls_3  = kJISx4051Cls_2,
+  kJISx4051Cls_4  = kJISx4051Cls_2,
+  kJISx4051Cls_5  = kJISx4051Cls_2,
+  kJISx4051Cls_6  = kJISx4051Cls_2,
+
+  kJISx4051Cls_7  = 2,
+  kJISx4051Cls_8  = 3,
+  kJISx4051Cls_9  = 4,
+
+  // merged group [b]: classes 10, 11, 12, 17
+  kJISx4051Cls_10 = 5,
+  kJISx4051Cls_11 = kJISx4051Cls_10,
+  kJISx4051Cls_12 = kJISx4051Cls_10,
+  kJISx4051Cls_17 = kJISx4051Cls_10,
+
+  kJISx4051Cls_15 = 6,
+  kJISx4051Cls_16 = 7,
+  kJISx4051Cls_18 = 8
+
+  // Classes 13, 14, 19 and 20 intentionally have no enumerator.
+} nsJISx4051Cls;
+
+
+ // Table 2 -- class 1 entries. Each YYYY(cls, ch) entry pairs a class with one character code; YYYY is a placeholder name (presumably these feed the single-character lookup used by GetClass -- confirm).
+ YYYY(kJISx4051Cls_1 , 0x0028),
+ YYYY(kJISx4051Cls_1 , 0x005B),
+ YYYY(kJISx4051Cls_1 , 0x007B),
+ YYYY(kJISx4051Cls_1 , 0x2018),
+ YYYY(kJISx4051Cls_1 , 0x201B),
+ YYYY(kJISx4051Cls_1 , 0x201C),
+ YYYY(kJISx4051Cls_1 , 0x201F),
+ YYYY(kJISx4051Cls_1 , 0x3008),
+ YYYY(kJISx4051Cls_1 , 0x300A),
+ YYYY(kJISx4051Cls_1 , 0x300C),
+ YYYY(kJISx4051Cls_1 , 0x300E),
+ YYYY(kJISx4051Cls_1 , 0x3010),
+ YYYY(kJISx4051Cls_1 , 0x3014),
+ YYYY(kJISx4051Cls_1 , 0x3016),
+ YYYY(kJISx4051Cls_1 , 0x3018),
+ YYYY(kJISx4051Cls_1 , 0x301A),
+ YYYY(kJISx4051Cls_1 , 0x301D),
+
+ // Table 3 -- class 2 entries (class 2 is part of merged group [a])
+ YYYY(kJISx4051Cls_2 , 0x0029),
+ YYYY(kJISx4051Cls_2 , 0x002C),
+ YYYY(kJISx4051Cls_2 , 0x005D),
+ YYYY(kJISx4051Cls_2 , 0x007D),
+ YYYY(kJISx4051Cls_2 , 0x2019),
+ YYYY(kJISx4051Cls_2 , 0x201A),
+ YYYY(kJISx4051Cls_2 , 0x201D),
+ YYYY(kJISx4051Cls_2 , 0x201E),
+ YYYY(kJISx4051Cls_2 , 0x3001),
+ YYYY(kJISx4051Cls_2 , 0x3009),
+ YYYY(kJISx4051Cls_2 , 0x300B),
+ YYYY(kJISx4051Cls_2 , 0x300D),
+ YYYY(kJISx4051Cls_2 , 0x300F),
+ YYYY(kJISx4051Cls_2 , 0x3011),
+ YYYY(kJISx4051Cls_2 , 0x3015),
+ YYYY(kJISx4051Cls_2 , 0x3017),
+ YYYY(kJISx4051Cls_2 , 0x3019),
+ YYYY(kJISx4051Cls_2 , 0x301B),
+ YYYY(kJISx4051Cls_2 , 0x301E),
+ YYYY(kJISx4051Cls_2 , 0x301F),
+
+ // Table 4 -- class 3 entries (merged group [a])
+ YYYY(kJISx4051Cls_3 , 0x203C),
+ YYYY(kJISx4051Cls_3 , 0x2044),
+ YYYY(kJISx4051Cls_3 , 0x301C),
+ YYYY(kJISx4051Cls_3 , 0x3041),
+ YYYY(kJISx4051Cls_3 , 0x3043),
+ YYYY(kJISx4051Cls_3 , 0x3045),
+ YYYY(kJISx4051Cls_3 , 0x3047),
+ YYYY(kJISx4051Cls_3 , 0x3049),
+ YYYY(kJISx4051Cls_3 , 0x3063),
+ YYYY(kJISx4051Cls_3 , 0x3083),
+ YYYY(kJISx4051Cls_3 , 0x3085),
+ YYYY(kJISx4051Cls_3 , 0x3087),
+ YYYY(kJISx4051Cls_3 , 0x308E),
+ YYYY(kJISx4051Cls_3 , 0x309D),
+ YYYY(kJISx4051Cls_3 , 0x309E),
+ YYYY(kJISx4051Cls_3 , 0x30A1),
+ YYYY(kJISx4051Cls_3 , 0x30A3),
+ YYYY(kJISx4051Cls_3 , 0x30A5),
+ YYYY(kJISx4051Cls_3 , 0x30A7),
+ YYYY(kJISx4051Cls_3 , 0x30A9),
+ YYYY(kJISx4051Cls_3 , 0x30C3),
+ YYYY(kJISx4051Cls_3 , 0x30E3),
+ YYYY(kJISx4051Cls_3 , 0x30E5),
+ YYYY(kJISx4051Cls_3 , 0x30E7),
+ YYYY(kJISx4051Cls_3 , 0x30EE),
+ YYYY(kJISx4051Cls_3 , 0x30F5),
+ YYYY(kJISx4051Cls_3 , 0x30F6),
+ YYYY(kJISx4051Cls_3 , 0x30FC),
+ YYYY(kJISx4051Cls_3 , 0x30FD),
+ YYYY(kJISx4051Cls_3 , 0x30FE),
+
+ // Table 5 -- class 4 entries (merged group [a])
+ YYYY(kJISx4051Cls_4 , 0x0021),
+ YYYY(kJISx4051Cls_4 , 0x003F),
+
+ // Table 6 -- class 5 entries (merged group [a])
+ YYYY(kJISx4051Cls_5 , 0x003A),
+ YYYY(kJISx4051Cls_5 , 0x003B),
+ YYYY(kJISx4051Cls_5 , 0x30FB),
+
+ // Table 7 -- class 6 entries (merged group [a])
+ YYYY(kJISx4051Cls_6 , 0x002E),
+ YYYY(kJISx4051Cls_6 , 0x3002),
+
+ // Table 8 -- class 7 entries
+ YYYY(kJISx4051Cls_7 , 0x2014),
+ YYYY(kJISx4051Cls_7 , 0x2024),
+ YYYY(kJISx4051Cls_7 , 0x2025),
+ YYYY(kJISx4051Cls_7 , 0x2026),
+
+ // Table 9 -- class 8 entries
+ YYYY(kJISx4051Cls_8 , 0x0024),
+ YYYY(kJISx4051Cls_8 , 0x00A3),
+ YYYY(kJISx4051Cls_8 , 0x00A5),
+ YYYY(kJISx4051Cls_8 , 0x2116),
+
+ // Table 10 -- class 9 entries
+ YYYY(kJISx4051Cls_9 , 0x0025),
+ YYYY(kJISx4051Cls_9 , 0x00A2),
+ YYYY(kJISx4051Cls_9 , 0x00B0),
+ YYYY(kJISx4051Cls_9 , 0x2030),
+ YYYY(kJISx4051Cls_9 , 0x2031),
+ YYYY(kJISx4051Cls_9 , 0x2032),
+ YYYY(kJISx4051Cls_9 , 0x2033),
+
+ // Table 1 -- class 10 entries. NOTE(review): label reads "Table 1"; following the numbering above this is presumably Table 11 (truncated) -- confirm.
+ YYYY(kJISx4051Cls_10, 0x3000),
+
+ // Table 1 -- class 11 entries. NOTE(review): label reads "Table 1"; following the numbering above this is presumably Table 12 (truncated) -- confirm.
+ ZZZZ(kJISx4051Cls_11, 0x3000), // NOTE(review): uses ZZZZ instead of YYYY and repeats 0x3000 from the class 10 entry -- looks unfinished; confirm the intended macro and character(s).
+
+
+
+