summaryrefslogtreecommitdiffstats
path: root/mysql-test/include/ctype_unicode520.inc
blob: 46c8c7d8ec92168948b51ba828e03b1e96260df5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#
# WL#2673 Unicode collation algorithm new version
#
# Create an empty table t1 with a single string column c.  The column
# definition is derived from a 10-character repeat() expression and LIMIT 0
# keeps the table empty.  No character set or collation is named here, so the
# column takes whatever is in effect when this file is sourced (presumably set
# up by the including test; verify against the caller).
CREATE TABLE t1 AS SELECT repeat('a', 10) as c LIMIT 0;
# Print the resulting table definition.
SHOW CREATE TABLE t1;

#
# Unicode-5.0.0 characters
#

# Latin Extended-B and IPA Extensions
# One single-character row per code point, written as a _utf32 hex literal.
# 0x0180 and 0x023A-0x024F (0x023F and 0x0240 are skipped) are in Latin
# Extended-B; 0x026B, 0x027D, 0x0289 and 0x028C are in IPA Extensions.
INSERT INTO t1 VALUES (_utf32 0x0180),(_utf32 0x023A);
INSERT INTO t1 VALUES (_utf32 0x023B),(_utf32 0x023C);
INSERT INTO t1 VALUES (_utf32 0x023D),(_utf32 0x023E);
INSERT INTO t1 VALUES (_utf32 0x0241),(_utf32 0x0242);
INSERT INTO t1 VALUES (_utf32 0x0243),(_utf32 0x0244);
INSERT INTO t1 VALUES (_utf32 0x0245),(_utf32 0x0246);
INSERT INTO t1 VALUES (_utf32 0x0247),(_utf32 0x0248);
INSERT INTO t1 VALUES (_utf32 0x0249),(_utf32 0x024A);
INSERT INTO t1 VALUES (_utf32 0x024B),(_utf32 0x024C);
INSERT INTO t1 VALUES (_utf32 0x024D),(_utf32 0x024E);
INSERT INTO t1 VALUES (_utf32 0x024F),(_utf32 0x026B);
INSERT INTO t1 VALUES (_utf32 0x027D),(_utf32 0x0289);
INSERT INTO t1 VALUES (_utf32 0x028C);

# Greek and Coptic
# Two runs of three code points: 0x037B-0x037D and 0x03FD-0x03FF.
# NOTE(review): these look like the small and capital forms of the same three
# letters -- confirm against UnicodeData.txt.
INSERT INTO t1 VALUES (_utf32 0x037B), (_utf32 0x037C);
INSERT INTO t1 VALUES (_utf32 0x037D), (_utf32 0x03FD);
INSERT INTO t1 VALUES (_utf32 0x03FE), (_utf32 0x03FF);

# Cyrillic
# Covers 0x04C0/0x04CF, 0x04F6-0x04F7, 0x04FA-0x04FF and 0x0510-0x0513.
# NOTE(review): apart from the first row, each statement appears to insert a
# capital letter followed by its small form -- confirm against UnicodeData.txt.
INSERT INTO t1 VALUES (_utf32 0x04C0), (_utf32 0x04CF);
INSERT INTO t1 VALUES (_utf32 0x04F6), (_utf32 0x04F7);
INSERT INTO t1 VALUES (_utf32 0x04FA), (_utf32 0x04FB); 
INSERT INTO t1 VALUES (_utf32 0x04FC), (_utf32 0x04FD);
INSERT INTO t1 VALUES (_utf32 0x04FE), (_utf32 0x04FF);
INSERT INTO t1 VALUES (_utf32 0x0510), (_utf32 0x0511);
INSERT INTO t1 VALUES (_utf32 0x0512), (_utf32 0x0513);

# Georgian, Georgian Supplement
# First eight code points of each block: 0x10A0-0x10A7 (Georgian) and
# 0x2D00-0x2D07 (Georgian Supplement).
# NOTE(review): presumably chosen because the two ranges are case pairs of
# each other -- confirm against UnicodeData.txt.
INSERT INTO t1 VALUES (_utf32 0x10A0), (_utf32 0x10A1);
INSERT INTO t1 VALUES (_utf32 0x10A2), (_utf32 0x10A3);
INSERT INTO t1 VALUES (_utf32 0x10A4), (_utf32 0x10A5);
INSERT INTO t1 VALUES (_utf32 0x10A6), (_utf32 0x10A7);
INSERT INTO t1 VALUES (_utf32 0x2D00), (_utf32 0x2D01);
INSERT INTO t1 VALUES (_utf32 0x2D02), (_utf32 0x2D03);
INSERT INTO t1 VALUES (_utf32 0x2D04), (_utf32 0x2D05);
INSERT INTO t1 VALUES (_utf32 0x2D06), (_utf32 0x2D07);

# Phonetic Extensions
# A single code point from this block.
INSERT INTO t1 VALUES (_utf32 0x1D7D);

# Letterlike Symbols
# Two code points inserted together.
INSERT INTO t1 VALUES (_utf32 0x2132),(_utf32 0x214E);

# Number Forms
# Two adjacent code points inserted together.
INSERT INTO t1 VALUES (_utf32 0x2183),(_utf32 0x2184);

# Coptic
# Sixteen consecutive code points, 0x2C80-0x2C8F, two per statement.
# NOTE(review): each statement appears to hold a capital letter (even code
# point) and its small form (odd code point) -- confirm against
# UnicodeData.txt.
INSERT INTO t1 VALUES (_utf32 0x2C80), (_utf32 0x2C81);
INSERT INTO t1 VALUES (_utf32 0x2C82), (_utf32 0x2C83);
INSERT INTO t1 VALUES (_utf32 0x2C84), (_utf32 0x2C85);
INSERT INTO t1 VALUES (_utf32 0x2C86), (_utf32 0x2C87);
INSERT INTO t1 VALUES (_utf32 0x2C88), (_utf32 0x2C89);
INSERT INTO t1 VALUES (_utf32 0x2C8A), (_utf32 0x2C8B);
INSERT INTO t1 VALUES (_utf32 0x2C8C), (_utf32 0x2C8D);
INSERT INTO t1 VALUES (_utf32 0x2C8E), (_utf32 0x2C8F);

# Latin Extended-C
# Code points 0x2C60-0x2C6C followed by 0x2C75 and 0x2C76; the range
# 0x2C6D-0x2C74 is not inserted.
INSERT INTO t1 VALUES (_utf32 0x2C60), (_utf32 0x2C61);
INSERT INTO t1 VALUES (_utf32 0x2C62), (_utf32 0x2C63);
INSERT INTO t1 VALUES (_utf32 0x2C64), (_utf32 0x2C65);
INSERT INTO t1 VALUES (_utf32 0x2C66), (_utf32 0x2C67);
INSERT INTO t1 VALUES (_utf32 0x2C68), (_utf32 0x2C69);
INSERT INTO t1 VALUES (_utf32 0x2C6A), (_utf32 0x2C6B);
INSERT INTO t1 VALUES (_utf32 0x2C6C), (_utf32 0x2C75);
INSERT INTO t1 VALUES (_utf32 0x2C76);

# Glagolitic
# Two runs of eight code points: 0x2C00-0x2C07 and 0x2C30-0x2C37.
# NOTE(review): the second run is the first shifted by 0x30, which looks like
# the capital-to-small offset for this script -- confirm against
# UnicodeData.txt.
INSERT INTO t1 VALUES (_utf32 0x2C00), (_utf32 0x2C01);
INSERT INTO t1 VALUES (_utf32 0x2C02), (_utf32 0x2C03);
INSERT INTO t1 VALUES (_utf32 0x2C04), (_utf32 0x2C05);
INSERT INTO t1 VALUES (_utf32 0x2C06), (_utf32 0x2C07);
INSERT INTO t1 VALUES (_utf32 0x2C30), (_utf32 0x2C31);
INSERT INTO t1 VALUES (_utf32 0x2C32), (_utf32 0x2C33);
INSERT INTO t1 VALUES (_utf32 0x2C34), (_utf32 0x2C35);
INSERT INTO t1 VALUES (_utf32 0x2C36), (_utf32 0x2C37);

# Deseret
# Supplementary characters (code points above 0xFFFF): 0x10400-0x10407 and
# 0x10428-0x1042F.  0x10400 and 0x10428 are used again by the LIKE queries
# near the end of this file, which check case-insensitive matching for
# supplementary characters.
INSERT INTO t1 VALUES (_utf32 0x10400), (_utf32 0x10401);
INSERT INTO t1 VALUES (_utf32 0x10402), (_utf32 0x10403);
INSERT INTO t1 VALUES (_utf32 0x10404), (_utf32 0x10405);
INSERT INTO t1 VALUES (_utf32 0x10406), (_utf32 0x10407);
INSERT INTO t1 VALUES (_utf32 0x10428), (_utf32 0x10429);
INSERT INTO t1 VALUES (_utf32 0x1042A), (_utf32 0x1042B);
INSERT INTO t1 VALUES (_utf32 0x1042C), (_utf32 0x1042D);
INSERT INTO t1 VALUES (_utf32 0x1042E), (_utf32 0x1042F);


#
# Unicode 5.1.0 characters
#

# One row per statement; the trailing comment gives the character name.
# Within each group the capital letter is immediately followed by its small
# form.

# Greek: 0x0370-0x0373
INSERT INTO t1 VALUES (_utf32 0x0370); # GREEK CAPITAL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0371); # GREEK SMALL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0372); # GREEK CAPITAL LETTER ARCHAIC SAMPI
INSERT INTO t1 VALUES (_utf32 0x0373); # GREEK SMALL LETTER ARCHAIC SAMPI

# Cyrillic: 0x0514-0x0517
INSERT INTO t1 VALUES (_utf32 0x0514); # CYRILLIC CAPITAL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0515); # CYRILLIC SMALL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0516); # CYRILLIC CAPITAL LETTER RHA
INSERT INTO t1 VALUES (_utf32 0x0517); # CYRILLIC SMALL LETTER RHA

# Cyrillic: 0xA640-0xA643
INSERT INTO t1 VALUES (_utf32 0xA640); # CYRILLIC CAPITAL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA641); # CYRILLIC SMALL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA642); # CYRILLIC CAPITAL LETTER DZELO
INSERT INTO t1 VALUES (_utf32 0xA643); # CYRILLIC SMALL LETTER DZELO

# Latin: 0xA722-0xA725
INSERT INTO t1 VALUES (_utf32 0xA722); # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA723); # LATIN SMALL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA724); # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
INSERT INTO t1 VALUES (_utf32 0xA725); # LATIN SMALL LETTER EGYPTOLOGICAL AIN

# Latin: 0xA726-0xA72B
INSERT INTO t1 VALUES (_utf32 0xA726); # LATIN CAPITAL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA727); # LATIN SMALL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA728); # LATIN CAPITAL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA729); # LATIN SMALL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA72A); # LATIN CAPITAL LETTER TRESILLO
INSERT INTO t1 VALUES (_utf32 0xA72B); # LATIN SMALL LETTER TRESILLO

#
# Unicode 5.2.0 characters
#

# Coptic cryptogrammic letters 0x2CEB-0x2CEE, one row per statement.
# The trailing comments are semicolon-separated records: character name,
# general category (Lu = uppercase letter, Ll = lowercase letter), and so on.
# NOTE(review): the layout matches UnicodeData.txt, where the final hex fields
# are the upper/lower/title case mappings (0x2CEB <-> 0x2CEC and
# 0x2CED <-> 0x2CEE) -- confirm against the Unicode 5.2.0 data file.
INSERT INTO t1 VALUES (_utf32 0x2CEB); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI;Lu;0;L;;;;;N;;;;2CEC;
INSERT INTO t1 VALUES (_utf32 0x2CEC); # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI;Ll;0;L;;;;;N;;;2CEB;;2CEB
INSERT INTO t1 VALUES (_utf32 0x2CED); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA;Lu;0;L;;;;;N;;;;2CEE;
INSERT INTO t1 VALUES (_utf32 0x2CEE); # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA;Ll;0;L;;;;;N;;;2CED;;2CED

#
# Check case folding and UCA weights
#
# For every row inserted so far, output the hex encoding of c, of lower(c),
# of upper(c) and of weight_string(c), followed by c itself.  Rows are sorted
# by c under the column collation; BINARY c is a secondary sort key so that
# rows comparing equal under the collation still come out in a fixed order.
SELECT hex(c), hex(lower(c)), hex(upper(c)), hex(weight_string(c)), c
FROM t1 ORDER BY c, BINARY c;


#
# Check that LIKE works fine with and without index.
# This test makes sure that cs->min_sort_char and cs->max_sort_char
# are set properly
# Also check that LIKE is case insensitive for supplementary characters
#
# Add rows that start with the letter a (0x61): the bare letter, then the
# letter followed by 0xFFFF, by 0x10FFFF, and by the supplementary character
# 0x10400.
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0xFFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10FFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10400));
# Round 1: t1 has no index on c yet.
# Prefix match on the rows added above.
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
# Match a single supplementary character.  0x10400 and 0x10428 were both
# inserted in the Deseret section; the two queries use one each as the
# pattern to exercise case-insensitive LIKE.
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;
# Round 2: add an index on c and repeat the same three queries.
ALTER TABLE t1 ADD KEY(c);
# Record the execution plan chosen for the prefix LIKE now that the key exists.
EXPLAIN SELECT hex(c) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;

# Clean up the table created at the top of this file.
DROP TABLE t1;