1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
|
# 2002 May 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library. The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
# Work out the directory that contains this script and load tester.tcl from
# it. The helper commands used below (do_test, finish_test, ifcapable, ...)
# are not defined in this file; presumably tester.tcl or the test executable
# provides them.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Skip this test if the build does not support multiple encodings.
# In that case the script finishes and returns before any test is defined.
#
ifcapable {!utf16} {
finish_test
return
}
# Compare two binary strings byte by byte through do_test.
#
#   testname  name handed to do_test
#   got       the byte string actually produced
#   expect    the byte string that was expected
#
# Both strings are decoded into lists of signed 8-bit integers so that the
# comparison (and any failure report) deals with plain numbers rather than
# raw bytes.
proc do_bincmp_test {testname got expect} {
  binary scan $got    c* actualBytes
  binary scan $expect c* wantedBytes
  do_test $testname [list set dummy $actualBytes] $wantedBytes
}
# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# Returns the byte-swapped string. An empty input yields an empty result.
# The input is expected to hold an even number of bytes, as any UTF-16
# string does; odd-length input is not supported.
proc swap_byte_order {utf16} {
  binary scan $utf16 c* ints
  # Start from an empty list so that an empty input does not leave the
  # accumulator undefined when it is read below.
  set ints2 {}
  foreach {a b} $ints {
    lappend ints2 $b $a
  }
  return [binary format c* $ints2]
}
#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
# SQLite routines produce the same results.
#
# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
# SQLite routines produce the same results.
#
# 3. Use the SQLite routines to convert the native machine order UTF-16
# representation back to the original UTF-8. Check that the result
# matches the original representation.
#
# 4. Add a byte-order mark to each of the UTF-16 representations and
# check that the SQLite routines can convert them back to UTF-8. For
# byte-order mark info, refer to section 3.10 of the unicode standard.
#
# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
# that SQLite can convert them both to native byte order UTF-16
# strings, sans BOM.
#
# Coverage:
#
# sqlite_utf8to16be (step 2)
# sqlite_utf8to16le (step 1)
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
  # Runs conversion steps 1 to 5 on $str. Every sub-test name starts
  # with $testname.

  # The host byte order is needed in several places; evaluate it once.
  set hostIsLE [expr {$::tcl_platform(byteOrder) == "littleEndian"}]

  # Step 1: UTF-8 to UTF-16le. The Tcl reference value gets two zero
  # bytes appended and is byte-swapped when the host is not little-endian.
  set viaSqlite [test_translate $str UTF8 UTF16LE]
  set utf16le [encoding convertto unicode $str]
  append utf16le "\x00\x00"
  if {!$hostIsLE} {
    set utf16le [swap_byte_order $utf16le]
  }
  do_bincmp_test $testname.1 $viaSqlite $utf16le

  # Step 2: UTF-8 to UTF-16be. Same as step 1, with the swap applied on
  # little-endian hosts instead.
  set viaSqlite [test_translate $str UTF8 UTF16BE]
  set utf16be [encoding convertto unicode $str]
  append utf16be "\x00\x00"
  if {$hostIsLE} {
    set utf16be [swap_byte_order $utf16be]
  }
  do_bincmp_test $testname.2 $viaSqlite $utf16be

  # Step 3: native-order UTF-16 back to UTF-8.
  if {$hostIsLE} {
    set nativeUtf16 $utf16le
  } else {
    set nativeUtf16 $utf16be
  }
  set viaSqlite [test_translate $nativeUtf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $viaSqlite [binarize $str]

  # Step 4 (little endian): prefix the BOM FF FE and convert to UTF-8.
  append leWithBom "\xFF\xFE" $utf16le
  set viaSqlite [test_translate $leWithBom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $viaSqlite [binarize $str]

  # Step 4 (big endian): prefix the BOM FE FF and convert to UTF-8.
  append beWithBom "\xFE\xFF" $utf16be
  set viaSqlite [test_translate $beWithBom UTF16 UTF8]
  do_bincmp_test $testname.4.be $viaSqlite [binarize $str]

  # Step 5: BOM-marked UTF-16 to UTF-16 without BOM. Each row of the table
  # is: name suffix, input, source encoding, target encoding, expected.
  foreach {suffix marked srcEnc dstEnc wanted} [list \
      le.le $leWithBom UTF16LE UTF16LE $utf16le \
      be.be $beWithBom UTF16   UTF16BE $utf16be \
      be.le $beWithBom UTF16   UTF16LE $utf16le \
      le.be $leWithBom UTF16   UTF16BE $utf16be \
  ] {
    set viaSqlite [test_translate $marked $srcEnc $dstEnc]
    do_bincmp_test $testname.5.$suffix $viaSqlite $wanted
  }
}
# Run the self-test command first, then feed every sample string through
# test_conversion. Each pair below is: test-name prefix, sample string.
translate_selftest
foreach {encCaseName encCaseText} [list \
    enc-1  "hello world" \
    enc-2  "sqlite" \
    enc-3  "" \
    enc-X  "\u0100" \
    enc-4  "\u1234" \
    enc-5  "\u4321abc" \
    enc-6  "\u4321\u1234" \
    enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
    enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
    enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
    enc-10 [string repeat "\uE000" 100] \
] {
  test_conversion $encCaseName $encCaseText
}
# Drop the loop variables again so no extra globals are left behind.
unset encCaseName encCaseText
# Collation callback: orders two strings with Tcl's plain string comparison.
#
#   enc     accepted but not used by the comparison
#   zLeft   left-hand string
#   zRight  right-hand string
#
# Returns -1, 0 or 1, exactly as "string compare" does.
proc test_collate {enc zLeft zRight} {
  string compare $zLeft $zRight
}
# Register the collation used by the enc-11 tests on the default connection.
# NOTE(review): the meaning of the three flags (0 0 1) is defined by the
# add_test_collate command, which is not visible in this file. Presumably
# they select the text encodings the collation is registered for - confirm.
add_test_collate $::DB 0 0 1
# enc-11.1: create a table whose first column uses test_collate, insert two
# rows whose first column is built from raw bytes cast to TEXT, then index
# the table. The statements are expected to produce no output.
#   - X'C388' is the UTF-8 encoding of U+00C8.
#   - The second value begins with 0xC0, continues with a long run of 0x80
#     bytes and ends with 0x83 0x88; it does not look like well-formed UTF-8.
do_test enc-11.1 {
execsql {
CREATE TABLE ab(a COLLATE test_collate, b);
INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
CREATE INDEX ab_i ON ab(a, b);
}
} {}
# enc-11.2: look the rows up by the single character U+00C8. The expected
# count is 2, i.e. both rows inserted above must compare equal to it.
do_test enc-11.2 {
set cp200 "\u00C8"
execsql {
SELECT count(*) FROM ab WHERE a = $::cp200;
}
} {2}
#-------------------------------------------------------------------------
# enc-12.*: how PRAGMA encoding interacts with ATTACH, TEMP tables and the
# backup command when the databases involved use different text encodings.
#
# Start from a fresh main database and remove the two auxiliary files.
reset_db
forcedelete test.db2
forcedelete test.db3
# enc-12.0: main database set to UTF-8 with table t1; test.db3 is attached
# as aux and given table t3. The only output expected is the value reported
# by the final PRAGMA encoding.
do_execsql_test enc-12.0 {
PRAGMA encoding = 'utf-8';
CREATE TABLE t1(a, b, c);
INSERT INTO t1 VALUES('a', 'b', 'c');
ATTACH 'test.db3' AS aux;
CREATE TABLE aux.t3(x, y, z);
INSERT INTO t3 VALUES('xxx', 'yyy', 'zzz');
PRAGMA encoding;
} {UTF-8}
# enc-12.1: create test.db2 through a second connection, this time as
# UTF-16le, with table t2.
do_test enc-12.1 {
sqlite3 db2 test.db2
db2 eval {
PRAGMA encoding = 'UTF-16le';
CREATE TABLE t2(d, e, f);
INSERT INTO t2 VALUES('d', 'e', 'f');
PRAGMA encoding;
}
} {UTF-16le}
# enc-12.2: back up db2 to the file test.db, then close db2.
# NOTE(review): presumably this overwrites the file behind the main
# connection with the UTF-16le content - confirm against the backup command.
do_test enc-12.2 {
db2 backup test.db
db2 close
} {}
# enc-12.3: reading t2 through the original connection, which still has the
# database created as UTF-8 attached, must fail with the encoding-mismatch
# error.
do_catchsql_test enc-12.3 {
SELECT * FROM t2;
} {1 {attached databases must use the same text encoding as main database}}
# enc-12.4: reopen the connection directly on test.db3. The rows of t3 are
# expected twice: once before and once after the PRAGMA encoding statement.
db close
sqlite3 db test.db3
do_execsql_test enc-12.4 {
SELECT * FROM t3;
PRAGMA encoding = 'UTF-16le';
SELECT * FROM t3;
} {xxx yyy zzz xxx yyy zzz}
db close
sqlite3 db test.db3
# NOTE(review): breakpoint is not defined in this file and looks like a
# leftover debugging hook - confirm it is a no-op before removing it.
breakpoint
# enc-12.5: on a fresh connection to test.db3, which already holds table t3,
# requesting UTF-16le is expected to leave the reported encoding at UTF-8.
do_execsql_test enc-12.5 {
PRAGMA encoding = 'UTF-16le';
PRAGMA encoding;
} {UTF-8}
# enc-12.6: fresh main database set to UTF-8, holding only a TEMP table.
# No expected result is passed to do_execsql_test here.
reset_db
do_execsql_test enc-12.6 {
PRAGMA encoding = 'UTF-8';
CREATE TEMP TABLE t1(a, b, c);
INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
# enc-12.7: back up the UTF-16le test.db2 to test.db again; the TEMP table
# created above must still be readable afterwards.
do_test enc-12.7 {
sqlite3 db2 test.db2
db2 backup test.db
db2 close
db eval {
SELECT * FROM t1;
}
} {xxx yyy zzz}
# enc-12.8: a script that reads t2 and then the TEMP table is expected to
# fail with the encoding-mismatch error.
do_catchsql_test enc-12.8 {
SELECT * FROM t2;
SELECT * FROM t1;
} {1 {attached databases must use the same text encoding as main database}}
# enc-12.9 / enc-12.10: after reopening test.db, a TEMP table created on the
# new connection and the backed-up table t2 can both be read.
db close
sqlite3 db test.db
do_execsql_test enc-12.9 {
CREATE TEMP TABLE t1(a, b, c);
INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
do_execsql_test enc-12.10 {
SELECT * FROM t2;
SELECT * FROM t1;
} {d e f xxx yyy zzz}
finish_test
|