1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
|
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests focused on prefix indexes.
#
# Source fts5_common.tcl from the directory that contains this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5prefix
# If FTS5 is not available (SQLITE_ENABLE_FTS5 is not defined), omit this
# file.
ifcapable !fts5 {
finish_test
return
}
# Create a table with a one-character prefix index (prefix=1) and insert
# three rows.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
INSERT INTO xx VALUES('one two three');
INSERT INTO xx VALUES('four five six');
INSERT INTO xx VALUES('seven eight nine ten');
}
# A one-character prefix query must return the rows that contain a token
# starting with "t" (rows 1 and 3).
do_execsql_test 1.1 {
SELECT rowid FROM xx WHERE xx MATCH 't*'
} {1 3}
#-------------------------------------------------------------------------
# Check that prefix indexes really do index n-character prefixes, not
# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
# diacritic removal.
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2)
}
# Insert two rows, each holding a single four-character non-ASCII token.
do_test 2.1 {
foreach {rowid string} {
1 "\xCA\xCB\xCC\xCD"
2 "\u1234\u5678\u4321\u8765"
} {
execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
}
} {}
# Run the FTS5 integrity-check command on the populated table.
do_execsql_test 2.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
# Each entry is: test number, query, expected rowid. Every query uses the
# first two characters of one row's token as the prefix and must return
# only that row.
foreach {tn q res} {
1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
do_execsql_test 2.3.$tn $q $res
}
#-------------------------------------------------------------------------
# Check that prefix queries with:
#
# * a column filter, and
# * no prefix index.
#
# work Ok.
#
# Create a three-column table without a prefix= option, set the FTS5 pgsz
# option to 32 and insert nine rows inside a single transaction. The
# trailing SQL comments give the rowid of each row.
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
BEGIN;
INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
COMMIT;
}
# Each entry is: test number, a query of the form "column : prefix*", and
# the list of rowids the query is expected to return.
foreach {tn match res} {
1 "a : c*" {1 2 4 6 7 9}
2 "b : c*" {1 3 4 5 6 8 9}
3 "c : c*" {1 2 4 6 7 8 9}
4 "a : b*" {1 3 5 6 7 8 9}
5 "b : b*" {1 2 3 5 7 9}
6 "c : b*" {1 3 7 9}
7 "a : a*" {1 3 4 5 6 8}
8 "b : a*" {2 3 4 6 7 8}
9 "c : a*" {2 3 4 5 6 7 8 9}
} {
do_execsql_test 3.1.$tn {
SELECT rowid FROM t3($match)
} $res
}
# Replace the contents of t3 with 1000 rows whose columns a, b and c are
# produced by fts5_rnddoc with arguments 3, 8 and 20 respectively, then run
# the FTS5 integrity-check. The test passes when the final command returns
# an empty result.
do_test 3.2 {
  # Seed the expr random number generator with a fixed value. The
  # expression is braced so that it is parsed once and not re-substituted.
  expr {srand(0)}
  execsql { DELETE FROM t3 }
  for {set i 0} {$i < 1000} {incr i} {
    set a [fts5_rnddoc 3]
    set b [fts5_rnddoc 8]
    set c [fts5_rnddoc 20]
    execsql { INSERT INTO t3 VALUES($a, $b, $c) }
  }
  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
} {}
# gmatch COL PATTERN
#
# Return 1 if any element of the list COL glob-matches PATTERN, or 0 if
# none does.
proc gmatch {col pattern} {
  set hit [lsearch -glob $col $pattern]
  return [expr {$hit >= 0}]
}
# Register the Tcl proc gmatch as the SQL function gmatch() on the db
# connection, so that it can be called from the SQL queries below.
db func gmatch gmatch
# ghl COL PATTERN
#
# Return the elements of the list COL, with every element that
# glob-matches PATTERN wrapped in "*" characters and every other element
# copied unchanged. An empty COL yields an empty list.
proc ghl {col pattern} {
  # Start from an empty list so that an empty COL returns an empty result
  # instead of raising an unset-variable error.
  set res [list]
  foreach t $col {
    if {[string match $pattern $t]} {
      lappend res "*$t*"
    } else {
      lappend res $t
    }
  }
  return $res
}
# Register the Tcl proc ghl as the SQL function ghl() on the db connection.
db func ghl ghl
# Map each column name of t3 to the column index passed to highlight().
set COLS(a) 0
set COLS(b) 1
set COLS(c) 2
# Run the whole set of checks twice. The 'optimize' and 'integrity-check'
# commands are issued at the end of each pass, so the second pass queries
# the table after it has been optimized.
#
# In every check the expected result is computed in Tcl, using the gmatch()
# and ghl() SQL functions on the stored column values, and is then compared
# with the result of the equivalent FTS5 query.
for {set x 0} {$x<2} {incr x} {
foreach {tn pattern} {
1 {xa*}
2 {xb*}
3 {xc*}
4 {xd*}
5 {xe*}
6 {xf*}
7 {xg*}
8 {xh*}
9 {xi*}
10 {xj*}
} {
foreach col {a b c} {
# Check that the list of returned rowids is correct.
#
set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
set query "$col : $pattern"
do_execsql_test 3.3.$x.$tn.$col.rowid {
SELECT rowid FROM t3($query);
} $res
# Check that the highlight() function works.
#
set res [db eval \
"SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')"
]
set idx $COLS($col)
do_execsql_test 3.3.$x.$tn.$col.highlight {
SELECT highlight(t3, $idx, '*', '*') FROM t3($query);
} $res
}
foreach colset {{a b} {b c} {c a} {a c} {b a}} {
# Check that the list of returned rowids is correct.
#
# Unpack the two column names of the column set into col1 and col2.
foreach {col1 col2} $colset {}
set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')"
set res [db eval "SELECT rowid FROM t3 WHERE $expr"]
set query "{$colset} : $pattern"
do_execsql_test 3.3.$x.$tn.{$colset}.rowid {
SELECT rowid FROM t3($query);
} $res
# Check highlight() on both columns of the column set.
set resq "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')"
append resq " FROM t3 WHERE $expr"
set res [db eval $resq]
set idx1 $COLS($col1)
set idx2 $COLS($col2)
do_execsql_test 3.3.$x.$tn.{$colset}.highlight {
SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*')
FROM t3($query)
} $res
}
}
execsql { INSERT INTO t3(t3) VALUES('optimize') }
execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
}
#-------------------------------------------------------------------------
# Check that prefix queries with a column filter return the correct row
# counts after UPDATE statements replace the contents of column c2.
#
reset_db
# Start with one row and double the table twelve times, giving 4096 rows.
# Each doubling also doubles the text stored in every new row. The trailing
# SQL comments give the row count after each statement.
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t2 USING fts5(c1, c2);
INSERT INTO t2 VALUES('xa xb', 'xb xa');
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 8
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 16
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 32
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 64
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 128
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 256
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 512
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 1024
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2048
INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4096
SELECT count(*) FROM t2('x*');
} {4096}
# After c2 is rewritten with tokens that do not start with "x", the prefix
# query must still match every row through c1 and no row through c2.
do_execsql_test 4.1 {
UPDATE t2 SET c2 = 'ya yb';
SELECT count(*) FROM t2('c1:x*');
SELECT count(*) FROM t2('c2:x*');
} {4096 0}
# After c2 is rewritten with a token that starts with "x", the prefix query
# must match every row through either column.
do_execsql_test 4.2 {
UPDATE t2 SET c2 = 'xa';
SELECT count(*) FROM t2('c1:x*');
SELECT count(*) FROM t2('c2:x*');
} {4096 4096}
#-------------------------------------------------------------------------
# Check that prefix queries with a column filter work on a table that has
# a large number of columns (c1 to c249) and a pgsz setting of 32.
#
reset_db
# rnddoc N
#
# Return a list of N tokens, each chosen pseudo-randomly from "xa", "xb",
# "xc" and "xd" using the expr rand() function. Returns an empty list when
# N is 0 or less.
proc rnddoc {n} {
  set map [list a b c d]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    # The expression is braced so that it is parsed once and not
    # re-substituted.
    lappend doc "x[lindex $map [expr {int(rand()*4)}]]"
  }
  return $doc
}
# Build the column names c1 to c249 and, for each column, a single-quoted
# SQL string literal holding a 10-token document produced by rnddoc. Both
# lists are initialised explicitly so that no earlier value of either
# variable can leak into the generated SQL.
set cols [list]
set vals [list]
for {set i 1} {$i<250} {incr i} {
  lappend cols "c$i"
  lappend vals "'[rnddoc 10]'"
}

# Create the 249-column table, set the FTS5 pgsz option to 32 and insert
# the same generated row four times. The test passes when the final
# command returns an empty result.
do_test 5.0 {
  execsql "CREATE VIRTUAL TABLE t4 USING fts5([join $cols ,])"
  execsql {INSERT INTO t4(t4, rank) VALUES('pgsz', 32)}
  execsql "INSERT INTO t4 VALUES([join $vals ,])"
  execsql "INSERT INTO t4 VALUES([join $vals ,])"
  execsql "INSERT INTO t4 VALUES([join $vals ,])"
  execsql "INSERT INTO t4 VALUES([join $vals ,])"
} {}
# gmatch COL PATTERN
#
# Return 1 if any element of the list COL glob-matches PATTERN, or 0 if
# none does.
proc gmatch {col pattern} {
  set hit [lsearch -glob $col $pattern]
  return [expr {$hit >= 0}]
}
# Register gmatch as an SQL function on the current db connection
# (reset_db was called earlier in this section; presumably it replaced the
# connection, so the function has to be registered again).
db func gmatch gmatch
# Each entry is: test number, column name, prefix pattern. The expected
# rowids are computed in Tcl with gmatch and compared with the result of
# the FTS5 query "column : pattern". The escaped dollar sign keeps $pattern
# in the SQL text, so that the db interface binds it to the Tcl variable.
foreach {tn col pattern} {
1 c100 {xa*}
2 c200 {xb*}
} {
set res [db eval "SELECT rowid FROM t4 WHERE gmatch($col, \$pattern)"]
set query "$col : $pattern"
do_execsql_test 5.$tn { SELECT rowid FROM t4($query) } $res
}
# Reset the database and register the Tcl command fts5_rnddoc as an SQL
# function on the db connection.
reset_db
db func fts5_rnddoc fts5_rnddoc
# Create a two-column table and insert four rows in which both columns are
# filled with the result of fts5_rnddoc(10000). The test passes when the
# execsql call returns an empty result.
do_test 6.0 {
execsql {
CREATE VIRTUAL TABLE t5 USING fts5(x, y);
INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
}
} {}
# gmatch COL PATTERN
#
# Return 1 if any element of the list COL glob-matches PATTERN, or 0 if
# none does.
proc gmatch {col pattern} {
  set hit [lsearch -glob $col $pattern]
  return [expr {$hit >= 0}]
}
# Register gmatch as an SQL function on the current db connection
# (reset_db was called earlier in this section; presumably it replaced the
# connection, so the function has to be registered again).
db func gmatch gmatch
# Each entry is: test number, column name, prefix pattern. The expected
# rowids are computed in Tcl with gmatch and compared with the result of
# the FTS5 query "column : pattern". The escaped dollar sign keeps $pattern
# in the SQL text, so that the db interface binds it to the Tcl variable.
foreach {tn col pattern} {
1 y {xa*}
2 y {xb*}
3 y {xc*}
4 x {xa*}
5 x {xb*}
6 x {xc*}
} {
set res [db eval "SELECT rowid FROM t5 WHERE gmatch($col, \$pattern)"]
set query "$col : $pattern"
do_execsql_test 6.$tn { SELECT rowid FROM t5($query) } $res
}
#-------------------------------------------------------------------------
# Check that the various ways of creating prefix indexes produce the
# same database on disk.
#
# NOTE(review): save_prng_state and restore_prng_state are defined outside
# this file; presumably they make every variant start from the same PRNG
# state, so that the tables are built identically. Confirm against the
# test harness.
save_prng_state
# Each entry is: test number, CREATE VIRTUAL TABLE statement. All four
# statements request prefix indexes of sizes 1, 2 and 3, using different
# spellings of the prefix= option.
foreach {tn create} {
1 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1,2,3") }
2 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2 3") }
3 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix=1, prefix=2, prefix=3) }
4 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2", prefix=3) }
} {
# Recreate the table from scratch and load the same fourteen rows.
execsql { DROP TABLE IF EXISTS tt }
restore_prng_state
execsql $create
execsql {
INSERT INTO tt VALUES('cc b ggg ccc aa eee hh', 'aa g b hh a e');
INSERT INTO tt VALUES('cc bb cc gg j g cc', 'ii jjj ggg jjj cc cc');
INSERT INTO tt VALUES('h eee cc h iii', 'aaa iii dd iii dd');
INSERT INTO tt VALUES('jjj hh eee c e b gg', 'j bbb jj ddd jj');
INSERT INTO tt VALUES('ii hhh aaa ff c hhh iii', 'j cc hh bb e');
INSERT INTO tt VALUES('e fff hhh i aaa', 'g b aa gg c aa dd');
INSERT INTO tt VALUES('i aaa ccc gg hhh aa h', 'j bbb bbb d ff');
INSERT INTO tt VALUES('g f gg ff ff jjj d', 'jjj d j fff fff ee j');
INSERT INTO tt VALUES('a cc e ccc jjj c', 'ccc iii d bb a eee g');
INSERT INTO tt VALUES('jj hh hh bb bbb gg', 'j c jjj bb iii f');
INSERT INTO tt VALUES('a ggg g cc ccc aa', 'jjj j j aaa c');
INSERT INTO tt VALUES('ddd j dd b i', 'aaa bbb iii ggg ff ccc ddd');
INSERT INTO tt VALUES('jj ii hh c ii h gg', 'hhh bbb ddd bbb hh g ggg');
INSERT INTO tt VALUES('aa hhh ccc h ggg ccc', 'iii d jj a ff ii');
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM tt_data} {puts $r}
# The first variant supplies the reference checksum of the tt_data table.
# Every later variant must produce exactly the same checksum.
if {$tn==1} {
set ::checksum [execsql {SELECT md5sum(id, block) FROM tt_data}]
} else {
do_execsql_test 7.$tn {
SELECT md5sum(id, block) FROM tt_data
} [list $::checksum]
}
}
finish_test
|