# 2020 September 30
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# Tests for the fts5 "trigram" tokenizer.
#

source [file join [file dirname [info script]] fts5_common.tcl]
ifcapable !fts5 { finish_test ; return }
set ::testprefix fts5trigram

#-------------------------------------------------------------------------
# 1.*: Default trigram tokenizer. The expected results show that queries
# match regardless of case (tests 1.1.7 - 1.1.9) and that LIKE patterns
# return the expected rowids.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
  INSERT INTO t1 VALUES('abcdefghijklm');
  INSERT INTO t1 VALUES('กรุงเทพมหานคร');
}

# Columns: test number, full-text query, expected highlight() output.
foreach {tn s res} {
  1 abc             "(abc)defghijklm"
  2 defgh           "abc(defgh)ijklm"
  3 abcdefghijklm   "(abcdefghijklm)"
  4 กรุ   "(กรุ)งเทพมหานคร"
  5 งเทพมห   "กรุ(งเทพมห)านคร"
  6 กรุงเทพมหานคร   "(กรุงเทพมหานคร)"
  7 Abc             "(abc)defghijklm"
  8 deFgh           "abc(defgh)ijklm"
  9 aBcdefGhijKlm   "(abcdefghijklm)"
} {
  do_execsql_test 1.1.$tn {
    SELECT highlight(t1, 0, '(', ')') FROM t1($s)
  } $res
}

# The query text is folded to lower case and split into overlapping
# three-character tokens.
do_execsql_test 1.2.0 {
  SELECT fts5_expr('ABCD', 'tokenize=trigram')
} {{"abc" + "bcd"}}

# LIKE with an ESCAPE clause and an unbound parameter. No expected result
# is given, so the harness checks for an empty result set.
do_execsql_test 1.2.1 {
  SELECT * FROM t1 WHERE y LIKE ? ESCAPE 'a'
}

# Columns: test number, LIKE pattern, expected rowids.
foreach {tn like res} {
  1 {%cDef%}     1
  2 {cDef%}      {}
  3 {%f%}        1
  4 {%f_h%}      1
  5 {%f_g%}      {}
  6 {abc%klm}    1
  7 {ABCDEFG%}   1
  8 {%รุงเ%}   2
  9 {%งเ%}   2
} {
  do_execsql_test 1.3.$tn {
    SELECT rowid FROM t1 WHERE y LIKE $like
  } $res
}

#-------------------------------------------------------------------------
# 2.*: Same data with "case_sensitive 1". Full-text queries that differ
# from the stored text only by case now return nothing (2.1.7 - 2.1.9),
# while the LIKE results are unchanged. GLOB patterns, including character
# classes, are checked as well.
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize="trigram case_sensitive 1");
  INSERT INTO t1 VALUES('abcdefghijklm');
  INSERT INTO t1 VALUES('กรุงเทพมหานคร');
}

foreach {tn s res} {
  1 abc             "(abc)defghijklm"
  2 defgh           "abc(defgh)ijklm"
  3 abcdefghijklm   "(abcdefghijklm)"
  4 กรุ   "(กรุ)งเทพมหานคร"
  5 งเทพมห   "กรุ(งเทพมห)านคร"
  6 กรุงเทพมหานคร   "(กรุงเทพมหานคร)"
  7 Abc             ""
  8 deFgh           ""
  9 aBcdefGhijKlm   ""
} {
  do_execsql_test 2.1.$tn {
    SELECT highlight(t1, 0, '(', ')') FROM t1($s)
  } $res
}

foreach {tn like res} {
  1 {%cDef%}     1
  2 {cDef%}      {}
  3 {%f%}        1
  4 {%f_h%}      1
  5 {%f_g%}      {}
  6 {abc%klm}    1
  7 {ABCDEFG%}   1
  8 {%รุงเ%}   2
} {
  do_execsql_test 2.2.$tn {
    SELECT rowid FROM t1 WHERE y LIKE $like
  } $res
}

foreach {tn like res} {
  1  {*cdef*}           1
  2  {cdef*}            {}
  3  {*f*}              1
  4  {*f?h*}            1
  5  {*f?g*}            {}
  6  {abc*klm}          1
  7  {abcdefg*}         1
  8  {*รุงเ*}   2
  9  {abc[d]efg*}       1
  10 {abc[]d]efg*}      1
  11 {abc[^]d]efg*}     {}
  12 {abc[^]XYZ]efg*}   1
} {
  do_execsql_test 2.3.$tn {
    SELECT rowid FROM t1 WHERE y GLOB $like
  } $res
}

# A NULL pattern must yield no rows (no expected result means empty).
do_execsql_test 2.3.null.1 {
  SELECT rowid FROM t1 WHERE y LIKE NULL
}

#-------------------------------------------------------------------------
# 3.*: Values other than 0 or 1 for case_sensitive are rejected by the
# tokenizer constructor. Test 3.3 quotes the entire tokenize option rather
# than just its value and expects success.
#
reset_db
do_catchsql_test 3.1 {
  CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2");
} {1 {error in tokenizer constructor}}
do_catchsql_test 3.2 {
  CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11");
} {1 {error in tokenizer constructor}}
do_catchsql_test 3.3 {
  CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1");
} {0 {}}

#-------------------------------------------------------------------------
# 4.*: Insert a blob whose first byte is 0x00 and then run the fts5
# integrity-check. NOTE(review): presumably a regression test for content
# with embedded NUL bytes - confirm against the commit history.
#
reset_db
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram");
}
do_execsql_test 4.1 {
  INSERT INTO t0 VALUES (x'000b01');
}
do_execsql_test 4.2 {
  INSERT INTO t0(t0) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
# 5.*: Repeat the LIKE tests for both case_sensitive settings inside
# foreach_detail_mode. The %DETAIL% placeholder is expected to be replaced
# by foreach_detail_mode (defined outside this file).
#
reset_db
foreach_detail_mode $::testprefix {
  foreach {ci} {0 1} {
    reset_db

    # Double-quoted so that $ci is substituted into the SQL text.
    do_execsql_test 5.cs=$ci.0.1 "
      CREATE VIRTUAL TABLE t1 USING fts5(
          y, tokenize=\"trigram case_sensitive $ci\", detail=%DETAIL%
      );
    "
    do_execsql_test 5.cs=$ci.0.2 {
      INSERT INTO t1 VALUES('abcdefghijklm');
      INSERT INTO t1 VALUES('กรุงเทพมหานคร');
    }

    foreach {tn like res} {
      1 {%cDef%}     1
      2 {cDef%}      {}
      3 {%f%}        1
      4 {%f_h%}      1
      5 {%f_g%}      {}
      6 {abc%klm}    1
      7 {ABCDEFG%}   1
      8 {%รุงเ%}   2
    } {
      do_execsql_test 5.cs=$ci.1.$tn {
        SELECT rowid FROM t1 WHERE y LIKE $like
      } $res
    }
  }
}

# 6.*: Query-plan checks for LIKE and GLOB against a case-insensitive
# table (ci0) and a case-sensitive table (ci1).
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram");
  CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1");
}

# LIKE and GLOB both work with case-insensitive tokenizers. Only GLOB works
# with case-sensitive.
do_eqp_test 6.1 {
  SELECT * FROM ci0 WHERE x LIKE ?
} {VIRTUAL TABLE INDEX 0:L0}
do_eqp_test 6.2 {
  SELECT * FROM ci0 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}
do_eqp_test 6.3 {
  SELECT * FROM ci1 WHERE x LIKE ?
} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
do_eqp_test 6.4 {
  SELECT * FROM ci1 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}

# 7.*: GLOB with a two-character non-ASCII pattern. String values use
# single quotes so that the test does not depend on SQLite's legacy
# handling of double-quoted string literals.
reset_db
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram");
  INSERT INTO f (rowid, filename) VALUES
    (10, 'giraffe.png'),
    (20, 'жираф.png'),
    (30, 'cat.png'),
    (40, 'кот.png'),
    (50, 'misic-🎵-.mp3');
}

# 7.1 applies a unary "+" to the column and 7.2 does not; both must return
# the same row. NOTE(review): the "+" is presumably there to keep the GLOB
# term away from the fts5 index so the two code paths are compared.
do_execsql_test 7.1 {
  SELECT rowid FROM f WHERE +filename GLOB '*ир*';
} {20}
do_execsql_test 7.2 {
  SELECT rowid FROM f WHERE filename GLOB '*ир*';
} {20}

#-------------------------------------------------------------------------
# 8.*: highlight() with several query terms. The expected output shows
# overlapping or adjacent matches reported as a single highlighted range.
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
  INSERT INTO t1 VALUES('abcdefghijklm');
}

# Columns: test number, full-text query, expected highlight() output.
foreach {tn match res} {
  1 "abc ghi"         "(abc)def(ghi)jklm"
  2 "def ghi"         "abc(defghi)jklm"
  3 "efg ghi"         "abcd(efghi)jklm"
  4 "efghi"           "abcd(efghi)jklm"
  5 "abcd jklm"       "(abcd)efghi(jklm)"
  6 "ijkl jklm"       "abcdefgh(ijklm)"
  7 "ijk ijkl hijk"   "abcdefg(hijkl)m"
} {
  do_execsql_test 8.1.$tn {
    SELECT highlight(t1, 0, '(', ')') FROM t1($match)
  } $res
}

do_execsql_test 8.2 {
  CREATE VIRTUAL TABLE ft2 USING fts5(a, tokenize="trigram");
  INSERT INTO ft2 VALUES('abc x cde');
  INSERT INTO ft2 VALUES('abc cde');
  INSERT INTO ft2 VALUES('abcde');
}

do_execsql_test 8.3 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'abc AND cde';
} {
  {[abc] x [cde]}
  {[abc] [cde]}
  {[abcde]}
}

finish_test