diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 14:07:11 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 14:07:11 +0000 |
commit | 63847496f14c813a5d80efd5b7de0f1294ffe1e3 (patch) | |
tree | 01c7571c7c762ceee70638549a99834fdd7c411b /ext/fts5/test/fts5trigram.test | |
parent | Initial commit. (diff) | |
download | sqlite3-63847496f14c813a5d80efd5b7de0f1294ffe1e3.tar.xz sqlite3-63847496f14c813a5d80efd5b7de0f1294ffe1e3.zip |
Adding upstream version 3.45.1. (tag: upstream/3.45.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ext/fts5/test/fts5trigram.test')
-rw-r--r-- | ext/fts5/test/fts5trigram.test | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/ext/fts5/test/fts5trigram.test b/ext/fts5/test/fts5trigram.test new file mode 100644 index 0000000..351c059 --- /dev/null +++ b/ext/fts5/test/fts5trigram.test @@ -0,0 +1,256 @@ +# 2020 September 30 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# Tests for the fts5 "trigram" tokenizer. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +ifcapable !fts5 { finish_test ; return } +set ::testprefix fts5trigram + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram); + INSERT INTO t1 VALUES('abcdefghijklm'); + INSERT INTO t1 VALUES('กรุงเทพมหานคร'); +} + +foreach {tn s res} { + 1 abc "(abc)defghijklm" + 2 defgh "abc(defgh)ijklm" + 3 abcdefghijklm "(abcdefghijklm)" + 4 กรุ "(กรุ)งเทพมหานคร" + 5 งเทพมห "กรุ(งเทพมห)านคร" + 6 กรุงเทพมหานคร "(กรุงเทพมหานคร)" + 7 Abc "(abc)defghijklm" + 8 deFgh "abc(defgh)ijklm" + 9 aBcdefGhijKlm "(abcdefghijklm)" +} { + do_execsql_test 1.1.$tn { + SELECT highlight(t1, 0, '(', ')') FROM t1($s) + } $res +} + +do_execsql_test 1.2.0 { + SELECT fts5_expr('ABCD', 'tokenize=trigram') +} {{"abc" + "bcd"}} + +do_execsql_test 1.2.1 { + SELECT * FROM t1 WHERE y LIKE ? 
ESCAPE 'a' +} + +foreach {tn like res} { + 1 {%cDef%} 1 + 2 {cDef%} {} + 3 {%f%} 1 + 4 {%f_h%} 1 + 5 {%f_g%} {} + 6 {abc%klm} 1 + 7 {ABCDEFG%} 1 + 8 {%รุงเ%} 2 + 9 {%งเ%} 2 +} { + do_execsql_test 1.3.$tn { + SELECT rowid FROM t1 WHERE y LIKE $like + } $res +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize="trigram case_sensitive 1"); + INSERT INTO t1 VALUES('abcdefghijklm'); + INSERT INTO t1 VALUES('กรุงเทพมหานคร'); +} + +foreach {tn s res} { + 1 abc "(abc)defghijklm" + 2 defgh "abc(defgh)ijklm" + 3 abcdefghijklm "(abcdefghijklm)" + 4 กรุ "(กรุ)งเทพมหานคร" + 5 งเทพมห "กรุ(งเทพมห)านคร" + 6 กรุงเทพมหานคร "(กรุงเทพมหานคร)" + 7 Abc "" + 8 deFgh "" + 9 aBcdefGhijKlm "" +} { + do_execsql_test 2.1.$tn { + SELECT highlight(t1, 0, '(', ')') FROM t1($s) + } $res +} +foreach {tn like res} { + 1 {%cDef%} 1 + 2 {cDef%} {} + 3 {%f%} 1 + 4 {%f_h%} 1 + 5 {%f_g%} {} + 6 {abc%klm} 1 + 7 {ABCDEFG%} 1 + 8 {%รุงเ%} 2 +} { + do_execsql_test 2.2.$tn { + SELECT rowid FROM t1 WHERE y LIKE $like + } $res +} +foreach {tn like res} { + 1 {*cdef*} 1 + 2 {cdef*} {} + 3 {*f*} 1 + 4 {*f?h*} 1 + 5 {*f?g*} {} + 6 {abc*klm} 1 + 7 {abcdefg*} 1 + 8 {*รุงเ*} 2 + 9 {abc[d]efg*} 1 + 10 {abc[]d]efg*} 1 + 11 {abc[^]d]efg*} {} + 12 {abc[^]XYZ]efg*} 1 +} { + do_execsql_test 2.3.$tn { + SELECT rowid FROM t1 WHERE y GLOB $like + } $res +} + +do_execsql_test 2.3.null.1 { + SELECT rowid FROM t1 WHERE y LIKE NULL +} + +#------------------------------------------------------------------------- +reset_db +do_catchsql_test 3.1 { + CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2"); +} {1 {error in tokenizer constructor}} +do_catchsql_test 3.2 { + CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11"); +} {1 {error in tokenizer constructor}} +do_catchsql_test 3.3 { + CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1"); +} {0 {}} + 
+#------------------------------------------------------------------------- +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram"); +} +do_execsql_test 4.1 { + INSERT INTO t0 VALUES (x'000b01'); +} +do_execsql_test 4.2 { + INSERT INTO t0(t0) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- +reset_db +foreach_detail_mode $::testprefix { + foreach {ci} {0 1} { + reset_db + do_execsql_test 5.cs=$ci.0.1 " + CREATE VIRTUAL TABLE t1 USING fts5( + y, tokenize=\"trigram case_sensitive $ci\", detail=%DETAIL% + ); + " + do_execsql_test 5.cs=$ci.0.2 { + INSERT INTO t1 VALUES('abcdefghijklm'); + INSERT INTO t1 VALUES('กรุงเทพมหานคร'); + } + + foreach {tn like res} { + 1 {%cDef%} 1 + 2 {cDef%} {} + 3 {%f%} 1 + 4 {%f_h%} 1 + 5 {%f_g%} {} + 6 {abc%klm} 1 + 7 {ABCDEFG%} 1 + 8 {%รุงเ%} 2 + } { + do_execsql_test 5.cs=$ci.1.$tn { + SELECT rowid FROM t1 WHERE y LIKE $like + } $res + } + } +} + +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram"); + CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1"); +} + +# LIKE and GLOB both work with case-insensitive tokenizers. Only GLOB works +# with case-sensitive. +do_eqp_test 6.1 { + SELECT * FROM ci0 WHERE x LIKE ? +} {VIRTUAL TABLE INDEX 0:L0} +do_eqp_test 6.2 { + SELECT * FROM ci0 WHERE x GLOB ? +} {VIRTUAL TABLE INDEX 0:G0} +do_eqp_test 6.3 { + SELECT * FROM ci1 WHERE x LIKE ? +} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}} +do_eqp_test 6.4 { + SELECT * FROM ci1 WHERE x GLOB ? 
+} {VIRTUAL TABLE INDEX 0:G0} + +reset_db +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram"); + INSERT INTO f (rowid, filename) VALUES + (10, "giraffe.png"), + (20, "жираф.png"), + (30, "cat.png"), + (40, "кот.png"), + (50, "misic-🎵-.mp3"); +} +do_execsql_test 7.1 { + SELECT rowid FROM f WHERE +filename GLOB '*ир*'; +} {20} +do_execsql_test 7.2 { + SELECT rowid FROM f WHERE filename GLOB '*ир*'; +} {20} + + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram); + INSERT INTO t1 VALUES('abcdefghijklm'); +} + +foreach {tn match res} { + 1 "abc ghi" "(abc)def(ghi)jklm" + 2 "def ghi" "abc(defghi)jklm" + 3 "efg ghi" "abcd(efghi)jklm" + 4 "efghi" "abcd(efghi)jklm" + 5 "abcd jklm" "(abcd)efghi(jklm)" + 6 "ijkl jklm" "abcdefgh(ijklm)" + 7 "ijk ijkl hijk" "abcdefg(hijkl)m" + +} { + do_execsql_test 8.1.$tn { + SELECT highlight(t1, 0, '(', ')') FROM t1($match) + } $res +} + +do_execsql_test 8.2 { + CREATE VIRTUAL TABLE ft2 USING fts5(a, tokenize="trigram"); + INSERT INTO ft2 VALUES('abc x cde'); + INSERT INTO ft2 VALUES('abc cde'); + INSERT INTO ft2 VALUES('abcde'); +} + +do_execsql_test 8.3 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'abc AND cde'; +} { + {[abc] x [cde]} + {[abc] [cde]} + {[abcde]} +} + +finish_test |