# 2023 Nov 03
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the built-in fts5 tokenizers.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5tokenizer2

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Register a Tcl-implemented tokenizer under the name "tst". The command
# [get_tst_tokenizer] is looked up when it is invoked, so it may be defined
# after this registration call.
sqlite3_fts5_create_tokenizer db tst get_tst_tokenizer

# Callback registered for tokenizer "tst". Ignores whatever arguments it is
# passed and returns the name of the Tcl command that does the tokenizing.
proc get_tst_tokenizer {args} {
  return "tst_tokenizer"
}

# Tokenize $txt by splitting it wherever the "is upper-case" property
# changes between the token accumulated so far and the next character.
# For example "AAdontBBmess" yields the tokens AA, dont, BB and mess.
# There are no separator characters: every token starts exactly where
# the previous one ended.
#
#   flags - passed by the caller; not used here.
#   txt   - the text to tokenize.
#
# Each token is reported with [sqlite3_fts5_token], along with start and
# end offsets computed from [string length] of the preceding tokens.
proc tst_tokenizer {flags txt} {
  set token ""
  set lTok [list]
  foreach c [split $txt {}] {
    if {$token eq ""} {
      append token $c
    } else {
      # Compare the case-class of the whole accumulated token with that
      # of the next character. A mismatch ends the current token.
      set t1 [string is upper $token]
      set t2 [string is upper $c]

      if {$t1!=$t2} {
        lappend lTok $token
        set token ""
      }
      append token $c
    }
  }
  # Flush the final token, if any (nothing is emitted for empty input).
  if {$token ne ""} { lappend lTok $token }

  set iOff 0
  foreach t $lTok {
    set n [string length $t]
    sqlite3_fts5_token $t $iOff [expr {$iOff+$n}]
    incr iOff $n
  }
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(t, tokenize=tst);
}

do_execsql_test 1.1 {
  INSERT INTO t1 VALUES('AAdontBBmess');
}

# snippet() and highlight() on a document whose tokens are adjacent, with
# no separator characters between them. '>' is the open marker and '<'
# the close marker.
do_execsql_test 1.2 {
  SELECT snippet(t1, 0, '>', '<', '...', 4) FROM t1('BB');
} {AAdont>BB<mess}

do_execsql_test 1.3 {
  SELECT highlight(t1, 0, '>', '<') FROM t1('BB');
} {AAdont>BB<mess}

do_execsql_test 1.4 {
  SELECT highlight(t1, 0, '>', '<') FROM t1('AA');
} {>AA<dontBBmess}

do_execsql_test 1.5 {
  SELECT highlight(t1, 0, '>', '<') FROM t1('dont');
} {AA>dont<BBmess}

do_execsql_test 1.6 {
  SELECT highlight(t1, 0, '>', '<') FROM t1('mess');
} {AAdontBB>mess<}

# Two matching tokens that are adjacent in the document.
# NOTE(review): expected value kept exactly as found in this file; confirm
# against upstream that it is not meant to be AAdont>BB<>mess<.
do_execsql_test 1.7 {
  SELECT highlight(t1, 0, '>', '<') FROM t1('BB mess');
} {AAdont>BBmess<}

# 2024-08-06 https://sqlite.org/forum/forumpost/171bcc2bcd
# Error handling of tokenize= arguments.
#
# Each tokenizer specification below is expected to make CREATE VIRTUAL
# TABLE fail with "error in tokenizer constructor".
foreach {n tkz} {
  1 {ascii none}
  2 {unicode61 none}
  3 {porter none}
  4 {trigram none}
  5 {ascii none 0}
  6 {unicode61 none 0}
  7 {porter none 0}
  8 {trigram none 0}
} {
  db eval {DROP TABLE IF EXISTS t2;}
  # Double quotes (not braces) so that $tkz is substituted into the SQL.
  do_catchsql_test 2.$n "
    DROP TABLE IF EXISTS t2;
    CREATE VIRTUAL TABLE t2 USING fts5(a,b,c,tokenize='$tkz');
  " {1 {error in tokenizer constructor}}
}

finish_test