author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-05 17:28:19 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-05 17:28:19 +0000
commit     18657a960e125336f704ea058e25c27bd3900dcb
tree       17b438b680ed45a996d7b59951e6aa34023783f2  /test/fts3rnd.test
parent     Initial commit.
Adding upstream version 3.40.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'test/fts3rnd.test')
-rw-r--r--  test/fts3rnd.test  460
1 files changed, 460 insertions, 0 deletions
diff --git a/test/fts3rnd.test b/test/fts3rnd.test
new file mode 100644
index 0000000..97af549
--- /dev/null
+++ b/test/fts3rnd.test
@@ -0,0 +1,460 @@
+# 2009 December 03
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Brute force (random data) tests for FTS3.
+#
+
+#-------------------------------------------------------------------------
+#
+# The FTS3 tests implemented in this file focus on testing that FTS3
+# returns the correct set of documents for various types of full-text
+# query. This is done using pseudo-randomly generated data and queries.
+# The expected result of each query is calculated using Tcl code.
+#
+# 1. The database is initialized to contain a single table with three
+# columns. 100 rows are inserted into the table. Each of the three
+# values in each row is a document consisting of between 0 and 100
+# terms. Terms are selected from a vocabulary of $G(nVocab) terms.
+#
+# 2. The following is performed 100 times:
+#
+# a. A row is inserted into the database. The row contents are
+# generated as in step 1. The docid is a pseudo-randomly selected
+# value between 0 and 1000000.
+#
+# b. A pseudo-randomly selected row is updated. One of its columns is
+# set to contain a new document generated in the same way as the
+# documents in step 1.
+#
+# c. A pseudo-randomly selected row is deleted.
+#
+# d. For each of several types of fts3 queries, 10 SELECT queries
+# of the form:
+#
+# SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'
+#
+# are evaluated. The results are compared to those calculated by
+# Tcl code in this file. The patterns used for the different query
+# types are:
+#
+# 1. query = <term>
+# 2. query = <prefix>
+# 3. query = "<term> <term>"
+# 4. query = "<term> <term> <term>"
+# 5. query = "<prefix> <prefix> <prefix>"
+# 6. query = <term> NEAR <term>
+# 7. query = <term> NEAR/11 <term> NEAR/11 <term>
+# 8. query = <term> OR <term>
+# 9. query = <term> NOT <term>
+# 10. query = <term> AND <term>
+# 11. query = <term> NEAR <term> OR <term> NEAR <term>
+# 12. query = <term> NEAR <term> NOT <term> NEAR <term>
+# 13. query = <term> NEAR <term> AND <term> NEAR <term>
+#
+# where <term> is a term pseudo-randomly selected from the vocabulary
+# and <prefix> is such a term with its final character removed, followed
+# by a "*" character.
+#
+# Every second iteration, steps (a) through (d) above are performed
+# within a single transaction. This forces the queries in (d) to
+# read data from both the database and the in-memory hash table
+# that caches the full-text index entries created by steps (a), (b)
+# and (c) until the transaction is committed.
+#
+# The procedure above is run 5 times, using advisory fts3 node sizes of
+# 50 (twice - once with order=DESC and once with order=ASC), 500, 1000
+# and 2000 bytes.
+#
+# During the final iteration of each run that uses an advisory node-size
+# of 50, an OOM test is run using the database. This test is similar to
+# step (d) above, except that it also tests the effects of transient and
+# persistent OOM conditions encountered while executing each query.
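+#
+# For example, a single trial of a simple term query is roughly
+# equivalent to the following simplified sketch (the real checks are
+# performed with [do_select_test] from tester.tcl and the helper procs
+# defined later in this file):
+#
+#   set term [random_term]
+#   do_select_test example {
+#     SELECT docid FROM t1 WHERE t1 MATCH $term ORDER BY docid ASC
+#   } [simple_phrase $term]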
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If this build does not include FTS3, skip the tests in this file.
+#
+ifcapable !fts3 { finish_test ; return }
+source $testdir/fts3_common.tcl
+source $testdir/malloc_common.tcl
+
+set G(nVocab) 100
+
+set nVocab 100
+set lVocab [list]
+
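+# Seed the random number generator so that the same sequence of
+# pseudo-random values (and hence the same test data and queries) is
+# used each time this file is run.
+#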
+expr srand(0)
+
+# Generate a vocabulary of nVocab words. Each word is between 2 and 4
+# characters long.
+#
+set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
+for {set i 0} {$i < $nVocab} {incr i} {
+ set len [expr int(rand()*3)+2]
+ set word [lindex $lChar [expr int(rand()*26)]]
+ append word [lindex $lChar [expr int(rand()*26)]]
+ if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] }
+ if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] }
+ lappend lVocab $word
+}
+
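+# Return a term pseudo-randomly selected from the $::lVocab vocabulary.
+#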
+proc random_term {} {
+ lindex $::lVocab [expr {int(rand()*$::nVocab)}]
+}
+
+# Return a document consisting of $nWord arbitrarily selected terms
+# from the $::lVocab list.
+#
+proc generate_doc {nWord} {
+ set doc [list]
+ for {set i 0} {$i < $nWord} {incr i} {
+ lappend doc [random_term]
+ }
+ return $doc
+}
+
+
+
+# Primitives to update the database. Each proc updates both the fts4
+# table t1 and the Tcl array ::t1 that is used to calculate the expected
+# results of queries.
+#
+unset -nocomplain t1
+proc insert_row {rowid} {
+ set a [generate_doc [expr int((rand()*100))]]
+ set b [generate_doc [expr int((rand()*100))]]
+ set c [generate_doc [expr int((rand()*100))]]
+ execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
+ set ::t1($rowid) [list $a $b $c]
+}
+proc delete_row {rowid} {
+ execsql { DELETE FROM t1 WHERE rowid = $rowid }
+ catch {unset ::t1($rowid)}
+}
+proc update_row {rowid} {
+ set cols {a b c}
+ set iCol [expr int(rand()*3)]
+ set doc [generate_doc [expr int((rand()*100))]]
+ lset ::t1($rowid) $iCol $doc
+ execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid"
+}
+
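+# Return a sorted list of the rowids of all rows in the ::t1 array that
+# contain the phrase $zPrefix in at least one column. Any "*" character
+# in $zPrefix is treated as a prefix wildcard, as it is by FTS3. This is
+# used to calculate the expected results of term, prefix and phrase
+# queries.
+#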
+proc simple_phrase {zPrefix} {
+ set ret [list]
+
+ set reg [string map {* {[^ ]*}} $zPrefix]
+ set reg " $reg "
+
+ foreach key [lsort -integer [array names ::t1]] {
+ set value $::t1($key)
+ set cnt [list]
+ foreach col $value {
+ if {[regexp $reg " $col "]} { lappend ret $key ; break }
+ }
+ }
+
+ #lsort -uniq -integer $ret
+ set ret
+}
+
+# This [proc] is used to test the FTS3 matchinfo() function. It returns
+# the expected results of querying table t1 for token $zToken and
+# reading "mit(matchinfo(t1))" for each matching row - a list of
+# alternating docid/matchinfo values sorted by docid, in descending
+# order if $bDesc is true.
+#
+proc simple_token_matchinfo {zToken bDesc} {
+
+ set nDoc(0) 0
+ set nDoc(1) 0
+ set nDoc(2) 0
+ set nHit(0) 0
+ set nHit(1) 0
+ set nHit(2) 0
+
+ set dir -inc
+ if {$bDesc} { set dir -dec }
+
+ foreach key [array names ::t1] {
+ set value $::t1($key)
+ set a($key) [list]
+ foreach i {0 1 2} col $value {
+ set hit [llength [lsearch -all $col $zToken]]
+ lappend a($key) $hit
+ incr nHit($i) $hit
+ if {$hit>0} { incr nDoc($i) }
+ }
+ }
+
+ set ret [list]
+ foreach docid [lsort -integer $dir [array names a]] {
+ if { [lindex [lsort -integer $a($docid)] end] } {
+ set matchinfo [list 1 3]
+ foreach i {0 1 2} hit $a($docid) {
+ lappend matchinfo $hit $nHit($i) $nDoc($i)
+ }
+ lappend ret $docid $matchinfo
+ }
+ }
+
+ set ret
+}
+
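+# Return a list of the rowids of all rows expected to match an FTS3
+# query consisting of the terms in $termlist joined by NEAR/$nNear
+# operators. A column matches if it contains each term in $termlist
+# separated from an occurrence of the previous term by no more than
+# $nNear other tokens.
+#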
+proc simple_near {termlist nNear} {
+ set ret [list]
+
+ foreach {key value} [array get ::t1] {
+ foreach v $value {
+
+ set l [lsearch -exact -all $v [lindex $termlist 0]]
+ foreach T [lrange $termlist 1 end] {
+ set l2 [list]
+ foreach i $l {
+ set iStart [expr $i - $nNear - 1]
+ set iEnd [expr $i + $nNear + 1]
+ if {$iStart < 0} {set iStart 0}
+ foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] {
+ incr i2 $iStart
+ if {$i2 != $i} { lappend l2 $i2 }
+ }
+ }
+ set l [lsort -uniq -integer $l2]
+ }
+
+ if {[llength $l]} {
+#puts "MATCH($key): $v"
+ lappend ret $key
+ }
+ }
+ }
+
+ lsort -unique -integer $ret
+}
+
+# The following three procs:
+#
+# setop_not A B
+# setop_or A B
+# setop_and A B
+#
+# each take two arguments. Both arguments must be lists of integer values
+# sorted by value. The return value is the list produced by evaluating
+# the equivalent of "A op B", where op is the FTS3 operator NOT, OR or
+# AND.
+#
+proc setop_not {A B} {
+ foreach b $B { set n($b) {} }
+ set ret [list]
+ foreach a $A { if {![info exists n($a)]} {lappend ret $a} }
+ return $ret
+}
+proc setop_or {A B} {
+ lsort -integer -uniq [concat $A $B]
+}
+proc setop_and {A B} {
+ foreach b $B { set n($b) {} }
+ set ret [list]
+ foreach a $A { if {[info exists n($a)]} {lappend ret $a} }
+ return $ret
+}
+
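+# Decode a matchinfo() blob (an array of 32-bit integers in the machine
+# byte order) into a Tcl list of integers, so that it can be compared
+# against the expected results. Registered below as SQL function "mit".
+#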
+proc mit {blob} {
+ set scan(littleEndian) i*
+ set scan(bigEndian) I*
+ binary scan $blob $scan($::tcl_platform(byteOrder)) r
+ return $r
+}
+db func mit mit
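+
+# Enable the FTS3 enhanced query syntax, which provides the AND and NOT
+# operators used by the tests below.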
+set sqlite_fts3_enable_parentheses 1
+
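+# Run the SELECT statement in $sql twice - once with "ORDER BY docid ASC"
+# appended, expecting the (ascending) result list $res, and once with
+# "ORDER BY docid DESC" appended, expecting $res sorted in descending
+# order.
+#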
+proc do_orderbydocid_test {tn sql res} {
+ uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res]
+ uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \
+ [lsort -int -dec $res]
+ ]
+}
+
+set NUM_TRIALS 100
+
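+# Each iteration of the following loop runs the test procedure described
+# in the header comment above using a different combination of advisory
+# node-size and fts4 "order=" option.
+#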
+foreach {nodesize order} {
+ 50 DESC
+ 50 ASC
+ 500 ASC
+ 1000 DESC
+ 2000 ASC
+} {
+ catch { array unset ::t1 }
+ set testname "$nodesize/$order"
+
+ # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.
+ #
+ db transaction {
+ catchsql { DROP TABLE t1 }
+ execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)"
+ execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
+ for {set i 0} {$i < 100} {incr i} { insert_row $i }
+ }
+
+ for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} {
+ catchsql COMMIT
+
+ set DO_MALLOC_TEST 0
+ set nRep 10
+ if {$iTest==100 && $nodesize==50} {
+ set DO_MALLOC_TEST 1
+ set nRep 2
+ }
+
+ set ::testprefix fts3rnd-1.$testname.$iTest
+
+ # Delete one row, update one row and insert one row.
+ #
+ set rows [array names ::t1]
+ set nRow [llength $rows]
+ set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
+ set iDelete $iUpdate
+ while {$iDelete == $iUpdate} {
+ set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
+ }
+ set iInsert $iUpdate
+ while {[info exists ::t1($iInsert)]} {
+ set iInsert [expr {int(rand()*1000000)}]
+ }
+ execsql BEGIN
+ insert_row $iInsert
+ update_row $iUpdate
+ delete_row $iDelete
+ if {0==($iTest%2)} { execsql COMMIT }
+
+ if {0==($iTest%2)} {
+ #do_test 0 { fts3_integrity_check t1 } ok
+ }
+
+ # Pick 10 terms from the vocabulary. Check that the results of querying
+ # the database for the set of documents containing each of these terms
+ # is the same as the result obtained by scanning the contents of the Tcl
+ # array for each term.
+ #
+ for {set i 0} {$i < 10} {incr i} {
+ set term [random_term]
+ do_select_test 1.$i.asc {
+ SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
+ ORDER BY docid ASC
+ } [simple_token_matchinfo $term 0]
+ do_select_test 1.$i.desc {
+ SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
+ ORDER BY docid DESC
+ } [simple_token_matchinfo $term 1]
+ }
+
+ # This time, query for term prefixes - each prefix is a term from the
+ # vocabulary with its final character removed, followed by a "*"
+ # character. Test that querying the Tcl array produces the same results
+ # as querying the FTS3 table for the prefix.
+ #
+ for {set i 0} {$i < $nRep} {incr i} {
+ set prefix [string range [random_term] 0 end-1]
+ set match "${prefix}*"
+ do_orderbydocid_test 2.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [simple_phrase $match]
+ }
+
+ # Similar to the above, except for phrase queries.
+ #
+ for {set i 0} {$i < $nRep} {incr i} {
+ set term [list [random_term] [random_term]]
+ set match "\"$term\""
+ do_orderbydocid_test 3.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [simple_phrase $term]
+ }
+
+ # Three word phrases.
+ #
+ for {set i 0} {$i < $nRep} {incr i} {
+ set term [list [random_term] [random_term] [random_term]]
+ set match "\"$term\""
+ do_orderbydocid_test 4.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [simple_phrase $term]
+ }
+
+ # Three word phrases made up of term-prefixes.
+ #
+ for {set i 0} {$i < $nRep} {incr i} {
+ set query "[string range [random_term] 0 end-1]* "
+ append query "[string range [random_term] 0 end-1]* "
+ append query "[string range [random_term] 0 end-1]*"
+
+ set match "\"$query\""
+ do_orderbydocid_test 5.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [simple_phrase $query]
+ }
+
+ # A NEAR query with terms as the arguments:
+ #
+ # ... MATCH '$term1 NEAR $term2' ...
+ #
+ for {set i 0} {$i < $nRep} {incr i} {
+ set terms [list [random_term] [random_term]]
+ set match [join $terms " NEAR "]
+ do_orderbydocid_test 6.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [simple_near $terms 10]
+ }
+
+ # A 3-way NEAR query with terms as the arguments.
+ #
+ for {set i 0} {$i < $nRep} {incr i} {
+ set terms [list [random_term] [random_term] [random_term]]
+ set nNear 11
+ set match [join $terms " NEAR/$nNear "]
+ do_orderbydocid_test 7.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [simple_near $terms $nNear]
+ }
+
+ # Set operations on simple term queries.
+ #
+ foreach {tn op proc} {
+ 8 OR setop_or
+ 9 NOT setop_not
+ 10 AND setop_and
+ } {
+ for {set i 0} {$i < $nRep} {incr i} {
+ set term1 [random_term]
+ set term2 [random_term]
+ set match "$term1 $op $term2"
+ do_orderbydocid_test $tn.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [$proc [simple_phrase $term1] [simple_phrase $term2]]
+ }
+ }
+
+ # Set operations on NEAR queries.
+ #
+ foreach {tn op proc} {
+ 11 OR setop_or
+ 12 NOT setop_not
+ 13 AND setop_and
+ } {
+ for {set i 0} {$i < $nRep} {incr i} {
+ set term1 [random_term]
+ set term2 [random_term]
+ set term3 [random_term]
+ set term4 [random_term]
+ set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
+ do_orderbydocid_test $tn.$i {
+ SELECT docid FROM t1 WHERE t1 MATCH $match
+ } [$proc \
+ [simple_near [list $term1 $term2] 10] \
+ [simple_near [list $term3 $term4] 10]
+ ]
+ }
+ }
+
+ catchsql COMMIT
+ }
+}
+
+finish_test