summaryrefslogtreecommitdiffstats
path: root/ext/fts5/test/fts5hash.test
blob: 5df55f226fc8c94ceb623f3a7b7b284afd5709fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# 2015 April 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file are focused on the code in fts5_hash.c.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5hash

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Return a list of tokens (a vocabulary) that all share the same hash
# key value. This can be used to test hash collisions.
#
proc build_vocab1 {args} {

  set O(-nslot) 1024
  set O(-nword)   20
  set O(-hash)    88
  set O(-prefix)  ""

  if {[llength $args] % 2} { error "bad args" }
  array set O2 $args
  foreach {k v} $args {
    if {[info exists O($k)]==0} { error "bad option: $k" }
    set O($k) $v
  }

  set L [list]
  while {[llength $L] < $O(-nword)} {
    set t "$O(-prefix)[random_token]"
    set h [sqlite3_fts5_token_hash $O(-nslot) $t]
    if {$O(-hash)==$h} { lappend L $t }
  }
  return $L
}

proc random_token {} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set iVal [expr int(rand() * 2000000)]
  return [string map $map $iVal]
}

proc random_doc {vocab nWord} {
  set doc ""
  set nVocab [llength $vocab]
  for {set i 0} {$i<$nWord} {incr i} {
    set j [expr {int(rand() * $nVocab)}]
    lappend doc [lindex $vocab $j]
  }
  return $doc
}

foreach_detail_mode $testprefix {

  set vocab [build_vocab1]
  db func r random_doc 
  
  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE eee USING fts5(e, ee, detail=%DETAIL%);
    BEGIN;
      WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
      INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
      INSERT INTO eee(eee) VALUES('integrity-check');
    COMMIT;
    INSERT INTO eee(eee) VALUES('integrity-check');
  }
  
  set hash [sqlite3_fts5_token_hash 1024 xyz]
  set vocab [build_vocab1 -prefix xyz -hash $hash]
  lappend vocab xyz
  
  do_execsql_test 1.1 {
    CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row'); 
    BEGIN;
  }
  do_test 1.2 {
    for {set i 1} {$i <= 100} {incr i} {
      execsql { INSERT INTO eee VALUES( r($vocab, 5), r($vocab, 7) ) }
    }
  } {}
    
  do_test 1.3 {
    db eval { SELECT term, doc FROM vocab } {
      set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
      if {$nRow != $doc} {
        error "term=$term fts5vocab=$doc cnt=$nRow"
      }
    }
    set {} {}
  } {}
  
  do_execsql_test 1.4 {
    COMMIT;
    INSERT INTO eee(eee) VALUES('integrity-check');
  }

  #-----------------------------------------------------------------------
  # Add a small and very large token with the same hash value to an
  # empty table. At one point this would provoke an asan error.
  #
  do_test 1.5 {
    set big [string repeat 12345 40]
    set hash [sqlite3_fts5_token_hash 1024 $big]
    while {1} {
      set small [random_token]
      if {[sqlite3_fts5_token_hash 1024 $small]==$hash} break
    }

    execsql { CREATE VIRTUAL TABLE t2 USING fts5(x, detail=%DETAIL%) }
    execsql {
      INSERT INTO t2 VALUES($small || ' ' || $big);
    }
  } {}

} ;# foreach_detail_mode

#-------------------------------------------------------------------------
reset_db
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('hashsize', 1024);
  INSERT INTO t1(t1, rank) VALUES('automerge', 0);
  INSERT INTO t1(t1, rank) VALUES('crisismerge', 1000);
}

do_execsql_test 2.2 {
  BEGIN;
    INSERT INTO t1 VALUES('abc def ghi');
    SELECT count(*) FROM t1_data;
} {2}

do_execsql_test 2.3 {
  WITH s(i) AS (
    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1024
  )
  INSERT INTO t1 SELECT 'abc def ghi' FROM s;
  SELECT (SELECT count(*) FROM t1_data) > 10;
} {1}

do_execsql_test 2.4 {
  COMMIT;
  DROP TABLE t1;
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('hashsize', 1024);
  INSERT INTO t1(t1, rank) VALUES('automerge', 0);
  INSERT INTO t1(t1, rank) VALUES('crisismerge', 1000);
  WITH s(i) AS (
    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1024
  )
  INSERT INTO t1 SELECT 'abc' || i || ' def' || i || ' ghi' || i FROM s;
  SELECT (SELECT count(*) FROM t1_data) > 100;
} {1}

finish_test