summaryrefslogtreecommitdiffstats
path: root/test/bloom1.test
blob: 151f364ae0a66343199e3567124232bd67bfe915 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# 2022 October 06
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests for queries that use bloom filters

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/lock_common.tcl
source $testdir/malloc_common.tcl

set testprefix bloom1

# Tests 1.*  verify that the bloom filter code correctly handles the
# case where the RHS of an (<ipk-column> = ?) expression must be coerced
# to an integer before the comparison made.
#
do_execsql_test 1.0 {
  CREATE TABLE t1(a, b);
  CREATE TABLE t2(c INTEGER PRIMARY KEY, d);
}

do_execsql_test 1.1 {
  INSERT INTO t1 VALUES('hello', 'world');
  INSERT INTO t2 VALUES(14, 'fourteen');
}

do_execsql_test 1.2 {
  ANALYZE sqlite_schema;
  INSERT INTO sqlite_stat1 VALUES('t2','idx1','6 6');
  ANALYZE sqlite_schema;
}

do_execsql_test 1.3 {
  SELECT 'affinity!' FROM t1 CROSS JOIN t2 WHERE t2.c = '14';
} {affinity!}


reset_db
do_execsql_test 1.4 {
  CREATE TABLE t1(a, b TEXT);
  CREATE TABLE t2(c INTEGER PRIMARY KEY, d);
  CREATE TABLE t3(e INTEGER PRIMARY KEY, f);

  ANALYZE sqlite_schema;
  INSERT INTO sqlite_stat1 VALUES('t1','idx1','600 6');
  INSERT INTO sqlite_stat1 VALUES('t2','idx1','6 6');
  INSERT INTO sqlite_stat1 VALUES('t3','idx2','6 6');
  ANALYZE sqlite_schema;

  INSERT INTO t1 VALUES(1, '123');
  INSERT INTO t2 VALUES(123, 'one');
  INSERT INTO t3 VALUES(123, 'two');
}

do_execsql_test 1.5 {
  SELECT 'result' FROM t1, t2, t3 
  WHERE t2.c=t1.b AND t2.d!='silly'
    AND t3.e=t1.b AND t3.f!='silly'
} {result}

# 2023-02-05
# https://sqlite.org/forum/forumpost/56de336385
#
# Do not employ a Bloom filter if the table being filtered or any table
# wo the left of the table being filtered lacks STAT1 data, since we
# cannot make a good Bloom filter usefulness determination without STAT1
# data.
#
reset_db
do_execsql_test 2.0 {
  CREATE TABLE objs(c INTEGER, s INTEGER, p INTEGER, o INTEGER);
  CREATE UNIQUE INDEX objs_cspo ON objs(o,p,c,s);
  ANALYZE;
  DELETE FROM sqlite_stat1;
  INSERT INTO sqlite_stat1 VALUES('objs','objs_cspo','520138 21 20 19 1');
  ANALYZE sqlite_schema;
}
do_eqp_test 2.1 {
  WITH RECURSIVE transit(x) AS (
     SELECT s FROM objs WHERE p=9 AND o=32805
     UNION
     SELECT objs.s FROM objs, transit WHERE objs.p=9 AND objs.o=transit.x
  )
  SELECT x FROM transit;
} {
  QUERY PLAN
  |--CO-ROUTINE transit
  |  |--SETUP
  |  |  `--SEARCH objs USING COVERING INDEX objs_cspo (o=? AND p=?)
  |  `--RECURSIVE STEP
  |     |--SCAN transit
  |     `--SEARCH objs USING COVERING INDEX objs_cspo (o=? AND p=?)
  `--SCAN transit
}

# 2023-02-28
# https://sqlite.org/forum/forumpost/0846211821
#
# Bloom filter gives an incorrect result if the collating sequence is
# anything other than binary.
#
reset_db
do_execsql_test 3.1 {
  CREATE TABLE t0(x TEXT COLLATE rtrim);
  INSERT INTO t0(x) VALUES ('a'), ('b'), ('c');
  CREATE VIEW v0(y) AS SELECT DISTINCT x FROM t0;
  SELECT count(*) FROM t0, v0 WHERE x='b ';
} 3
do_eqp_test 3.2 {
  SELECT count(*) FROM t0, v0 WHERE x='b ';
} {
  QUERY PLAN
  |--CO-ROUTINE v0
  |  |--SCAN t0
  |  `--USE TEMP B-TREE FOR DISTINCT
  |--SCAN v0
  `--SEARCH t0 USING AUTOMATIC PARTIAL COVERING INDEX (x=?)
}
# ^^^^^--- The key feature in the previous result is that no Bloom filter
# is used.  In the following, a Bloom filter is used because the data type
# is INT instead of TEXT.
do_execsql_test 3.3 {
  CREATE TABLE t1(x INT COLLATE rtrim);
  INSERT INTO t1(x) VALUES ('a'), ('b'), ('c');
  CREATE VIEW v1(y) AS SELECT DISTINCT x FROM t1;
  SELECT count(*) FROM t1, v1 WHERE x='b ';
} 3
do_eqp_test 3.4 {
  SELECT count(*) FROM t1, v1 WHERE x='b ';
} {
  QUERY PLAN
  |--CO-ROUTINE v1
  |  |--SCAN t1
  |  `--USE TEMP B-TREE FOR DISTINCT
  |--SCAN v1
  |--BLOOM FILTER ON t1 (x=?)
  `--SEARCH t1 USING AUTOMATIC PARTIAL COVERING INDEX (x=?)
}

# 2023-03-14 
# https://sqlite.org/forum/forumpost/d47a0e8e3a
# https://sqlite.org/forum/forumpost/2e427099d5
#
# Both reports are for the same problem - using a Bloom filter on an
# expression index can cause issues.
#
reset_db
do_execsql_test 4.1 {
  CREATE TABLE t1(x TEXT, y INT, z TEXT);
  INSERT INTO t1(rowid,x,y,z) VALUES(12,'aa','bb','aa');
  CREATE INDEX i1x ON t1(1 IS true,z);
  CREATE TABLE t0(x TEXT);
  INSERT INTO t0(rowid,x) VALUES(4,'aa');
  ANALYZE sqlite_schema;
  INSERT INTO sqlite_stat1 VALUES('t0',NULL,'20');
  INSERT INTO sqlite_stat1 VALUES('t1','i1x','18 18 2');
  ANALYZE sqlite_schema;
}
do_execsql_test 4.2 {
  SELECT * FROM t0 NATURAL JOIN t1 WHERE z=t1.x;
} {aa bb aa}
do_execsql_test 4.3 {
  DROP TABLE t0;
  CREATE TABLE t0(a TEXT);
  INSERT INTO t0 VALUES ('xyz');
  CREATE INDEX t0x ON t0(a IS FALSE) WHERE false;
  DROP TABLE t1;
  CREATE TABLE t1(b INT);
  INSERT INTO t1 VALUES('aaa'),('bbb'),('ccc'),('ddd'),(NULL);
  CREATE TABLE t2(c REAL);
  INSERT INTO t2 VALUES(7);
  ANALYZE;
  CREATE INDEX t2x ON t2(true IN ());
}
do_execsql_test 4.4 {
  SELECT * FROM t0 LEFT JOIN t1 LEFT JOIN t2 ON (b NOTNULL)==(c IN ()) WHERE c;
} {xyz {} 7.0}
 

finish_test