test/whereF.test


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

# 2012 November 9
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# 
# Test cases for query planning decisions.


#
# The tests in this file demonstrate the behaviour of the query planner
# in determining the order in which joined tables are scanned.
#
# Assume there are two tables being joined - t1 and t2. Each has a cost
# if it is the outer loop, and a cost if it is the inner loop. As follows:
#
#   t1(outer) - cost of scanning t1 as the outer loop.
#   t1(inner) - cost of scanning t1 as the inner loop.
#   t2(outer) - cost of scanning t2 as the outer loop.
#   t2(inner) - cost of scanning t2 as the inner loop.
#
# Depending on the order in which the planner nests the scans, the total
# cost of the join query is one of:
#
#   t1(outer) * t2(inner)
#   t2(outer) * t1(inner)
#
# The tests in this file attempt to verify that the planner nests joins in
# the correct order when the following are true:
#
#   + (t1(outer) * t2(inner)) > (t1(inner) * t2(outer)
#   +  t1(outer) < t2(outer)
#
# In other words, when the best overall query plan has t2 as the outer loop,
# but when the outer loop is considered independent of the inner, t1 is the
# most efficient choice.
#
# In order to make them more predictable, automatic indexes are turned off for
# the tests in this file.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix whereF

do_execsql_test 1.0 {
  PRAGMA automatic_index = 0;
  CREATE TABLE t1(a, b, c);
  CREATE TABLE t2(d, e, f);
  CREATE UNIQUE INDEX i1 ON t1(a);
  CREATE UNIQUE INDEX i2 ON t2(d);
} {}

foreach {tn sql} {
  1 "SELECT * FROM t1,           t2 WHERE t1.a=t2.e AND t2.d<t1.b AND t1.c!=10"
  2 "SELECT * FROM t2,           t1 WHERE t1.a=t2.e AND t2.d<t1.b AND t1.c!=10"
  3 "SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a=t2.e AND t2.d<t1.b AND t1.c!=10"
} {
  do_test 1.$tn {
    db eval "EXPLAIN QUERY PLAN $sql"
   } {/.*SCAN t2\y.*SEARCH t1\y.*/}
}

do_execsql_test 2.0 {
  DROP TABLE t1;
  DROP TABLE t2;
  CREATE TABLE t1(a, b, c);
  CREATE TABLE t2(d, e, f);

  CREATE UNIQUE INDEX i1 ON t1(a);
  CREATE UNIQUE INDEX i2 ON t1(b);
  CREATE UNIQUE INDEX i3 ON t2(d);
} {}

foreach {tn sql} {
  1 "SELECT * FROM t1,           t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"
  2 "SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"
  3 "SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"
} {
  do_test 2.$tn {
    db eval "EXPLAIN QUERY PLAN $sql"
   } {/.*SCAN t2\y.*SEARCH t1\y.*/}
}

do_execsql_test 3.0 {
  DROP TABLE t1;
  DROP TABLE t2;
  CREATE TABLE t1(a, b, c);
  CREATE TABLE t2(d, e, f);

  CREATE UNIQUE INDEX i1 ON t1(a, b);
  CREATE INDEX i2 ON t2(d);
} {}

foreach {tn sql} {
  1 {SELECT t1.a, t1.b, t2.d, t2.e FROM t1, t2 
     WHERE t2.d=t1.b AND t1.a=(t2.d+1) AND t1.b = (t2.e+1)}

  2 {SELECT t1.a, t1.b, t2.d, t2.e FROM t2, t1 
     WHERE t2.d=t1.b AND t1.a=(t2.d+1) AND t1.b = (t2.e+1)}

  3 {SELECT t1.a, t1.b, t2.d, t2.e FROM t2 CROSS JOIN t1 
     WHERE t2.d=t1.b AND t1.a=(t2.d+1) AND t1.b = (t2.e+1)}
} {
  do_test 3.$tn {
    db eval "EXPLAIN QUERY PLAN $sql"
   } {/.*SCAN t2\y.*SEARCH t1\y.*/}
}

do_execsql_test 4.0 {
  CREATE TABLE t4(a,b,c,d,e, PRIMARY KEY(a,b,c));
  CREATE INDEX t4adc ON t4(a,d,c);
  CREATE UNIQUE INDEX t4aebc ON t4(a,e,b,c);
  EXPLAIN QUERY PLAN SELECT rowid FROM t4 WHERE a=? AND b=?;
} {/a=. AND b=./}

#-------------------------------------------------------------------------
# Test the following case:
#
#   ... FROM t1, t2 WHERE (
#     t2.rowid = +t1.rowid OR (t2.f2 = t1.f1 AND t1.f1!=-1)
#   )
#
# where there is an index on t2(f2). The planner should use "t1" as the
# outer loop. The inner loop, on "t2", is an OR optimization. One pass
# for:
#
#     t2.rowid = $1
#
# and another for:
#
#     t2.f2=$1 AND $1!=-1
#
# the test is to ensure that on the second pass, the ($1!=-1) condition
# is tested before any seek operations are performed - i.e. outside of
# the loop through the f2=$1 range of the t2(f2) index.
#
reset_db
do_execsql_test 5.0 {
  CREATE TABLE t1(f1);
  CREATE TABLE t2(f2);
  CREATE INDEX t2f ON t2(f2);

  INSERT INTO t1 VALUES(-1);
  INSERT INTO t1 VALUES(-1);
  INSERT INTO t1 VALUES(-1);
  INSERT INTO t1 VALUES(-1);

  WITH w(i) AS (
    SELECT 1 UNION ALL SELECT i+1 FROM w WHERE i<1000
  )
  INSERT INTO t2 SELECT -1 FROM w;
}

do_execsql_test 5.1 {
  SELECT count(*) FROM t1, t2 WHERE t2.rowid = +t1.rowid
} {4}
do_test 5.2 { expr [db status vmstep]<200 } 1

do_execsql_test 5.3 {
  SELECT count(*) FROM t1, t2 WHERE (
    t2.rowid = +t1.rowid OR t2.f2 = t1.f1
  )
} {4000}
do_test 5.4 { expr [db status vmstep]>1000 } 1

do_execsql_test 5.5 {
  SELECT count(*) FROM t1, t2 WHERE (
    t2.rowid = +t1.rowid OR (t2.f2 = t1.f1 AND t1.f1!=-1)
  )
} {4}
do_test 5.6 { expr [db status vmstep]<200 } 1

# 2017-09-04 ticket b899b6042f97f52d
# Segfault on correlated subquery...
#
ifcapable json1&&vtab {
  do_execsql_test 6.1 {
    CREATE TABLE t6(x);
    SELECT * FROM t6 WHERE 1 IN (SELECT value FROM json_each(x));
  } {}

  do_execsql_test 6.2 {
    DROP TABLE t6;
    CREATE TABLE t6(a,b,c);
    INSERT INTO t6 VALUES
     (0,null,'{"a":0,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}'),
     (1,null,'{"a":1,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}'),
     (2,null,'{"a":9,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}');
    SELECT * FROM t6
     WHERE (EXISTS (SELECT 1 FROM json_each(t6.c) AS x WHERE x.value=1));
  } {1 {} {{"a":1,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}}}

  # Another test case derived from a posting by Wout Mertens on the
  # sqlite-users mailing list on 2017-10-04.
  do_execsql_test 6.3 {
    DROP TABLE IF EXISTS t;
    CREATE TABLE t(json JSON);
    SELECT * FROM t
     WHERE(EXISTS(SELECT 1 FROM json_each(t.json,"$.foo") j
                   WHERE j.value = 'meep'));
  } {}
  do_execsql_test 6.4 {
    INSERT INTO t VALUES('{"xyzzy":null}');
    INSERT INTO t VALUES('{"foo":"meep","other":12345}');
    INSERT INTO t VALUES('{"foo":"bingo","alt":5.25}');
    SELECT * FROM t
     WHERE(EXISTS(SELECT 1 FROM json_each(t.json,"$.foo") j
                   WHERE j.value = 'meep'));
  } {{{"foo":"meep","other":12345}}}
}

# 2018-01-27
# Ticket https://sqlite.org/src/tktview/ec32177c99ccac2b180fd3ea2083
# Incorrect result when using the new OR clause factoring optimization
#
# This is the original test case as reported on the sqlite-users mailing
# list
#
do_execsql_test 7.1 {
  DROP TABLE IF EXISTS cd;
  CREATE TABLE cd ( cdid INTEGER PRIMARY KEY NOT NULL, genreid integer );
  CREATE INDEX cd_idx_genreid ON cd (genreid);
  INSERT INTO cd  ( cdid, genreid ) VALUES
                     ( 1,    1 ),
                     ( 2, NULL ),
                     ( 3, NULL ),
                     ( 4, NULL ),
                     ( 5, NULL );
  
  SELECT cdid
    FROM cd me
  WHERE 2 > (
    SELECT COUNT( * )
      FROM cd rownum__emulation
    WHERE
      (
        me.genreid IS NOT NULL
          AND
        rownum__emulation.genreid IS NULL
      )
        OR
      (
        me.genreid IS NOT NULL
          AND
        rownum__emulation.genreid IS NOT NULL
          AND
        rownum__emulation.genreid < me.genreid
      )
        OR
      (
        ( me.genreid = rownum__emulation.genreid OR ( me.genreid IS NULL
  AND rownum__emulation.genreid IS NULL ) )
          AND
        rownum__emulation.cdid > me.cdid
      )
  );
} {4 5}

# Simplified test cases from the ticket
#
do_execsql_test 7.2 {
  DROP TABLE IF EXISTS t1;
  DROP TABLE IF EXISTS t2;
  CREATE TABLE t1(a INTEGER PRIMARY KEY, b);
  INSERT INTO t1(a,b) VALUES(1,1);
  CREATE TABLE t2(aa INTEGER PRIMARY KEY, bb);
  INSERT INTO t2(aa,bb) VALUES(1,1),(2,NULL),(3,NULL);
  SELECT (
    SELECT COUNT(*) FROM t2
     WHERE ( t1.b IS NOT NULL AND t2.bb IS NULL )
        OR ( t2.bb < t1.b )
        OR ( t1.b IS t2.bb AND t2.aa > t1.a )
    )
    FROM t1;
} {2}

# The fix for ticket ec32177c99ccac2b180fd3ea2083 only makes a difference
# in the output when there is a TERM_VNULL entry in the WhereClause array.
# And TERM_VNULL entries are only generated when compiling with 
# SQLITE_ENABLE_STAT4.  Nevertheless, it is correct that TERM_VIRTUAL terms
# should not participate in the factoring optimization.  In all cases other
# than TERM_VNULL, participation is harmless, but it does consume a few
# extra CPU cycles.
#
# The following test verifies that the TERM_VIRTUAL terms resulting from
# a GLOB operator do not appear anywhere in the generated code.  This
# confirms that the problem is fixed, even on builds that omit STAT4.
#
do_execsql_test 7.3 {
  DROP TABLE IF EXISTS t1;
  DROP TABLE IF EXISTS t2;
  CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT);
  INSERT INTO t1(a,b) VALUES(1,'abcxyz');
  CREATE TABLE t2(aa INTEGER PRIMARY KEY, bb TEXT);
  INSERT INTO t2(aa,bb) VALUES(1,'abc'),(2,'wxyz'),(3,'xyz');
  CREATE INDEX t2bb ON t2(bb);
  EXPLAIN SELECT (
    SELECT COUNT(*) FROM t2
     WHERE ( t1.b GLOB 'a*z' AND t2.bb='xyz' )
        OR ( t2.bb = t1.b )
        OR ( t2.aa = t1.a )
    )
    FROM t1;
} {~/ (Lt|Ge) /}

finish_test