summaryrefslogtreecommitdiffstats
path: root/src/test/regress/sql/brin_bloom.sql
blob: 5d499208e38428bfdbcb3835c1e510cf1eb68975 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
CREATE TABLE brintest_bloom (byteacol bytea,
	charcol "char",
	namecol name,
	int8col bigint,
	int2col smallint,
	int4col integer,
	textcol text,
	oidcol oid,
	float4col real,
	float8col double precision,
	macaddrcol macaddr,
	inetcol inet,
	cidrcol cidr,
	bpcharcol character,
	datecol date,
	timecol time without time zone,
	timestampcol timestamp without time zone,
	timestamptzcol timestamp with time zone,
	intervalcol interval,
	timetzcol time with time zone,
	numericcol numeric,
	uuidcol uuid,
	lsncol pg_lsn
) WITH (fillfactor=10);

INSERT INTO brintest_bloom SELECT
	repeat(stringu1, 8)::bytea,
	substr(stringu1, 1, 1)::"char",
	stringu1::name, 142857 * tenthous,
	thousand,
	twothousand,
	repeat(stringu1, 8),
	unique1::oid,
	(four + 1.0)/(hundred+1),
	odd::float8 / (tenthous + 1),
	format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,
	inet '10.2.3.4/24' + tenthous,
	cidr '10.2.3/24' + tenthous,
	substr(stringu1, 1, 1)::bpchar,
	date '1995-08-15' + tenthous,
	time '01:20:30' + thousand * interval '18.5 second',
	timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',
	timestamptz '1972-10-10 03:00' + thousand * interval '1 hour',
	justify_days(justify_hours(tenthous * interval '12 minutes')),
	timetz '01:30:20+02' + hundred * interval '15 seconds',
	tenthous::numeric(36,30) * fivethous * even / (hundred + 1),
	format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,
	format('%s/%s%s', odd, even, tenthous)::pg_lsn
FROM tenk1 ORDER BY unique2 LIMIT 100;

-- throw in some NULL's and different values
INSERT INTO brintest_bloom (inetcol, cidrcol) SELECT
	inet 'fe80::6e40:8ff:fea9:8c46' + tenthous,
	cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous
FROM tenk1 ORDER BY thousand, tenthous LIMIT 25;

-- test bloom specific index options
-- ndistinct must be >= -1.0
CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
	byteacol bytea_bloom_ops(n_distinct_per_range = -1.1)
);
-- false_positive_rate must be between 0.0001 and 0.25
CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
	byteacol bytea_bloom_ops(false_positive_rate = 0.00009)
);
CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
	byteacol bytea_bloom_ops(false_positive_rate = 0.26)
);

CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
	byteacol bytea_bloom_ops,
	charcol char_bloom_ops,
	namecol name_bloom_ops,
	int8col int8_bloom_ops,
	int2col int2_bloom_ops,
	int4col int4_bloom_ops,
	textcol text_bloom_ops,
	oidcol oid_bloom_ops,
	float4col float4_bloom_ops,
	float8col float8_bloom_ops,
	macaddrcol macaddr_bloom_ops,
	inetcol inet_bloom_ops,
	cidrcol inet_bloom_ops,
	bpcharcol bpchar_bloom_ops,
	datecol date_bloom_ops,
	timecol time_bloom_ops,
	timestampcol timestamp_bloom_ops,
	timestamptzcol timestamptz_bloom_ops,
	intervalcol interval_bloom_ops,
	timetzcol timetz_bloom_ops,
	numericcol numeric_bloom_ops,
	uuidcol uuid_bloom_ops,
	lsncol pg_lsn_bloom_ops
) with (pages_per_range = 1);

CREATE TABLE brinopers_bloom (colname name, typ text,
	op text[], value text[], matches int[],
	check (cardinality(op) = cardinality(value)),
	check (cardinality(op) = cardinality(matches)));

INSERT INTO brinopers_bloom VALUES
	('byteacol', 'bytea',
	 '{=}',
	 '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}',
	 '{1}'),
	('charcol', '"char"',
	 '{=}',
	 '{M}',
	 '{6}'),
	('namecol', 'name',
	 '{=}',
	 '{MAAAAA}',
	 '{2}'),
	('int2col', 'int2',
	 '{=}',
	 '{800}',
	 '{1}'),
	('int4col', 'int4',
	 '{=}',
	 '{800}',
	 '{1}'),
	('int8col', 'int8',
	 '{=}',
	 '{1257141600}',
	 '{1}'),
	('textcol', 'text',
	 '{=}',
	 '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}',
	 '{1}'),
	('oidcol', 'oid',
	 '{=}',
	 '{8800}',
	 '{1}'),
	('float4col', 'float4',
	 '{=}',
	 '{1}',
	 '{4}'),
	('float8col', 'float8',
	 '{=}',
	 '{0}',
	 '{1}'),
	('macaddrcol', 'macaddr',
	 '{=}',
	 '{2c:00:2d:00:16:00}',
	 '{2}'),
	('inetcol', 'inet',
	 '{=}',
	 '{10.2.14.231/24}',
	 '{1}'),
	('inetcol', 'cidr',
	 '{=}',
	 '{fe80::6e40:8ff:fea9:8c46}',
	 '{1}'),
	('cidrcol', 'inet',
	 '{=}',
	 '{10.2.14/24}',
	 '{2}'),
	('cidrcol', 'inet',
	 '{=}',
	 '{fe80::6e40:8ff:fea9:8c46}',
	 '{1}'),
	('cidrcol', 'cidr',
	 '{=}',
	 '{10.2.14/24}',
	 '{2}'),
	('cidrcol', 'cidr',
	 '{=}',
	 '{fe80::6e40:8ff:fea9:8c46}',
	 '{1}'),
	('bpcharcol', 'bpchar',
	 '{=}',
	 '{W}',
	 '{6}'),
	('datecol', 'date',
	 '{=}',
	 '{2009-12-01}',
	 '{1}'),
	('timecol', 'time',
	 '{=}',
	 '{02:28:57}',
	 '{1}'),
	('timestampcol', 'timestamp',
	 '{=}',
	 '{1964-03-24 19:26:45}',
	 '{1}'),
	('timestamptzcol', 'timestamptz',
	 '{=}',
	 '{1972-10-19 09:00:00-07}',
	 '{1}'),
	('intervalcol', 'interval',
	 '{=}',
	 '{1 mons 13 days 12:24}',
	 '{1}'),
	('timetzcol', 'timetz',
	 '{=}',
	 '{01:35:50+02}',
	 '{2}'),
	('numericcol', 'numeric',
	 '{=}',
	 '{2268164.347826086956521739130434782609}',
	 '{1}'),
	('uuidcol', 'uuid',
	 '{=}',
	 '{52225222-5222-5222-5222-522252225222}',
	 '{1}'),
	('lsncol', 'pg_lsn',
	 '{=, IS, IS NOT}',
	 '{44/455222, NULL, NULL}',
	 '{1, 25, 100}');

DO $x$
DECLARE
	r record;
	r2 record;
	cond text;
	idx_ctids tid[];
	ss_ctids tid[];
	count int;
	plan_ok bool;
	plan_line text;
BEGIN
	FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers_bloom, unnest(op) WITH ORDINALITY AS oper LOOP

		-- prepare the condition
		IF r.value IS NULL THEN
			cond := format('%I %s %L', r.colname, r.oper, r.value);
		ELSE
			cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ);
		END IF;

		-- run the query using the brin index
		SET enable_seqscan = 0;
		SET enable_bitmapscan = 1;

		plan_ok := false;
		FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP
			IF plan_line LIKE '%Bitmap Heap Scan on brintest_bloom%' THEN
				plan_ok := true;
			END IF;
		END LOOP;
		IF NOT plan_ok THEN
			RAISE WARNING 'did not get bitmap indexscan plan for %', r;
		END IF;

		EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond)
			INTO idx_ctids;

		-- run the query using a seqscan
		SET enable_seqscan = 1;
		SET enable_bitmapscan = 0;

		plan_ok := false;
		FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP
			IF plan_line LIKE '%Seq Scan on brintest_bloom%' THEN
				plan_ok := true;
			END IF;
		END LOOP;
		IF NOT plan_ok THEN
			RAISE WARNING 'did not get seqscan plan for %', r;
		END IF;

		EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond)
			INTO ss_ctids;

		-- make sure both return the same results
		count := array_length(idx_ctids, 1);

		IF NOT (count = array_length(ss_ctids, 1) AND
				idx_ctids @> ss_ctids AND
				idx_ctids <@ ss_ctids) THEN
			-- report the results of each scan to make the differences obvious
			RAISE WARNING 'something not right in %: count %', r, count;
			SET enable_seqscan = 1;
			SET enable_bitmapscan = 0;
			FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP
				RAISE NOTICE 'seqscan: %', r2;
			END LOOP;

			SET enable_seqscan = 0;
			SET enable_bitmapscan = 1;
			FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP
				RAISE NOTICE 'bitmapscan: %', r2;
			END LOOP;
		END IF;

		-- make sure we found expected number of matches
		IF count != r.matches THEN RAISE WARNING 'unexpected number of results % for %', count, r; END IF;
	END LOOP;
END;
$x$;

RESET enable_seqscan;
RESET enable_bitmapscan;

INSERT INTO brintest_bloom SELECT
	repeat(stringu1, 42)::bytea,
	substr(stringu1, 1, 1)::"char",
	stringu1::name, 142857 * tenthous,
	thousand,
	twothousand,
	repeat(stringu1, 42),
	unique1::oid,
	(four + 1.0)/(hundred+1),
	odd::float8 / (tenthous + 1),
	format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,
	inet '10.2.3.4' + tenthous,
	cidr '10.2.3/24' + tenthous,
	substr(stringu1, 1, 1)::bpchar,
	date '1995-08-15' + tenthous,
	time '01:20:30' + thousand * interval '18.5 second',
	timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',
	timestamptz '1972-10-10 03:00' + thousand * interval '1 hour',
	justify_days(justify_hours(tenthous * interval '12 minutes')),
	timetz '01:30:20' + hundred * interval '15 seconds',
	tenthous::numeric(36,30) * fivethous * even / (hundred + 1),
	format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,
	format('%s/%s%s', odd, even, tenthous)::pg_lsn
FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;

SELECT brin_desummarize_range('brinidx_bloom', 0);
VACUUM brintest_bloom;  -- force a summarization cycle in brinidx

UPDATE brintest_bloom SET int8col = int8col * int4col;
UPDATE brintest_bloom SET textcol = '' WHERE textcol IS NOT NULL;

-- Tests for brin_summarize_new_values
SELECT brin_summarize_new_values('brintest_bloom'); -- error, not an index
SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
SELECT brin_summarize_new_values('brinidx_bloom'); -- ok, no change expected

-- Tests for brin_desummarize_range
SELECT brin_desummarize_range('brinidx_bloom', -1); -- error, invalid range
SELECT brin_desummarize_range('brinidx_bloom', 0);
SELECT brin_desummarize_range('brinidx_bloom', 0);
SELECT brin_desummarize_range('brinidx_bloom', 100000000);

-- Test brin_summarize_range
CREATE TABLE brin_summarize_bloom (
    value int
) WITH (fillfactor=10, autovacuum_enabled=false);
CREATE INDEX brin_summarize_bloom_idx ON brin_summarize_bloom USING brin (value) WITH (pages_per_range=2);
-- Fill a few pages
DO $$
DECLARE curtid tid;
BEGIN
  LOOP
    INSERT INTO brin_summarize_bloom VALUES (1) RETURNING ctid INTO curtid;
    EXIT WHEN curtid > tid '(2, 0)';
  END LOOP;
END;
$$;

-- summarize one range
SELECT brin_summarize_range('brin_summarize_bloom_idx', 0);
-- nothing: already summarized
SELECT brin_summarize_range('brin_summarize_bloom_idx', 1);
-- summarize one range
SELECT brin_summarize_range('brin_summarize_bloom_idx', 2);
-- nothing: page doesn't exist in table
SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967295);
-- invalid block number values
SELECT brin_summarize_range('brin_summarize_bloom_idx', -1);
SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967296);


-- test brin cost estimates behave sanely based on correlation of values
CREATE TABLE brin_test_bloom (a INT, b INT);
INSERT INTO brin_test_bloom SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
CREATE INDEX brin_test_bloom_a_idx ON brin_test_bloom USING brin (a) WITH (pages_per_range = 2);
CREATE INDEX brin_test_bloom_b_idx ON brin_test_bloom USING brin (b) WITH (pages_per_range = 2);
VACUUM ANALYZE brin_test_bloom;

-- Ensure brin index is used when columns are perfectly correlated
EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE a = 1;
-- Ensure brin index is not used when values are not correlated
EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE b = 1;