diff options
Diffstat (limited to 'src/test/regress/expected/stats_ext.out')
-rw-r--r-- | src/test/regress/expected/stats_ext.out | 3256 |
1 files changed, 3256 insertions, 0 deletions
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 0000000..42a01da --- /dev/null +++ b/src/test/regress/expected/stats_ext.out @@ -0,0 +1,3256 @@ +-- Generic extended statistics support +-- +-- Note: tables for which we check estimated row counts should be created +-- with autovacuum_enabled = off, so that we don't have unstable results +-- from auto-analyze happening when we didn't expect it. +-- +-- check the number of estimated/actual rows in the top node +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; +-- Verify failures +CREATE TABLE ext_stats_test (x text, y int, z int); +CREATE STATISTICS tst; +ERROR: syntax error at or near ";" +LINE 1: CREATE STATISTICS tst; + ^ +CREATE STATISTICS tst ON a, b; +ERROR: syntax error at or near ";" +LINE 1: CREATE STATISTICS tst ON a, b; + ^ +CREATE STATISTICS tst FROM sometab; +ERROR: syntax error at or near "FROM" +LINE 1: CREATE STATISTICS tst FROM sometab; + ^ +CREATE STATISTICS tst ON a, b FROM nonexistent; +ERROR: relation "nonexistent" does not exist +CREATE STATISTICS tst ON a, b FROM ext_stats_test; +ERROR: column "a" does not exist +CREATE STATISTICS tst ON x, x, y FROM ext_stats_test; +ERROR: duplicate column name in statistics definition +CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test; +ERROR: cannot have more than 8 columns in statistics +CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test; +ERROR: cannot have more than 8 columns in statistics +CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test; +ERROR: cannot have more than 8 columns in statistics +CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test; +ERROR: duplicate expression in statistics definition +CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; +ERROR: unrecognized statistics kind "unrecognized" +-- incorrect expressions +CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference +ERROR: extended statistics require at least 2 columns +CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses +ERROR: syntax error at or near "+" +LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test; + ^ +CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression +ERROR: syntax error at or near "," +LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; + ^ +DROP TABLE ext_stats_test; +-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; +COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment'; +CREATE ROLE regress_stats_ext; +SET SESSION AUTHORIZATION regress_stats_ext; +COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment'; +ERROR: must be owner of statistics object ab1_a_b_stats +DROP STATISTICS ab1_a_b_stats; +ERROR: must be owner of statistics object ab1_a_b_stats +ALTER STATISTICS ab1_a_b_stats RENAME TO ab1_a_b_stats_new; +ERROR: must be owner of statistics object ab1_a_b_stats +RESET SESSION AUTHORIZATION; +DROP ROLE regress_stats_ext; +CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; +NOTICE: statistics object "ab1_a_b_stats" already exists, skipping +DROP STATISTICS ab1_a_b_stats; +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1; +-- Let's also verify the pg_get_statisticsobjdef output looks sane. +SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats'; + pg_get_statisticsobjdef +------------------------------------------------------------------- + CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1 +(1 row) + +DROP STATISTICS regress_schema_2.ab1_a_b_stats; +-- Ensure statistics are dropped when columns are +CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1; +CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | + c | integer | | | +Statistics objects: + "public.ab1_b_c_stats" ON b, c FROM ab1 + +-- Ensure statistics are dropped when table is +SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; + stxname +--------------- + ab1_b_c_stats +(1 row) + +DROP TABLE ab1; +SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; + stxname +--------- +(0 rows) + +-- Ensure things work sanely with SET STATISTICS 0 +CREATE TABLE ab1 (a INTEGER, b INTEGER); +ALTER TABLE ab1 ALTER a SET STATISTICS 0; +INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; +CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; +ANALYZE ab1; +WARNING: statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1" +ALTER TABLE ab1 ALTER a SET STATISTICS -1; +-- setting statistics target 0 skips the statistics, without printing any message, so check catalog +ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0; +\d ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | +Statistics objects: + "public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0 + +ANALYZE ab1; +SELECT stxname, stxdndistinct, stxddependencies, stxdmcv + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxname = 'ab1_a_b_stats' + AND d.stxoid = s.oid; + stxname | stxdndistinct | stxddependencies | stxdmcv +---------------+---------------+------------------+--------- + ab1_a_b_stats | | | +(1 row) + +ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1; +\d+ ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | +Statistics objects: + "public.ab1_a_b_stats" ON a, b FROM ab1 + +-- partial analyze doesn't build stats either +ANALYZE ab1 (a); +WARNING: statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1" +ANALYZE ab1; +DROP TABLE ab1; +ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0; +ERROR: statistics object "ab1_a_b_stats" does not exist +ALTER STATISTICS IF EXISTS ab1_a_b_stats SET STATISTICS 0; +NOTICE: statistics object "ab1_a_b_stats" does not exist, skipping +-- Ensure we can build statistics for tables with inheritance. +CREATE TABLE ab1 (a INTEGER, b INTEGER); +CREATE TABLE ab1c () INHERITS (ab1); +INSERT INTO ab1 VALUES (1,1); +CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; +ANALYZE ab1; +DROP TABLE ab1 CASCADE; +NOTICE: drop cascades to table ab1c +-- Tests for stats with inheritance +CREATE TABLE stxdinh(a int, b int); +CREATE TABLE stxdinh1() INHERITS(stxdinh); +CREATE TABLE stxdinh2() INHERITS(stxdinh); +INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a; +INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a; +INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a; +VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; +-- Ensure non-inherited stats are not applied to inherited query +-- Without stats object, it looks like this +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); + estimated | actual +-----------+-------- + 400 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 3 | 40 +(1 row) + +CREATE STATISTICS stxdinh ON a, b FROM stxdinh; +VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; +-- Since the stats object does not include inherited stats, it should not +-- affect the estimates +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); + estimated | actual +-----------+-------- + 400 | 150 +(1 row) + +-- Dependencies are applied at individual relations (within append), so +-- this estimate changes a bit because we improve estimates for the parent +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 22 | 40 +(1 row) + +DROP TABLE stxdinh, stxdinh1, stxdinh2; +-- Ensure inherited stats ARE applied to inherited query in partitioned table +CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i); +CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100); +INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a; +CREATE STATISTICS stxdinp ON a, b FROM stxdinp; +VACUUM ANALYZE stxdinp; -- partitions are processed recursively +SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass; + ?column? +---------- + 1 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2'); + estimated | actual +-----------+-------- + 10 | 10 +(1 row) + +DROP TABLE stxdinp; +-- basic test for statistics on expressions +CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ); +-- expression stats may be built on a single expression column +CREATE STATISTICS ab1_exprstat_1 ON (a+b) FROM ab1; +-- with a single expression, we only enable expression statistics +CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1; +SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_2'; + stxkind +--------- + {e} +(1 row) + +-- adding anything to the expression builds all statistics kinds +CREATE STATISTICS ab1_exprstat_3 ON (a+b), a FROM ab1; +SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3'; + stxkind +----------- + {d,f,m,e} +(1 row) + +-- date_trunc on timestamptz is not immutable, but that should not matter +CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1; +-- date_trunc on timestamp is immutable +CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1; +-- check use of a boolean-returning expression +CREATE STATISTICS ab1_exprstat_6 ON + (case a when 1 then true else false end), b FROM ab1; +-- insert some data and run analyze, to test that these cases build properly +INSERT INTO ab1 +SELECT x / 10, x / 3, + '2020-10-01'::timestamp + x * interval '1 day', + '2020-10-01'::timestamptz + x * interval '1 day' +FROM generate_series(1, 100) x; +ANALYZE ab1; +-- apply some stats +SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +DROP TABLE ab1; +-- Verify supported object types for extended statistics +CREATE schema tststats; +CREATE TABLE tststats.t (a int, b int, c text); +CREATE INDEX ti ON tststats.t (a, b); +CREATE SEQUENCE tststats.s; +CREATE VIEW tststats.v AS SELECT * FROM tststats.t; +CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t; +CREATE TYPE tststats.ty AS (a int, b int, c text); +CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw; +CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw; +CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv; +CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b); +CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10); +CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t; +CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti; +ERROR: relation "ti" is not a table, foreign table, or materialized view +CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s; +ERROR: relation "s" is not a table, foreign table, or materialized view +CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v; +ERROR: relation "v" is not a table, foreign table, or materialized view +CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv; +CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty; +ERROR: relation "ty" is not a table, foreign table, or materialized view +CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f; +CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt; +CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1; +DO $$ +DECLARE + relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass; +BEGIN + EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname; +EXCEPTION WHEN wrong_object_type THEN + RAISE NOTICE 'stats on toast table not created'; +END; +$$; +NOTICE: stats on toast table not created +DROP SCHEMA tststats CASCADE; +NOTICE: drop cascades to 7 other objects +DETAIL: drop cascades to table tststats.t +drop cascades to sequence tststats.s +drop cascades to view tststats.v +drop cascades to materialized view tststats.mv +drop cascades to type tststats.ty +drop cascades to foreign table tststats.f +drop cascades to table tststats.pt +DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE; +NOTICE: drop cascades to server extstats_dummy_srv +-- n-distinct tests +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +) +WITH (autovacuum_enabled = off); +-- over-estimates when using only per-column statistics +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words((i/100)::money) + FROM generate_series(1,1000) s(i); +ANALYZE ndistinct; +-- Group Aggregate, due to over-estimate of the number of groups +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d'); + estimated | actual +-----------+-------- + 200 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d'); + estimated | actual +-----------+-------- + 200 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +-- correct command +CREATE STATISTICS s10 ON a, b, c FROM ndistinct; +ANALYZE ndistinct; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+----------------------------------------------------- + {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11} +(1 row) + +-- minor improvement, make sure the ctid does not break the matching +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY ctid, a, b'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +-- Hash Aggregate, thanks to estimates improved by the statistic +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +-- partial improvement (match on attributes) +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +-- expressions - no improvement +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +-- last two plans keep using Group Aggregate, because 'd' is not covered +-- by the statistic and while it's NULL-only we assume 200 values for it +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d'); + estimated | actual +-----------+-------- + 200 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d'); + estimated | actual +-----------+-------- + 200 | 11 +(1 row) + +TRUNCATE TABLE ndistinct; +-- under-estimates when using only per-column statistics +INSERT INTO ndistinct (a, b, c, filler1) + SELECT mod(i,13), mod(i,17), mod(i,19), + cash_words(mod(i,23)::int::money) + FROM generate_series(1,1000) s(i); +ANALYZE ndistinct; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+---------------------------------------------------------- + {d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000} +(1 row) + +-- correct estimates +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d'); + estimated | actual +-----------+-------- + 323 | 323 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d'); + estimated | actual +-----------+-------- + 200 | 13 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +DROP STATISTICS s10; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+--------------- +(0 rows) + +-- dropping the statistics results in under-estimates +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d'); + estimated | actual +-----------+-------- + 200 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d'); + estimated | actual +-----------+-------- + 200 | 323 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d'); + estimated | actual +-----------+-------- + 200 | 13 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +-- ndistinct estimates with statistics on expressions +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct; +ANALYZE ndistinct; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+------------------------------------------------------------------- + {d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +DROP STATISTICS s10; +-- a mix of attributes and expressions +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)'); + estimated | actual +-----------+-------- + 100 | 247 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct; +ANALYZE ndistinct; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+------------------------------------------------------------- + {d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)'); + estimated | actual +-----------+-------- + 247 | 247 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +DROP STATISTICS s10; +-- combination of multiple ndistinct statistics, with/without expressions +TRUNCATE ndistinct; +-- two mostly independent groups of columns +INSERT INTO ndistinct (a, b, c, d) + SELECT mod(i,3), mod(i,9), mod(i,5), mod(i,20) + FROM generate_series(1,1000) s(i); +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 100 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 100 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- basic statistics on both attributes (no expressions) +CREATE STATISTICS s11 (ndistinct) ON a, b FROM ndistinct; +CREATE STATISTICS s12 (ndistinct) ON c, d FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace the second statistics by statistics on expressions +DROP STATISTICS s12; +CREATE STATISTICS s12 (ndistinct) ON (c * 10), (d - 1) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace the second statistics by statistics on both attributes and expressions +DROP STATISTICS s12; +CREATE STATISTICS s12 (ndistinct) ON c, d, (c * 10), (d - 1) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace the other statistics by statistics on both attributes and expressions +DROP STATISTICS s11; +CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace statistics by somewhat overlapping ones (this expected to get worse estimate +-- because the first statistics shall be applied to 3 columns, and the second one can't +-- be really applied) +DROP STATISTICS s11; +DROP STATISTICS s12; +CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct; +CREATE STATISTICS s12 (ndistinct) ON a, (b+1), (c * 10) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 100 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +DROP STATISTICS s11; +DROP STATISTICS s12; +-- functional dependencies tests +CREATE TABLE functional_dependencies ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b TEXT, + filler3 DATE, + c INT, + d TEXT +) +WITH (autovacuum_enabled = off); +CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b); +CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c); +-- random data (no functional dependencies) +INSERT INTO functional_dependencies (a, b, c, filler1) + SELECT mod(i, 5), mod(i, 7), mod(i, 11), i FROM generate_series(1,1000) s(i); +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 29 | 29 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 3 | 3 +(1 row) + +-- create statistics +CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 29 | 29 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 3 | 3 +(1 row) + +-- a => b, a => c, b => c +TRUNCATE functional_dependencies; +DROP STATISTICS func_deps_stat; +-- now do the same thing, but with expressions +INSERT INTO functional_dependencies (a, b, c, filler1) + SELECT i, i, i, i FROM generate_series(1,5000) s(i); +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1'); + estimated | actual +-----------+-------- + 1 | 35 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1'); + estimated | actual +-----------+-------- + 1 | 5 +(1 row) + +-- create statistics +CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies; +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1'); + estimated | actual +-----------+-------- + 35 | 35 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1'); + estimated | actual +-----------+-------- + 5 | 5 +(1 row) + +-- a => b, a => c, b => c +TRUNCATE functional_dependencies; +DROP STATISTICS func_deps_stat; +INSERT INTO functional_dependencies (a, b, c, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +-- IN +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 4 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ''1'''); + estimated | actual +-----------+-------- + 4 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); + estimated | actual +-----------+-------- + 3 | 400 +(1 row) + +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 4 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + +-- ANY +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 4 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 3 | 400 +(1 row) + +-- ANY with inequalities should not benefit from functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1'''); + estimated | actual +-----------+-------- + 2472 | 2400 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1441 | 1250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 3909 | 2550 +(1 row) + +-- ALL (should not benefit from functional dependencies) +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])'); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +-- create statistics +CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; +ANALYZE functional_dependencies; +-- print the detected dependencies +SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; + dependencies +------------------------------------------------------------------------------------------------------------ + {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000, "3, 4 => 6": 1.000000, "3, 6 => 4": 1.000000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +-- IN +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ''1'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); + estimated | actual +-----------+-------- + 400 | 400 +(1 row) + +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 197 | 200 +(1 row) + +-- OR clauses referencing different attributes are incompatible +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + +-- ANY +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 400 | 400 +(1 row) + +-- ANY with inequalities should not benefit from functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1'''); + estimated | actual +-----------+-------- + 2472 | 2400 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1441 | 1250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 3909 | 2550 +(1 row) + +-- ALL (should not benefit from functional dependencies) +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])'); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +-- changing the type of column c causes all its stats to be dropped, reverting +-- to default estimates without any statistics, i.e. 0.5% selectivity for each +-- condition +ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +DROP STATISTICS func_deps_stat; +-- now try functional dependencies with expressions +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'' AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +-- IN +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) IN (2)'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND upper(b) IN (''1'', ''2'', ''26'', ''27'') AND (c + 1) IN (2, 3)'); + estimated | actual +-----------+-------- + 1 | 400 +(1 row) + +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (upper(b) = ''1'' OR upper(b) = ''2'')'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND (upper(b) = ''1'' OR upper(b) = ''2'')'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR upper(b) = ''1'') AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +-- ANY +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND upper(b) = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = ANY (ARRAY[2])'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND upper(b) = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND (c + 1) = ANY (ARRAY[2, 3])'); + estimated | actual +-----------+-------- + 1 | 400 +(1 row) + +-- ANY with inequalities should not benefit from functional dependencies +-- the estimates however improve thanks to having expression statistics +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND upper(b) > ''1'''); + estimated | actual +-----------+-------- + 926 | 2400 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND upper(b) <= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1543 | 1250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND upper(b) >= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 2229 | 2550 +(1 row) + +-- ALL (should not benefit from functional dependencies) +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1''])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +-- create statistics on expressions +CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), upper(b), (c + 1) FROM functional_dependencies; +ANALYZE functional_dependencies; +-- print the detected dependencies +SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; + dependencies +------------------------------------------------------------------------------------------------------------------------ + {"-1 => -2": 1.000000, "-1 => -3": 1.000000, "-2 => -3": 1.000000, "-1, -2 => -3": 1.000000, "-1, -3 => -2": 1.000000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'' AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +-- IN +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) IN (''1'', ''2'')'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) IN (2)'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND upper(b) IN (''1'', ''2'', ''26'', ''27'') AND (c + 1) IN (2, 3)'); + estimated | actual +-----------+-------- + 400 | 400 +(1 row) + +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (upper(b) = ''1'' OR upper(b) = ''2'')'); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND (upper(b) = ''1'' OR upper(b) = ''2'')'); + estimated | actual +-----------+-------- + 197 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR upper(b) = ''1'') AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + +-- ANY +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ''1'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND upper(b) = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = ANY (ARRAY[2])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND upper(b) = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND (c + 1) = ANY (ARRAY[2, 3])'); + estimated | actual +-----------+-------- + 400 | 400 +(1 row) + +-- ANY with inequalities should not benefit from functional dependencies +-- the estimates however improve thanks to having expression statistics +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND upper(b) > ''1'''); + estimated | actual +-----------+-------- + 2472 | 2400 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND upper(b) <= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1441 | 1250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND upper(b) >= ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 3909 | 2550 +(1 row) + +-- ALL (should not benefit from functional dependencies) +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1''])'); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ALL (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +-- check the ability to use multiple functional dependencies +CREATE TABLE functional_dependencies_multi ( + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +) +WITH (autovacuum_enabled = off); +INSERT INTO functional_dependencies_multi (a, b, c, d) + SELECT + mod(i,7), + mod(i,7), + mod(i,11), + mod(i,11) + FROM generate_series(1,5000) s(i); +ANALYZE functional_dependencies_multi; +-- estimates without any functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 102 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND 0 = b'); + estimated | actual +-----------+-------- + 102 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 41 | 454 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 1 | 64 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND b = 0 AND 0 = c AND d = 0'); + estimated | actual +-----------+-------- + 1 | 64 +(1 row) + +-- create separate functional dependencies +CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi; +CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi; +ANALYZE functional_dependencies_multi; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 714 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND 0 = b'); + estimated | actual +-----------+-------- + 714 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 454 | 454 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 65 | 64 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND b = 0 AND 0 = c AND d = 0'); + estimated | actual +-----------+-------- + 65 | 64 +(1 row) + +DROP TABLE functional_dependencies_multi; +-- MCV lists +CREATE TABLE mcv_lists ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b VARCHAR, + filler3 DATE, + c INT, + d TEXT, + ia INT[] +) +WITH (autovacuum_enabled = off); +-- random data (no MCV list) +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 4 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 4 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +-- random data (no MCV list), but with expression +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT i, i, i, i FROM generate_series(1,1000) s(i); +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1'); + estimated | actual +-----------+-------- + 1 | 13 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,7)), (mod(b::int,11)), (mod(c,13)) FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1'); + estimated | actual +-----------+-------- + 13 | 13 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +-- 100 distinct combinations, all in the MCV list +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, ia, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); + estimated | actual +-----------+-------- + 343 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 343 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + estimated | actual +-----------+-------- + 26 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')'); + estimated | actual +-----------+-------- + 26 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 10 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])'); + estimated | actual +-----------+-------- + 10 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + +-- check change of unrelated column type does not reset the MCV statistics +ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64); +SELECT d.stxdmcv IS NOT NULL + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxname = 'mcv_lists_stats' + AND d.stxoid = s.oid; + ?column? +---------- + t +(1 row) + +-- check change of column type resets the MCV statistics +ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +-- 100 distinct combinations, all in the MCV list, but with expressions +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT i, i, i, i FROM generate_series(1,1000) s(i); +ANALYZE mcv_lists; +-- without any stats on the expressions, we have to use default selectivities, which +-- is why the estimates here are different from the pre-computed case above +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1'); + estimated | actual +-----------+-------- + 111 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)'); + estimated | actual +-----------+-------- + 111 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 15 | 120 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)'); + estimated | actual +-----------+-------- + 11 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +-- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats) +CREATE STATISTICS mcv_lists_stats_1 ON (mod(a,20)) FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats_2 ON (mod(b::int,10)) FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 149 | 120 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)'); + estimated | actual +-----------+-------- + 20 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 20 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)'); + estimated | actual +-----------+-------- + 116 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 12 | 100 +(1 row) + +DROP STATISTICS mcv_lists_stats_1; +DROP STATISTICS mcv_lists_stats_2; +DROP STATISTICS mcv_lists_stats_3; +-- create statistics with both MCV and expressions +CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,20)), (mod(b::int,10)), (mod(c,5)) FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 105 | 120 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +-- 100 distinct combinations with NULL values, all in the MCV list +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT + (CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END), + (CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END), + (CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END), + i + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL'); + estimated | actual +-----------+-------- + 49 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL'); + estimated | actual +-----------+-------- + 95 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +-- test pg_mcv_list_items with a very simple (single item) MCV list +TRUNCATE mcv_lists; +INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i); +ANALYZE mcv_lists; +SELECT m.* + FROM pg_statistic_ext s, pg_statistic_ext_data d, + pg_mcv_list_items(d.stxdmcv) m + WHERE s.stxname = 'mcv_lists_stats' + AND d.stxoid = s.oid; + index | values | nulls | frequency | base_frequency +-------+---------+---------+-----------+---------------- + 0 | {1,2,3} | {f,f,f} | 1 | 1 +(1 row) + +-- 2 distinct combinations with NULL values, all in the MCV list +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, d) + SELECT + NULL, -- always NULL + (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END), + (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 0 END), + (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END) + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x'''); + estimated | actual +-----------+-------- + 3750 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x'''); + estimated | actual +-----------+-------- + 3750 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); + estimated | actual +-----------+-------- + 3750 | 2500 +(1 row) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists; +ANALYZE mcv_lists; +-- test pg_mcv_list_items with MCV list containing variable-length data and NULLs +SELECT m.* + FROM pg_statistic_ext s, pg_statistic_ext_data d, + pg_mcv_list_items(d.stxdmcv) m + WHERE s.stxname = 'mcv_lists_stats' + AND d.stxoid = s.oid; + index | values | nulls | frequency | base_frequency +-------+------------------+---------+-----------+---------------- + 0 | {NULL,x,x} | {t,f,f} | 0.5 | 0.25 + 1 | {NULL,NULL,NULL} | {t,t,t} | 0.5 | 0.25 +(2 rows) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x'''); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x'''); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + +-- mcv with pass-by-ref fixlen types, e.g. uuid +CREATE TABLE mcv_lists_uuid ( + a UUID, + b UUID, + c UUID +) +WITH (autovacuum_enabled = off); +INSERT INTO mcv_lists_uuid (a, b, c) + SELECT + md5(mod(i,100)::text)::uuid, + md5(mod(i,50)::text)::uuid, + md5(mod(i,25)::text)::uuid + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists_uuid; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +CREATE STATISTICS mcv_lists_uuid_stats (mcv) ON a, b, c + FROM mcv_lists_uuid; +ANALYZE mcv_lists_uuid; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +DROP TABLE mcv_lists_uuid; +-- mcv with arrays +CREATE TABLE mcv_lists_arrays ( + a TEXT[], + b NUMERIC[], + c INT[] +) +WITH (autovacuum_enabled = off); +INSERT INTO mcv_lists_arrays (a, b, c) + SELECT + ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)], + ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000], + ARRAY[(i/100-1), i/100, (i/100+1)] + FROM generate_series(1,5000) s(i); +CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c + FROM mcv_lists_arrays; +ANALYZE mcv_lists_arrays; +-- mcv with bool +CREATE TABLE mcv_lists_bool ( + a BOOL, + b BOOL, + c BOOL +) +WITH (autovacuum_enabled = off); +INSERT INTO mcv_lists_bool (a, b, c) + SELECT + (mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0) + FROM generate_series(1,10000) s(i); +ANALYZE mcv_lists_bool; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c'); + estimated | actual +-----------+-------- + 156 | 1250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c'); + estimated | actual +-----------+-------- + 156 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c'); + estimated | actual +-----------+-------- + 469 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c'); + estimated | actual +-----------+-------- + 1094 | 0 +(1 row) + +CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c + FROM mcv_lists_bool; +ANALYZE mcv_lists_bool; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c'); + estimated | actual +-----------+-------- + 1250 | 1250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +-- mcv covering just a small fraction of data +CREATE TABLE mcv_lists_partial ( + a INT, + b INT, + c INT +); +-- 10 frequent groups, each with 100 elements +INSERT INTO mcv_lists_partial (a, b, c) + SELECT + mod(i,10), + mod(i,10), + mod(i,10) + FROM generate_series(0,999) s(i); +-- 100 groups that will make it to the MCV list (includes the 10 frequent ones) +INSERT INTO mcv_lists_partial (a, b, c) + SELECT + i, + i, + i + FROM generate_series(0,99) s(i); +-- 4000 groups in total, most of which won't make it (just a single item) +INSERT INTO mcv_lists_partial (a, b, c) + SELECT + i, + i, + i + FROM generate_series(0,3999) s(i); +ANALYZE mcv_lists_partial; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0'); + estimated | actual +-----------+-------- + 1 | 102 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0'); + estimated | actual +-----------+-------- + 300 | 102 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10'); + estimated | actual +-----------+-------- + 1 | 2 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10'); + estimated | actual +-----------+-------- + 6 | 2 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10'); + estimated | actual +-----------+-------- + 204 | 104 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)'); + estimated | actual +-----------+-------- + 1 | 306 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)'); + estimated | actual +-----------+-------- + 6 | 102 +(1 row) + +CREATE STATISTICS mcv_lists_partial_stats (mcv) ON a, b, c + FROM mcv_lists_partial; +ANALYZE mcv_lists_partial; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0'); + estimated | actual +-----------+-------- + 102 | 102 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0'); + estimated | actual +-----------+-------- + 96 | 102 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10'); + estimated | actual +-----------+-------- + 2 | 2 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10'); + estimated | actual +-----------+-------- + 2 | 2 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10'); + estimated | actual +-----------+-------- + 102 | 104 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)'); + estimated | actual +-----------+-------- + 306 | 306 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)'); + estimated | actual +-----------+-------- + 108 | 102 +(1 row) + +DROP TABLE mcv_lists_partial; +-- check the ability to use multiple MCV lists +CREATE TABLE mcv_lists_multi ( + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +) +WITH (autovacuum_enabled = off); +INSERT INTO mcv_lists_multi (a, b, c, d) + SELECT + mod(i,5), + mod(i,5), + mod(i,7), + mod(i,7) + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists_multi; +-- estimates without any mcv statistics +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 200 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 102 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0'); + estimated | actual +-----------+-------- + 143 | 142 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0'); + estimated | actual +-----------+-------- + 1571 | 1572 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 4 | 142 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)'); + estimated | actual +-----------+-------- + 298 | 1572 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0'); + estimated | actual +-----------+-------- + 2649 | 1572 +(1 row) + +-- create separate MCV statistics +CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi; +CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi; +ANALYZE mcv_lists_multi; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 714 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0'); + estimated | actual +-----------+-------- + 143 | 142 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0'); + estimated | actual +-----------+-------- + 1571 | 1572 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 143 | 142 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)'); + estimated | actual +-----------+-------- + 1571 | 1572 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0'); + estimated | actual +-----------+-------- + 1571 | 1572 +(1 row) + +DROP TABLE mcv_lists_multi; +-- statistics on integer expressions +CREATE TABLE expr_stats (a int, b int, c int); +INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i); +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats; +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +DROP STATISTICS expr_stats_1; +DROP TABLE expr_stats; +-- statistics on a mix columns and expressions +CREATE TABLE expr_stats (a int, b int, c int); +INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i); +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats; +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +DROP TABLE expr_stats; +-- statistics on expressions with different data types +CREATE TABLE expr_stats (a int, b name, c text); +INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,1000) s(i); +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0'''); + estimated | actual +-----------+-------- + 11 | 100 +(1 row) + +CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats; +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +DROP TABLE expr_stats; +-- test handling of a mix of compatible and incompatible expressions +CREATE TABLE expr_stats_incompatible_test ( + c0 double precision, + c1 boolean NOT NULL +); +CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test; +INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true); +ANALYZE expr_stats_incompatible_test; +SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE +( + upper('x') LIKE ('x'||('[0,1]'::int4range)) + AND + (c0 IN (0, 1) OR c1) +); + c0 +---- +(0 rows) + +DROP TABLE expr_stats_incompatible_test; +-- Permission tests. Users should not be able to see specific data values in +-- the extended statistics, if they lack permission to see those values in +-- the underlying table. +-- +-- Currently this is only relevant for MCV stats. +CREATE SCHEMA tststats; +CREATE TABLE tststats.priv_test_tbl ( + a int, + b int +); +INSERT INTO tststats.priv_test_tbl + SELECT mod(i,5), mod(i,10) FROM generate_series(1,100) s(i); +CREATE STATISTICS tststats.priv_test_stats (mcv) ON a, b + FROM tststats.priv_test_tbl; +ANALYZE tststats.priv_test_tbl; +-- Check printing info about extended statistics by \dX +create table stts_t1 (a int, b int); +create statistics stts_1 (ndistinct) on a, b from stts_t1; +create statistics stts_2 (ndistinct, dependencies) on a, b from stts_t1; +create statistics stts_3 (ndistinct, dependencies, mcv) on a, b from stts_t1; +create table stts_t2 (a int, b int, c int); +create statistics stts_4 on b, c from stts_t2; +create table stts_t3 (col1 int, col2 int, col3 int); +create statistics stts_hoge on col1, col2, col3 from stts_t3; +create schema stts_s1; +create schema stts_s2; +create statistics stts_s1.stts_foo on col1, col2 from stts_t3; +create statistics stts_s2.stts_yama (dependencies, mcv) on col1, col3 from stts_t3; +insert into stts_t1 select i,i from generate_series(1,100) i; +analyze stts_t1; +set search_path to public, stts_s1, stts_s2, tststats; +\dX + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +----------+------------------------+------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | (a * 2), upper(b), (c + 1::numeric) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined + stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined + tststats | priv_test_stats | a, b FROM priv_test_tbl | | | defined +(12 rows) + +\dX stts_? + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +--------+--------+-------------------+-----------+--------------+--------- + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined +(4 rows) + +\dX *stts_hoge + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +--------+-----------+-------------------------------+-----------+--------------+--------- + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined +(1 row) + +\dX+ + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +----------+------------------------+------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | (a * 2), upper(b), (c + 1::numeric) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined + stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined + tststats | priv_test_stats | a, b FROM priv_test_tbl | | | defined +(12 rows) + +\dX+ stts_? + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +--------+--------+-------------------+-----------+--------------+--------- + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined +(4 rows) + +\dX+ *stts_hoge + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +--------+-----------+-------------------------------+-----------+--------------+--------- + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined +(1 row) + +\dX+ stts_s2.stts_yama + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +---------+-----------+-------------------------+-----------+--------------+--------- + stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined +(1 row) + +set search_path to public, stts_s1; +\dX + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +---------+------------------------+------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | (a * 2), upper(b), (c + 1::numeric) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined +(10 rows) + +create role regress_stats_ext nosuperuser; +set role regress_stats_ext; +\dX + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +--------+------------------------+------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | (a * 2), upper(b), (c + 1::numeric) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined +(9 rows) + +reset role; +drop table stts_t1, stts_t2, stts_t3; +drop schema stts_s1, stts_s2 cascade; +drop user regress_stats_ext; +reset search_path; +-- User with no access +CREATE USER regress_stats_user1; +GRANT USAGE ON SCHEMA tststats TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_tbl; -- Permission denied +ERROR: permission denied for table priv_test_tbl +-- Check individual columns if we don't have table privilege +SELECT * FROM tststats.priv_test_tbl + WHERE a = 1 and tststats.priv_test_tbl.* > (1, 1) is not null; +ERROR: permission denied for table priv_test_tbl +-- Attempt to gain access using a leaky operator +CREATE FUNCTION op_leak(int, int) RETURNS bool + AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END' + LANGUAGE plpgsql; +CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int, + restrict = scalarltsel); +SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_tbl +DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_tbl +-- Grant access via a security barrier view, but hide all data +RESET SESSION AUTHORIZATION; +CREATE VIEW tststats.priv_test_view WITH (security_barrier=true) + AS SELECT * FROM tststats.priv_test_tbl WHERE false; +GRANT SELECT, DELETE ON tststats.priv_test_view TO regress_stats_user1; +-- Should now have access via the view, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +DELETE FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak +-- Grant table access, but hide all data with RLS +RESET SESSION AUTHORIZATION; +ALTER TABLE tststats.priv_test_tbl ENABLE ROW LEVEL SECURITY; +GRANT SELECT, DELETE ON tststats.priv_test_tbl TO regress_stats_user1; +-- Should now have direct table access, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak +-- Tidy up +DROP OPERATOR <<< (int, int); +DROP FUNCTION op_leak(int, int); +RESET SESSION AUTHORIZATION; +DROP SCHEMA tststats CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table tststats.priv_test_tbl +drop cascades to view tststats.priv_test_view +DROP USER regress_stats_user1; |