diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
commit | 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch) | |
tree | fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/test/regress/expected/partition_aggregate.out | |
parent | Initial commit. (diff) | |
download | postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip |
Adding upstream version 16.2.upstream/16.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/test/regress/expected/partition_aggregate.out')
-rw-r--r-- | src/test/regress/expected/partition_aggregate.out | 1520 |
1 files changed, 1520 insertions, 0 deletions
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out new file mode 100644 index 0000000..1b900fd --- /dev/null +++ b/src/test/regress/expected/partition_aggregate.out @@ -0,0 +1,1520 @@ +-- +-- PARTITION_AGGREGATE +-- Test partitionwise aggregation on partitioned tables +-- +-- Note: to ensure plan stability, it's a good idea to make the partitions of +-- any one partitioned table in this test all have different numbers of rows. +-- +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; +-- Disable parallel plans. +SET max_parallel_workers_per_gather TO 0; +-- Disable incremental sort, which can influence selected plans due to fuzz factor. +SET enable_incremental_sort TO off; +-- +-- Tests for list partitioned tables. +-- +CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); +CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003', '0004'); +CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0005', '0006', '0007', '0008'); +CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0009', '0010', '0011'); +INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------- + Sort + Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) + -> Append + -> HashAggregate + Group Key: pagg_tab.c + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p1 pagg_tab + -> HashAggregate + Group Key: pagg_tab_1.c + Filter: (avg(pagg_tab_1.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> HashAggregate + Group Key: pagg_tab_2.c + Filter: (avg(pagg_tab_2.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(15 rows) + +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + c | sum | avg | count | min | max +------+------+---------------------+-------+-----+----- + 0000 | 2000 | 12.0000000000000000 | 250 | 0 | 24 + 0001 | 2250 | 13.0000000000000000 | 250 | 1 | 25 + 0002 | 2500 | 14.0000000000000000 | 250 | 2 | 26 + 0006 | 2500 | 12.0000000000000000 | 250 | 2 | 24 + 0007 | 2750 | 13.0000000000000000 | 250 | 3 | 25 + 0008 | 2000 | 14.0000000000000000 | 250 | 0 | 26 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------- + Sort + Sort Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) + -> Finalize HashAggregate + Group Key: pagg_tab.a + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Append + -> Partial HashAggregate + Group Key: pagg_tab.a + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Partial HashAggregate + Group Key: pagg_tab_1.a + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Partial HashAggregate + Group Key: pagg_tab_2.a + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(15 rows) + +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + a | sum | avg | count | min | max +----+------+---------------------+-------+-----+----- + 0 | 1500 | 10.0000000000000000 | 150 | 0 | 20 + 1 | 1650 | 11.0000000000000000 | 150 | 1 | 21 + 2 | 1800 | 12.0000000000000000 | 150 | 2 | 22 + 3 | 1950 | 13.0000000000000000 | 150 | 3 | 23 + 4 | 2100 | 14.0000000000000000 | 150 | 4 | 24 + 10 | 1500 | 10.0000000000000000 | 150 | 10 | 20 + 11 | 1650 | 11.0000000000000000 | 150 | 11 | 21 + 12 | 1800 | 12.0000000000000000 | 150 | 12 | 22 + 13 | 1950 | 13.0000000000000000 | 150 | 13 | 23 + 14 | 2100 | 14.0000000000000000 | 150 | 14 | 24 +(10 rows) + +-- Check with multiple columns in GROUP BY +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; + QUERY PLAN +------------------------------------------------ + Append + -> HashAggregate + Group Key: pagg_tab.a, pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> HashAggregate + Group Key: pagg_tab_1.a, pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> HashAggregate + Group Key: pagg_tab_2.a, pagg_tab_2.c + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(10 rows) + +-- Check with multiple columns in GROUP BY, order in GROUP BY is reversed +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; + QUERY PLAN +------------------------------------------------ + Append + -> HashAggregate + Group Key: pagg_tab.c, pagg_tab.a + -> Seq Scan on pagg_tab_p1 pagg_tab + -> HashAggregate + Group Key: pagg_tab_1.c, pagg_tab_1.a + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> HashAggregate + Group Key: pagg_tab_2.c, pagg_tab_2.a + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(10 rows) + +-- Check with multiple columns in GROUP BY, order in target-list is reversed +EXPLAIN (COSTS OFF) +SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; + QUERY PLAN +------------------------------------------------ + Append + -> HashAggregate + Group Key: pagg_tab.a, pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> HashAggregate + Group Key: pagg_tab_1.a, pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> HashAggregate + Group Key: pagg_tab_2.a, pagg_tab_2.c + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(10 rows) + +-- Test when input relation for grouping is dummy +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; + QUERY PLAN +-------------------------------- + HashAggregate + Group Key: c + -> Result + One-Time Filter: false +(4 rows) + +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; + c | sum +---+----- +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + QUERY PLAN +-------------------------------- + GroupAggregate + -> Result + One-Time Filter: false +(3 rows) + +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + c | sum +---+----- +(0 rows) + +-- Test GroupAggregate paths by disabling hash aggregates. +SET enable_hashagg TO false; +-- When GROUP BY clause matches full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------- + Sort + Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) + -> Append + -> GroupAggregate + Group Key: pagg_tab.c + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> GroupAggregate + Group Key: pagg_tab_1.c + Filter: (avg(pagg_tab_1.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> GroupAggregate + Group Key: pagg_tab_2.c + Filter: (avg(pagg_tab_2.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_2.c + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(21 rows) + +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + c | sum | avg | count +------+------+---------------------+------- + 0000 | 2000 | 12.0000000000000000 | 250 + 0001 | 2250 | 13.0000000000000000 | 250 + 0002 | 2500 | 14.0000000000000000 | 250 + 0006 | 2500 | 12.0000000000000000 | 250 + 0007 | 2750 | 13.0000000000000000 | 250 + 0008 | 2000 | 14.0000000000000000 | 250 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------ + Sort + Sort Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) + -> Finalize GroupAggregate + Group Key: pagg_tab.a + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Merge Append + Sort Key: pagg_tab.a + -> Partial GroupAggregate + Group Key: pagg_tab.a + -> Sort + Sort Key: pagg_tab.a + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Partial GroupAggregate + Group Key: pagg_tab_1.a + -> Sort + Sort Key: pagg_tab_1.a + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Partial GroupAggregate + Group Key: pagg_tab_2.a + -> Sort + Sort Key: pagg_tab_2.a + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(22 rows) + +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + a | sum | avg | count +----+------+---------------------+------- + 0 | 1500 | 10.0000000000000000 | 150 + 1 | 1650 | 11.0000000000000000 | 150 + 2 | 1800 | 12.0000000000000000 | 150 + 3 | 1950 | 13.0000000000000000 | 150 + 4 | 2100 | 14.0000000000000000 | 150 + 10 | 1500 | 10.0000000000000000 | 150 + 11 | 1650 | 11.0000000000000000 | 150 + 12 | 1800 | 12.0000000000000000 | 150 + 13 | 1950 | 13.0000000000000000 | 150 + 14 | 2100 | 14.0000000000000000 | 150 +(10 rows) + +-- Test partitionwise grouping without any aggregates +EXPLAIN (COSTS OFF) +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; + QUERY PLAN +------------------------------------------------------ + Merge Append + Sort Key: pagg_tab.c + -> Group + Group Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Group + Group Key: pagg_tab_1.c + -> Sort + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Group + Group Key: pagg_tab_2.c + -> Sort + Sort Key: pagg_tab_2.c + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(17 rows) + +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; + c +------ + 0000 + 0001 + 0002 + 0003 + 0004 + 0005 + 0006 + 0007 + 0008 + 0009 + 0010 + 0011 +(12 rows) + +EXPLAIN (COSTS OFF) +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + QUERY PLAN +------------------------------------------------------------ + Group + Group Key: pagg_tab.a + -> Merge Append + Sort Key: pagg_tab.a + -> Group + Group Key: pagg_tab.a + -> Sort + Sort Key: pagg_tab.a + -> Seq Scan on pagg_tab_p1 pagg_tab + Filter: (a < 3) + -> Group + Group Key: pagg_tab_1.a + -> Sort + Sort Key: pagg_tab_1.a + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + Filter: (a < 3) + -> Group + Group Key: pagg_tab_2.a + -> Sort + Sort Key: pagg_tab_2.a + -> Seq Scan on pagg_tab_p3 pagg_tab_2 + Filter: (a < 3) +(22 rows) + +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + a +--- + 0 + 1 + 2 +(3 rows) + +RESET enable_hashagg; +-- ROLLUP, partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------------ + Sort + Sort Key: pagg_tab.c, (sum(pagg_tab.a)) + -> MixedAggregate + Hash Key: pagg_tab.c + Group Key: () + -> Append + -> Seq Scan on pagg_tab_p1 pagg_tab_1 + -> Seq Scan on pagg_tab_p2 pagg_tab_2 + -> Seq Scan on pagg_tab_p3 pagg_tab_3 +(9 rows) + +-- ORDERED SET within the aggregate. +-- Full aggregation; since all the rows that belong to the same group come +-- from the same partition, having an ORDER BY within the aggregate doesn't +-- make any difference. +EXPLAIN (COSTS OFF) +SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; + QUERY PLAN +--------------------------------------------------------------- + Sort + Sort Key: pagg_tab.c, (sum(pagg_tab.b ORDER BY pagg_tab.a)) + -> Append + -> GroupAggregate + Group Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c, pagg_tab.a + -> Seq Scan on pagg_tab_p1 pagg_tab + -> GroupAggregate + Group Key: pagg_tab_1.c + -> Sort + Sort Key: pagg_tab_1.c, pagg_tab_1.a + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> GroupAggregate + Group Key: pagg_tab_2.c + -> Sort + Sort Key: pagg_tab_2.c, pagg_tab_2.a + -> Seq Scan on pagg_tab_p3 pagg_tab_2 +(18 rows) + +-- Since GROUP BY clause does not match with PARTITION KEY; we need to do +-- partial aggregation. However, ORDERED SET are not partial safe and thus +-- partitionwise aggregation plan is not generated. +EXPLAIN (COSTS OFF) +SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; + QUERY PLAN +--------------------------------------------------------------- + Sort + Sort Key: pagg_tab.a, (sum(pagg_tab.b ORDER BY pagg_tab.a)) + -> GroupAggregate + Group Key: pagg_tab.a + -> Sort + Sort Key: pagg_tab.a + -> Append + -> Seq Scan on pagg_tab_p1 pagg_tab_1 + -> Seq Scan on pagg_tab_p2 pagg_tab_2 + -> Seq Scan on pagg_tab_p3 pagg_tab_3 +(10 rows) + +-- JOIN query +CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; +ANALYZE pagg_tab1; +ANALYZE pagg_tab2; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.x, (sum(t1.y)), (count(*)) + -> Append + -> HashAggregate + Group Key: t1.x + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t1_1.x + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t1_2.x + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) + +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + x | sum | count +----+------+------- + 0 | 500 | 100 + 6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- Check with whole-row reference; partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) + -> HashAggregate + Group Key: t1.x + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Append + -> Seq Scan on pagg_tab1_p1 t1_1 + -> Seq Scan on pagg_tab1_p2 t1_2 + -> Seq Scan on pagg_tab1_p3 t1_3 + -> Hash + -> Append + -> Seq Scan on pagg_tab2_p1 t2_1 + -> Seq Scan on pagg_tab2_p2 t2_2 + -> Seq Scan on pagg_tab2_p3 t2_3 +(15 rows) + +SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + x | sum | count +----+------+------- + 0 | 500 | 100 + 6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- GROUP BY having other matching key +EXPLAIN (COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t2.y, (sum(t1.y)), (count(*)) + -> Append + -> HashAggregate + Group Key: t2.y + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t2_1.y + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t2_2.y + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +-- Also test GroupAggregate paths by disabling hash aggregates. +SET enable_hashagg TO false; +EXPLAIN (COSTS OFF) +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------- + Sort + Sort Key: t1.y, (sum(t1.x)), (count(*)) + -> Finalize GroupAggregate + Group Key: t1.y + Filter: (avg(t1.x) > '10'::numeric) + -> Merge Append + Sort Key: t1.y + -> Partial GroupAggregate + Group Key: t1.y + -> Sort + Sort Key: t1.y + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> Partial GroupAggregate + Group Key: t1_1.y + -> Sort + Sort Key: t1_1.y + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> Partial GroupAggregate + Group Key: t1_2.y + -> Sort + Sort Key: t1_2.y + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(34 rows) + +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; + y | sum | count +----+------+------- + 2 | 600 | 50 + 4 | 1200 | 50 + 8 | 900 | 50 + 12 | 600 | 50 + 14 | 1200 | 50 + 18 | 900 | 50 +(6 rows) + +RESET enable_hashagg; +-- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for +-- aggregation +-- LEFT JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: b.y + -> Sort + Sort Key: b.y + -> Append + -> Partial HashAggregate + Group Key: b.y + -> Hash Left Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> Partial HashAggregate + Group Key: b_1.y + -> Hash Left Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> Partial HashAggregate + Group Key: b_2.y + -> Hash Right Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(26 rows) + +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + y | sum +----+------ + 0 | 500 + 6 | 1100 + 12 | 700 + 18 | 1300 + 24 | 900 + | 900 +(6 rows) + +-- RIGHT JOIN, should produce full partitionwise aggregation plan as +-- GROUP BY is on non-nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------ + Sort + Sort Key: b.y + -> Append + -> HashAggregate + Group Key: b.y + -> Hash Right Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> HashAggregate + Group Key: b_1.y + -> Hash Right Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> HashAggregate + Group Key: b_2.y + -> Hash Left Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(24 rows) + +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + y | sum +----+------ + 0 | 500 + 3 | + 6 | 1100 + 9 | + 12 | 700 + 15 | + 18 | 1300 + 21 | + 24 | 900 + 27 | +(10 rows) + +-- FULL JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: a.x + -> Sort + Sort Key: a.x + -> Append + -> Partial HashAggregate + Group Key: a.x + -> Hash Full Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> Partial HashAggregate + Group Key: a_1.x + -> Hash Full Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> Partial HashAggregate + Group Key: a_2.x + -> Hash Full Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(26 rows) + +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + x | sum +----+------ + 0 | 500 + 2 | + 4 | + 6 | 1100 + 8 | + 10 | + 12 | 700 + 14 | + 16 | + 18 | 1300 + 20 | + 22 | + 24 | 900 + 26 | + 28 | + | 500 +(16 rows) + +-- LEFT JOIN, with dummy relation on right side, ideally +-- should produce full partitionwise aggregation plan as GROUP BY is on +-- non-nullable columns. +-- But right now we are unable to do partitionwise join in this case. +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: pagg_tab1.x, pagg_tab2.y + -> HashAggregate + Group Key: pagg_tab1.x, pagg_tab2.y + -> Hash Left Join + Hash Cond: (pagg_tab1.x = pagg_tab2.y) + Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20)) + -> Append + -> Seq Scan on pagg_tab1_p1 pagg_tab1_1 + Filter: (x < 20) + -> Seq Scan on pagg_tab1_p2 pagg_tab1_2 + Filter: (x < 20) + -> Hash + -> Append + -> Seq Scan on pagg_tab2_p2 pagg_tab2_1 + Filter: (y > 10) + -> Seq Scan on pagg_tab2_p3 pagg_tab2_2 + Filter: (y > 10) +(18 rows) + +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + x | y | count +----+----+------- + 6 | | 10 + 8 | | 10 + 10 | | 10 + 12 | 12 | 100 + 14 | | 10 + 16 | | 10 + 18 | 18 | 100 +(7 rows) + +-- FULL JOIN, with dummy relations on both sides, ideally +-- should produce partial partitionwise aggregation plan as GROUP BY is on +-- nullable columns. +-- But right now we are unable to do partitionwise join in this case. +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: pagg_tab1.x, pagg_tab2.y + -> HashAggregate + Group Key: pagg_tab1.x, pagg_tab2.y + -> Hash Full Join + Hash Cond: (pagg_tab1.x = pagg_tab2.y) + Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20)) + -> Append + -> Seq Scan on pagg_tab1_p1 pagg_tab1_1 + Filter: (x < 20) + -> Seq Scan on pagg_tab1_p2 pagg_tab1_2 + Filter: (x < 20) + -> Hash + -> Append + -> Seq Scan on pagg_tab2_p2 pagg_tab2_1 + Filter: (y > 10) + -> Seq Scan on pagg_tab2_p3 pagg_tab2_2 + Filter: (y > 10) +(18 rows) + +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + x | y | count +----+----+------- + 6 | | 10 + 8 | | 10 + 10 | | 10 + 12 | 12 | 100 + 14 | | 10 + 16 | | 10 + 18 | 18 | 100 + | 15 | 10 +(8 rows) + +-- Empty join relation because of empty outer side, no partitionwise agg plan +EXPLAIN (COSTS OFF) +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + QUERY PLAN +-------------------------------------- + GroupAggregate + Group Key: pagg_tab1.y + -> Sort + Sort Key: pagg_tab1.y + -> Result + One-Time Filter: false +(6 rows) + +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + x | y | count +---+---+------- +(0 rows) + +-- Partition by multiple columns +CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); +CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (12, 12); +CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (12, 12) TO (22, 22); +CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (22, 22) TO (30, 30); +INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab_m; +-- Partial aggregation as GROUP BY clause does not match with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) + -> Finalize HashAggregate + Group Key: pagg_tab_m.a + Filter: (avg(pagg_tab_m.c) < '22'::numeric) + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_m.a + -> Seq Scan on pagg_tab_m_p1 pagg_tab_m + -> Partial HashAggregate + Group Key: pagg_tab_m_1.a + -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 + -> Partial HashAggregate + Group Key: pagg_tab_m_2.a + -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 +(15 rows) + +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + a | sum | avg | count +----+------+---------------------+------- + 0 | 1500 | 20.0000000000000000 | 100 + 1 | 1600 | 21.0000000000000000 | 100 + 10 | 1500 | 20.0000000000000000 | 100 + 11 | 1600 | 21.0000000000000000 | 100 + 20 | 1500 | 20.0000000000000000 | 100 + 21 | 1600 | 21.0000000000000000 | 100 +(6 rows) + +-- Full aggregation as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) + -> Append + -> HashAggregate + Group Key: pagg_tab_m.a, ((pagg_tab_m.a + pagg_tab_m.b) / 2) + Filter: (sum(pagg_tab_m.b) < 50) + -> Seq Scan on pagg_tab_m_p1 pagg_tab_m + -> HashAggregate + Group Key: pagg_tab_m_1.a, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2) + Filter: (sum(pagg_tab_m_1.b) < 50) + -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 + -> HashAggregate + Group Key: pagg_tab_m_2.a, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2) + Filter: (sum(pagg_tab_m_2.b) < 50) + -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 +(15 rows) + +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + a | sum | avg | count +----+-----+---------------------+------- + 0 | 0 | 20.0000000000000000 | 25 + 1 | 25 | 21.0000000000000000 | 25 + 10 | 0 | 20.0000000000000000 | 25 + 11 | 25 | 21.0000000000000000 | 25 + 20 | 0 | 20.0000000000000000 | 25 + 21 | 25 | 21.0000000000000000 | 25 +(6 rows) + +-- Full aggregation as PARTITION KEY is part of GROUP BY clause +EXPLAIN (COSTS OFF) +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b)) + -> Append + -> HashAggregate + Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a + Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p1 pagg_tab_m + -> HashAggregate + Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a + Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 + -> HashAggregate + Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a + Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 +(15 rows) + +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + a | c | sum | avg | count +----+----+-----+---------------------+------- + 0 | 30 | 50 | 30.0000000000000000 | 5 + 0 | 40 | 50 | 40.0000000000000000 | 5 + 10 | 30 | 50 | 30.0000000000000000 | 5 + 10 | 40 | 50 | 40.0000000000000000 | 5 + 20 | 30 | 50 | 30.0000000000000000 | 5 + 20 | 40 | 50 | 40.0000000000000000 | 5 +(6 rows) + +-- Test with multi-level partitioning scheme +CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); +CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (12); +CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (12) TO (20) PARTITION BY LIST (c); +CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001', '0002'); +CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0003'); +-- This level of partitioning has different column positions than the parent +CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); +CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); +CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (7) TO (10); +ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (7); +ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_ml; +-- For Parallel Append +SET max_parallel_workers_per_gather TO 2; +SET parallel_setup_cost = 0; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, but still we do not see a partial aggregation as array_agg() +-- is not partial agg safe. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + -> Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a, pagg_tab_ml.c + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 +(27 rows) + +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | array_agg | count +----+------+-------------+------- + 0 | 0 | {0000,0002} | 1000 + 1 | 1000 | {0001,0003} | 1000 + 2 | 2000 | {0000,0002} | 1000 + 10 | 0 | {0000,0002} | 1000 + 11 | 1000 | {0001,0003} | 1000 + 12 | 2000 | {0000,0002} | 1000 + 20 | 0 | {0000,0002} | 1000 + 21 | 1000 | {0001,0003} | 1000 + 22 | 2000 | {0000,0002} | 1000 +(9 rows) + +-- Without ORDER BY clause, to test Gather at top-most path +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; + QUERY PLAN +--------------------------------------------------------------------------- + Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a, pagg_tab_ml.c + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 +(25 rows) + +RESET parallel_setup_cost; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +--------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) + -> Append + -> HashAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_2.a + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_3.a + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_5.a + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Partial HashAggregate + Group Key: pagg_tab_ml_6.a + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 +(31 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 0 | 0 | 1000 + 1 | 1000 | 1000 + 2 | 2000 | 1000 + 10 | 0 | 1000 + 11 | 1000 | 1000 + 12 | 2000 | 1000 + 20 | 0 | 1000 + 21 | 1000 | 1000 + 22 | 2000 | 1000 +(9 rows) + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; + QUERY PLAN +--------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) + -> Finalize GroupAggregate + Group Key: pagg_tab_ml.b + -> Sort + Sort Key: pagg_tab_ml.b + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml.b + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> Partial HashAggregate + Group Key: pagg_tab_ml_1.b + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_2.b + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_3.b + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 + -> Partial HashAggregate + Group Key: pagg_tab_ml_4.b + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 +(22 rows) + +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + b | sum | count +---+-------+------- + 0 | 30000 | 3000 + 1 | 33000 | 3000 + 2 | 36000 | 3000 + 3 | 39000 | 3000 + 4 | 42000 | 3000 +(5 rows) + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) + -> Append + -> HashAggregate + Group Key: pagg_tab_ml.a, pagg_tab_ml.b, pagg_tab_ml.c + Filter: (avg(pagg_tab_ml.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> HashAggregate + Group Key: pagg_tab_ml_1.a, pagg_tab_ml_1.b, pagg_tab_ml_1.c + Filter: (avg(pagg_tab_ml_1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 + -> HashAggregate + Group Key: pagg_tab_ml_2.a, pagg_tab_ml_2.b, pagg_tab_ml_2.c + Filter: (avg(pagg_tab_ml_2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 + -> HashAggregate + Group Key: pagg_tab_ml_3.a, pagg_tab_ml_3.b, pagg_tab_ml_3.c + Filter: (avg(pagg_tab_ml_3.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 + -> HashAggregate + Group Key: pagg_tab_ml_4.a, pagg_tab_ml_4.b, pagg_tab_ml_4.c + Filter: (avg(pagg_tab_ml_4.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 +(23 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 8 | 4000 | 500 + 8 | 4000 | 500 + 9 | 4500 | 500 + 9 | 4500 | 500 + 18 | 4000 | 500 + 18 | 4000 | 500 + 19 | 4500 | 500 + 19 | 4500 | 500 + 28 | 4000 | 500 + 28 | 4000 | 500 + 29 | 4500 | 500 + 29 | 4500 | 500 +(12 rows) + +-- Parallelism within partitionwise aggregates +SET min_parallel_table_scan_size TO '8kB'; +SET parallel_setup_cost TO 0; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) + -> Append + -> Finalize GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml.a + -> Partial HashAggregate + Group Key: pagg_tab_ml.a + -> Parallel Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_2.a + -> Parallel Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_3.a + -> Parallel Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_5.a + -> Parallel Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Partial HashAggregate + Group Key: pagg_tab_ml_6.a + -> Parallel Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 +(41 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 0 | 0 | 1000 + 1 | 1000 | 1000 + 2 | 2000 | 1000 + 10 | 0 | 1000 + 11 | 1000 | 1000 + 12 | 2000 | 1000 + 20 | 0 | 1000 + 21 | 1000 | 1000 + 22 | 2000 | 1000 +(9 rows) + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) + -> Finalize GroupAggregate + Group Key: pagg_tab_ml.b + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml.b + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml.b + -> Parallel Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> Partial HashAggregate + Group Key: pagg_tab_ml_3.b + -> Parallel Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 + -> Partial HashAggregate + Group Key: pagg_tab_ml_1.b + -> Parallel Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_4.b + -> Parallel Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 + -> Partial HashAggregate + Group Key: pagg_tab_ml_2.b + -> Parallel Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 +(24 rows) + +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + b | sum | count +---+-------+------- + 0 | 30000 | 3000 + 1 | 33000 | 3000 + 2 | 36000 | 3000 + 3 | 39000 | 3000 + 4 | 42000 | 3000 +(5 rows) + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) + -> Parallel Append + -> HashAggregate + Group Key: pagg_tab_ml.a, pagg_tab_ml.b, pagg_tab_ml.c + Filter: (avg(pagg_tab_ml.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> HashAggregate + Group Key: pagg_tab_ml_3.a, pagg_tab_ml_3.b, pagg_tab_ml_3.c + Filter: (avg(pagg_tab_ml_3.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 + -> HashAggregate + Group Key: pagg_tab_ml_1.a, pagg_tab_ml_1.b, pagg_tab_ml_1.c + Filter: (avg(pagg_tab_ml_1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 + -> HashAggregate + Group Key: pagg_tab_ml_4.a, pagg_tab_ml_4.b, pagg_tab_ml_4.c + Filter: (avg(pagg_tab_ml_4.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 + -> HashAggregate + Group Key: pagg_tab_ml_2.a, pagg_tab_ml_2.b, pagg_tab_ml_2.c + Filter: (avg(pagg_tab_ml_2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 +(25 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 8 | 4000 | 500 + 8 | 4000 | 500 + 9 | 4500 | 500 + 9 | 4500 | 500 + 18 | 4000 | 500 + 18 | 4000 | 500 + 19 | 4500 | 500 + 19 | 4500 | 500 + 28 | 4000 | 500 + 28 | 4000 | 500 + 29 | 4500 | 500 + 29 | 4500 | 500 +(12 rows) + +-- Parallelism within partitionwise aggregates (single level) +-- Add few parallel setup cost, so that we will see a plan which gathers +-- partially created paths even for full aggregation and sticks a single Gather +-- followed by finalization step. +-- Without this, the cost of doing partial aggregation + Gather + finalization +-- for each partition and then Append over it turns out to be same and this +-- wins as we add it first. This parallel_setup_cost plays a vital role in +-- costing such plans. +SET parallel_setup_cost TO 10; +CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (12); +CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (12) TO (22); +CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (22) TO (30); +INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_para; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para.x + Filter: (avg(pagg_tab_para.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para.x + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.x + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.x + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.x + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(19 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para.y + Filter: (avg(pagg_tab_para.x) < '12'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para.y + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.y + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.y + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.y + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(19 rows) + +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + y | sum | avg | count +----+-------+---------------------+------- + 0 | 15000 | 10.0000000000000000 | 1500 + 1 | 16500 | 11.0000000000000000 | 1500 + 10 | 15000 | 10.0000000000000000 | 1500 + 11 | 16500 | 11.0000000000000000 | 1500 +(4 rows) + +-- Test when parent can produce parallel paths but not any (or some) of its children +-- (Use one more aggregate to tilt the cost estimates for the plan we want) +ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0); +ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0); +ANALYZE pagg_tab_para; +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para.x + Filter: (avg(pagg_tab_para.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para.x + -> Partial HashAggregate + Group Key: pagg_tab_para.x + -> Parallel Append + -> Seq Scan on pagg_tab_para_p1 pagg_tab_para_1 + -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_3 + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_2 +(15 rows) + +SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | sum | count +----+------+--------------------+-------+------- + 0 | 5000 | 5.0000000000000000 | 5000 | 1000 + 1 | 6000 | 6.0000000000000000 | 7000 | 1000 + 10 | 5000 | 5.0000000000000000 | 15000 | 1000 + 11 | 6000 | 6.0000000000000000 | 17000 | 1000 + 20 | 5000 | 5.0000000000000000 | 25000 | 1000 + 21 | 6000 | 6.0000000000000000 | 27000 | 1000 +(6 rows) + +ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0); +ANALYZE pagg_tab_para; +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para.x + Filter: (avg(pagg_tab_para.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para.x + -> Partial HashAggregate + Group Key: pagg_tab_para.x + -> Parallel Append + -> Seq Scan on pagg_tab_para_p1 pagg_tab_para_1 + -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_2 + -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_3 +(15 rows) + +SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | sum | count +----+------+--------------------+-------+------- + 0 | 5000 | 5.0000000000000000 | 5000 | 1000 + 1 | 6000 | 6.0000000000000000 | 7000 | 1000 + 10 | 5000 | 5.0000000000000000 | 15000 | 1000 + 11 | 6000 | 6.0000000000000000 | 17000 | 1000 + 20 | 5000 | 5.0000000000000000 | 25000 | 1000 + 21 | 6000 | 6.0000000000000000 | 27000 | 1000 +(6 rows) + +-- Reset parallelism parameters to get partitionwise aggregation plan. +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) + -> Append + -> HashAggregate + Group Key: pagg_tab_para.x + Filter: (avg(pagg_tab_para.y) < '7'::numeric) + -> Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> HashAggregate + Group Key: pagg_tab_para_1.x + Filter: (avg(pagg_tab_para_1.y) < '7'::numeric) + -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> HashAggregate + Group Key: pagg_tab_para_2.x + Filter: (avg(pagg_tab_para_2.y) < '7'::numeric) + -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(15 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + |