summaryrefslogtreecommitdiffstats
path: root/src/test/regress/expected/join_hash.out
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/regress/expected/join_hash.out')
-rw-r--r--src/test/regress/expected/join_hash.out1166
1 files changed, 1166 insertions, 0 deletions
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
new file mode 100644
index 0000000..262fa71
--- /dev/null
+++ b/src/test/regress/expected/join_hash.out
@@ -0,0 +1,1166 @@
+--
+-- exercises for the hash join code
+--
+begin;
+set local min_parallel_table_scan_size = 0;
+set local parallel_setup_cost = 0;
+set local enable_hashjoin = on;
+-- Extract bucket and batch counts from an explain analyze plan. In
+-- general we can't make assertions about how many batches (or
+-- buckets) will be required because it can vary, but we can in some
+-- special cases and we can check for growth.
+create or replace function find_hash(node json)
+returns json language plpgsql
+as
+$$
+declare
+ x json;
+ child json;
+begin
+ if node->>'Node Type' = 'Hash' then
+ return node;
+ else
+ for child in select json_array_elements(node->'Plans')
+ loop
+ x := find_hash(child);
+ if x is not null then
+ return x;
+ end if;
+ end loop;
+ return null;
+ end if;
+end;
+$$;
+create or replace function hash_join_batches(query text)
+returns table (original int, final int) language plpgsql
+as
+$$
+declare
+ whole_plan json;
+ hash_node json;
+begin
+ for whole_plan in
+ execute 'explain (analyze, format ''json'') ' || query
+ loop
+ hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
+ original := hash_node->>'Original Hash Batches';
+ final := hash_node->>'Hash Batches';
+ return next;
+ end loop;
+end;
+$$;
+-- Make a simple relation with well distributed keys and correctly
+-- estimated size.
+create table simple as
+ select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+alter table simple set (parallel_workers = 2);
+analyze simple;
+-- Make a relation whose size we will under-estimate. We want stats
+-- to say 1000 rows, but actually there are 20,000 rows.
+create table bigger_than_it_looks as
+ select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
+alter table bigger_than_it_looks set (parallel_workers = 2);
+analyze bigger_than_it_looks;
+update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';
+-- Make a relation whose size we underestimate and that also has a
+-- kind of skew that breaks our batching scheme. We want stats to say
+-- 2 rows, but actually there are 20,000 rows with the same key.
+create table extremely_skewed (id int, t text);
+alter table extremely_skewed set (autovacuum_enabled = 'false');
+alter table extremely_skewed set (parallel_workers = 2);
+analyze extremely_skewed;
+insert into extremely_skewed
+ select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+ from generate_series(1, 20000);
+update pg_class
+ set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
+ where relname = 'extremely_skewed';
+-- Make a relation with a couple of enormous tuples.
+create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
+alter table wide set (parallel_workers = 2);
+-- The "optimal" case: the hash table fits in memory; we plan for 1
+-- batch, we stick to that number, and peak memory usage stays within
+-- our work_mem budget
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '4MB';
+set local hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from simple r join simple s using (id);
+ QUERY PLAN
+----------------------------------------
+ Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ f | f
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '4MB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = off;
+explain (costs off)
+ select count(*) from simple r join simple s using (id);
+ QUERY PLAN
+-------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ f | f
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-aware hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '4MB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = on;
+explain (costs off)
+ select count(*) from simple r join simple s using (id);
+ QUERY PLAN
+-------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Parallel Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Parallel Hash
+ -> Parallel Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ f | f
+(1 row)
+
+rollback to settings;
+-- The "good" case: batches required, but we plan the right number; we
+-- plan for some number of batches, and we stick to that number, and
+-- peak memory usage says within our work_mem budget
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from simple r join simple s using (id);
+ QUERY PLAN
+----------------------------------------
+ Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ t | f
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = off;
+explain (costs off)
+ select count(*) from simple r join simple s using (id);
+ QUERY PLAN
+-------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ t | f
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-aware hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '192kB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = on;
+explain (costs off)
+ select count(*) from simple r join simple s using (id);
+ QUERY PLAN
+-------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Parallel Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Parallel Hash
+ -> Parallel Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ t | f
+(1 row)
+
+-- parallel full multi-batch hash join
+select count(*) from simple r full outer join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+rollback to settings;
+-- The "bad" case: during execution we need to increase number of
+-- batches; in this case we plan for 1 batch, and increase at least a
+-- couple of times, and peak memory usage stays within our work_mem
+-- budget
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
+ QUERY PLAN
+------------------------------------------------------
+ Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on bigger_than_it_looks s
+(6 rows)
+
+select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ f | t
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = off;
+explain (costs off)
+ select count(*) from simple r join bigger_than_it_looks s using (id);
+ QUERY PLAN
+------------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on bigger_than_it_looks s
+(9 rows)
+
+select count(*) from simple r join bigger_than_it_looks s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join bigger_than_it_looks s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ f | t
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-aware hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 1;
+set local work_mem = '192kB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = on;
+explain (costs off)
+ select count(*) from simple r join bigger_than_it_looks s using (id);
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 1
+ -> Partial Aggregate
+ -> Parallel Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Parallel Hash
+ -> Parallel Seq Scan on bigger_than_it_looks s
+(9 rows)
+
+select count(*) from simple r join bigger_than_it_looks s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+ from hash_join_batches(
+$$
+ select count(*) from simple r join bigger_than_it_looks s using (id);
+$$);
+ initially_multibatch | increased_batches
+----------------------+-------------------
+ f | t
+(1 row)
+
+rollback to settings;
+-- The "ugly" case: increasing the number of batches during execution
+-- doesn't help, so stop trying to fit in work_mem and hope for the
+-- best; in this case we plan for 1 batch, increases just once and
+-- then stop increasing because that didn't help at all, so we blow
+-- right through the work_mem budget and hope for the best...
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from simple r join extremely_skewed s using (id);
+ QUERY PLAN
+--------------------------------------------------
+ Aggregate
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on extremely_skewed s
+(6 rows)
+
+select count(*) from simple r join extremely_skewed s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select * from hash_join_batches(
+$$
+ select count(*) from simple r join extremely_skewed s using (id);
+$$);
+ original | final
+----------+-------
+ 1 | 2
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = off;
+explain (costs off)
+ select count(*) from simple r join extremely_skewed s using (id);
+ QUERY PLAN
+--------------------------------------------------------
+ Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on extremely_skewed s
+(8 rows)
+
+select count(*) from simple r join extremely_skewed s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select * from hash_join_batches(
+$$
+ select count(*) from simple r join extremely_skewed s using (id);
+$$);
+ original | final
+----------+-------
+ 1 | 2
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-aware hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 1;
+set local work_mem = '128kB';
+set local hash_mem_multiplier = 1.0;
+set local enable_parallel_hash = on;
+explain (costs off)
+ select count(*) from simple r join extremely_skewed s using (id);
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 1
+ -> Partial Aggregate
+ -> Parallel Hash Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Parallel Hash
+ -> Parallel Seq Scan on extremely_skewed s
+(9 rows)
+
+select count(*) from simple r join extremely_skewed s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+select * from hash_join_batches(
+$$
+ select count(*) from simple r join extremely_skewed s using (id);
+$$);
+ original | final
+----------+-------
+ 1 | 4
+(1 row)
+
+rollback to settings;
+-- A couple of other hash join tests unrelated to work_mem management.
+-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '4MB';
+set local hash_mem_multiplier = 1.0;
+set local parallel_leader_participation = off;
+select * from hash_join_batches(
+$$
+ select count(*) from simple r join simple s using (id);
+$$);
+ original | final
+----------+-------
+ 1 | 1
+(1 row)
+
+rollback to settings;
+-- Exercise rescans. We'll turn off parallel_leader_participation so
+-- that we can check that instrumentation comes back correctly.
+create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
+alter table join_foo set (parallel_workers = 0);
+create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
+alter table join_bar set (parallel_workers = 2);
+-- multi-batch with rescan, parallel-oblivious
+savepoint settings;
+set enable_parallel_hash = off;
+set parallel_leader_participation = off;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+set max_parallel_workers_per_gather = 2;
+set enable_material = off;
+set enable_mergejoin = off;
+set work_mem = '64kB';
+set hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop Left Join
+ Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
+ -> Seq Scan on join_foo
+ -> Gather
+ Workers Planned: 2
+ -> Hash Join
+ Hash Cond: (b1.id = b2.id)
+ -> Parallel Seq Scan on join_bar b1
+ -> Hash
+ -> Seq Scan on join_bar b2
+(11 rows)
+
+select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ count
+-------
+ 3
+(1 row)
+
+select final > 1 as multibatch
+ from hash_join_batches(
+$$
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+$$);
+ multibatch
+------------
+ t
+(1 row)
+
+rollback to settings;
+-- single-batch with rescan, parallel-oblivious
+savepoint settings;
+set enable_parallel_hash = off;
+set parallel_leader_participation = off;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+set max_parallel_workers_per_gather = 2;
+set enable_material = off;
+set enable_mergejoin = off;
+set work_mem = '4MB';
+set hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop Left Join
+ Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
+ -> Seq Scan on join_foo
+ -> Gather
+ Workers Planned: 2
+ -> Hash Join
+ Hash Cond: (b1.id = b2.id)
+ -> Parallel Seq Scan on join_bar b1
+ -> Hash
+ -> Seq Scan on join_bar b2
+(11 rows)
+
+select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ count
+-------
+ 3
+(1 row)
+
+select final > 1 as multibatch
+ from hash_join_batches(
+$$
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+$$);
+ multibatch
+------------
+ f
+(1 row)
+
+rollback to settings;
+-- multi-batch with rescan, parallel-aware
+savepoint settings;
+set enable_parallel_hash = on;
+set parallel_leader_participation = off;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+set max_parallel_workers_per_gather = 2;
+set enable_material = off;
+set enable_mergejoin = off;
+set work_mem = '64kB';
+set hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop Left Join
+ Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
+ -> Seq Scan on join_foo
+ -> Gather
+ Workers Planned: 2
+ -> Parallel Hash Join
+ Hash Cond: (b1.id = b2.id)
+ -> Parallel Seq Scan on join_bar b1
+ -> Parallel Hash
+ -> Parallel Seq Scan on join_bar b2
+(11 rows)
+
+select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ count
+-------
+ 3
+(1 row)
+
+select final > 1 as multibatch
+ from hash_join_batches(
+$$
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+$$);
+ multibatch
+------------
+ t
+(1 row)
+
+rollback to settings;
+-- single-batch with rescan, parallel-aware
+savepoint settings;
+set enable_parallel_hash = on;
+set parallel_leader_participation = off;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+set max_parallel_workers_per_gather = 2;
+set enable_material = off;
+set enable_mergejoin = off;
+set work_mem = '4MB';
+set hash_mem_multiplier = 1.0;
+explain (costs off)
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop Left Join
+ Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
+ -> Seq Scan on join_foo
+ -> Gather
+ Workers Planned: 2
+ -> Parallel Hash Join
+ Hash Cond: (b1.id = b2.id)
+ -> Parallel Seq Scan on join_bar b1
+ -> Parallel Hash
+ -> Parallel Seq Scan on join_bar b2
+(11 rows)
+
+select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+ count
+-------
+ 3
+(1 row)
+
+select final > 1 as multibatch
+ from hash_join_batches(
+$$
+ select count(*) from join_foo
+ left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
+ on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
+$$);
+ multibatch
+------------
+ f
+(1 row)
+
+rollback to settings;
+-- A full outer join where every record is matched.
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+explain (costs off)
+ select count(*) from simple r full outer join simple s using (id);
+ QUERY PLAN
+----------------------------------------
+ Aggregate
+ -> Hash Full Join
+ Hash Cond: (r.id = s.id)
+ -> Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r full outer join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+rollback to settings;
+-- parallelism not possible with parallel-oblivious full hash join
+savepoint settings;
+set enable_parallel_hash = off;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+ select count(*) from simple r full outer join simple s using (id);
+ QUERY PLAN
+----------------------------------------
+ Aggregate
+ -> Hash Full Join
+ Hash Cond: (r.id = s.id)
+ -> Seq Scan on simple r
+ -> Hash
+ -> Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r full outer join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+rollback to settings;
+-- parallelism is possible with parallel-aware full hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+ select count(*) from simple r full outer join simple s using (id);
+ QUERY PLAN
+-------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Parallel Hash Full Join
+ Hash Cond: (r.id = s.id)
+ -> Parallel Seq Scan on simple r
+ -> Parallel Hash
+ -> Parallel Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r full outer join simple s using (id);
+ count
+-------
+ 20000
+(1 row)
+
+rollback to settings;
+-- A full outer join where every record is not matched.
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+explain (costs off)
+ select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ QUERY PLAN
+----------------------------------------
+ Aggregate
+ -> Hash Full Join
+ Hash Cond: ((0 - s.id) = r.id)
+ -> Seq Scan on simple s
+ -> Hash
+ -> Seq Scan on simple r
+(6 rows)
+
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ count
+-------
+ 40000
+(1 row)
+
+rollback to settings;
+-- parallelism not possible with parallel-oblivious full hash join
+savepoint settings;
+set enable_parallel_hash = off;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+ select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ QUERY PLAN
+----------------------------------------
+ Aggregate
+ -> Hash Full Join
+ Hash Cond: ((0 - s.id) = r.id)
+ -> Seq Scan on simple s
+ -> Hash
+ -> Seq Scan on simple r
+(6 rows)
+
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ count
+-------
+ 40000
+(1 row)
+
+rollback to settings;
+-- parallelism is possible with parallel-aware full hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+ select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ QUERY PLAN
+-------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Parallel Hash Full Join
+ Hash Cond: ((0 - s.id) = r.id)
+ -> Parallel Seq Scan on simple s
+ -> Parallel Hash
+ -> Parallel Seq Scan on simple r
+(9 rows)
+
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ count
+-------
+ 40000
+(1 row)
+
+rollback to settings;
+-- exercise special code paths for huge tuples (note use of non-strict
+-- expression and left join required to get the detoasted tuple into
+-- the hash table)
+-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
+-- sts_puttuple oversized tuple cases because it's multi-batch)
+savepoint settings;
+set max_parallel_workers_per_gather = 2;
+set enable_parallel_hash = on;
+set work_mem = '128kB';
+set hash_mem_multiplier = 1.0;
+explain (costs off)
+ select length(max(s.t))
+ from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+ QUERY PLAN
+----------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Parallel Hash Left Join
+ Hash Cond: (wide.id = wide_1.id)
+ -> Parallel Seq Scan on wide
+ -> Parallel Hash
+ -> Parallel Seq Scan on wide wide_1
+(9 rows)
+
+select length(max(s.t))
+from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+ length
+--------
+ 320000
+(1 row)
+
+select final > 1 as multibatch
+ from hash_join_batches(
+$$
+ select length(max(s.t))
+ from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+$$);
+ multibatch
+------------
+ t
+(1 row)
+
+rollback to settings;
+-- Hash join reuses the HOT status bit to indicate match status. This can only
+-- be guaranteed to produce correct results if all the hash join tuple match
+-- bits are reset before reuse. This is done upon loading them into the
+-- hashtable.
+SAVEPOINT settings;
+SET enable_parallel_hash = on;
+SET min_parallel_table_scan_size = 0;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+CREATE TABLE hjtest_matchbits_t1(id int);
+CREATE TABLE hjtest_matchbits_t2(id int);
+INSERT INTO hjtest_matchbits_t1 VALUES (1);
+INSERT INTO hjtest_matchbits_t2 VALUES (2);
+-- Update should create a HOT tuple. If this status bit isn't cleared, we won't
+-- correctly emit the NULL-extended unmatching tuple in full hash join.
+UPDATE hjtest_matchbits_t2 set id = 2;
+SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id
+ ORDER BY t1.id;
+ id | id
+----+----
+ 1 |
+ | 2
+(2 rows)
+
+-- Test serial full hash join.
+-- Resetting parallel_setup_cost should force a serial plan.
+-- Just to be safe, however, set enable_parallel_hash to off, as parallel full
+-- hash joins are only supported with shared hashtables.
+RESET parallel_setup_cost;
+SET enable_parallel_hash = off;
+SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
+ id | id
+----+----
+ 1 |
+ | 2
+(2 rows)
+
+ROLLBACK TO settings;
+rollback;
+-- Verify that hash key expressions reference the correct
+-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
+-- need to reference Hash's outer plan (which is below HashJoin's
+-- inner plan). It's not trivial to verify that the references are
+-- correct (we don't display the hashkeys themselves), but if the
+-- hashkeys contain subplan references, those will be displayed. Force
+-- subplans to appear just about everywhere.
+--
+-- Bug report:
+-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
+--
+BEGIN;
+SET LOCAL enable_sort = OFF; -- avoid mergejoins
+SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order
+CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
+CREATE TABLE hjtest_2 (a bool, id int, b text, c int);
+INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
+INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
+INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
+INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
+INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
+INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
+INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
+INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
+INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
+FROM hjtest_1, hjtest_2
+WHERE
+ hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
+ AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
+ AND (SELECT hjtest_1.b * 5) < 50
+ AND (SELECT hjtest_2.c * 5) < 55
+ AND hjtest_1.a <> hjtest_2.b;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------
+ Hash Join
+ Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
+ Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3)))
+ Join Filter: (hjtest_1.a <> hjtest_2.b)
+ -> Seq Scan on public.hjtest_1
+ Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
+ Filter: ((SubPlan 4) < 50)
+ SubPlan 4
+ -> Result
+ Output: (hjtest_1.b * 5)
+ -> Hash
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ -> Seq Scan on public.hjtest_2
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Filter: ((SubPlan 5) < 55)
+ SubPlan 5
+ -> Result
+ Output: (hjtest_2.c * 5)
+ SubPlan 1
+ -> Result
+ Output: 1
+ One-Time Filter: (hjtest_2.id = 1)
+ SubPlan 3
+ -> Result
+ Output: (hjtest_2.c * 5)
+ SubPlan 2
+ -> Result
+ Output: (hjtest_1.b * 5)
+(28 rows)
+
+SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
+FROM hjtest_1, hjtest_2
+WHERE
+ hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
+ AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
+ AND (SELECT hjtest_1.b * 5) < 50
+ AND (SELECT hjtest_2.c * 5) < 55
+ AND hjtest_1.a <> hjtest_2.b;
+ a1 | a2 | t1 | t2
+------+----+----------+----------
+ text | t | hjtest_1 | hjtest_2
+(1 row)
+
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
+FROM hjtest_2, hjtest_1
+WHERE
+ hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
+ AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
+ AND (SELECT hjtest_1.b * 5) < 50
+ AND (SELECT hjtest_2.c * 5) < 55
+ AND hjtest_1.a <> hjtest_2.b;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------
+ Hash Join
+ Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
+ Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
+ Join Filter: (hjtest_1.a <> hjtest_2.b)
+ -> Seq Scan on public.hjtest_2
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Filter: ((SubPlan 5) < 55)
+ SubPlan 5
+ -> Result
+ Output: (hjtest_2.c * 5)
+ -> Hash
+ Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
+ -> Seq Scan on public.hjtest_1
+ Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
+ Filter: ((SubPlan 4) < 50)
+ SubPlan 4
+ -> Result
+ Output: (hjtest_1.b * 5)
+ SubPlan 2
+ -> Result
+ Output: (hjtest_1.b * 5)
+ SubPlan 1
+ -> Result
+ Output: 1
+ One-Time Filter: (hjtest_2.id = 1)
+ SubPlan 3
+ -> Result
+ Output: (hjtest_2.c * 5)
+(28 rows)
+
+SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
+FROM hjtest_2, hjtest_1
+WHERE
+ hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
+ AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
+ AND (SELECT hjtest_1.b * 5) < 50
+ AND (SELECT hjtest_2.c * 5) < 55
+ AND hjtest_1.a <> hjtest_2.b;
+ a1 | a2 | t1 | t2
+------+----+----------+----------
+ text | t | hjtest_1 | hjtest_2
+(1 row)
+
+ROLLBACK;
+-- Verify that we behave sanely when the inner hash keys contain parameters
+-- (that is, outer or lateral references). This situation has to defeat
+-- re-use of the inner hash table across rescans.
+begin;
+set local enable_hashjoin = on;
+explain (costs off)
+select i8.q2, ss.* from
+int8_tbl i8,
+lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
+ on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
+ QUERY PLAN
+-----------------------------------------------------------
+ Nested Loop
+ -> Seq Scan on int8_tbl i8
+ -> Sort
+ Sort Key: t1.fivethous, i4.f1
+ -> Hash Join
+ Hash Cond: (t1.fivethous = (i4.f1 + i8.q2))
+ -> Seq Scan on tenk1 t1
+ -> Hash
+ -> Seq Scan on int4_tbl i4
+(9 rows)
+
+select i8.q2, ss.* from
+int8_tbl i8,
+lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
+ on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
+ q2 | fivethous | f1
+-----+-----------+----
+ 456 | 456 | 0
+ 456 | 456 | 0
+ 123 | 123 | 0
+ 123 | 123 | 0
+(4 rows)
+
+rollback;