# Test deadlock resolution with parallel process groups.

# It's fairly hard to get parallel worker processes to block on locks,
# since generally they don't want any locks their leader didn't already
# take.  We cheat like mad here by making a function that takes a lock,
# and is incorrectly marked parallel-safe so that it can execute in a worker.

# Note that we explicitly override any global settings of isolation level
# or force_parallel_mode, to ensure we're testing what we intend to.

# Otherwise, this is morally equivalent to deadlock-soft.spec:
# Four-process deadlock with two hard edges and two soft edges.
# d2 waits for e1 (soft edge), e1 waits for d1 (hard edge),
# d1 waits for e2 (soft edge), e2 waits for d2 (hard edge).
# The deadlock detector resolves the deadlock by reversing the d1-e2 edge,
# unblocking d1.
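#
# (A sketch of the wait-for cycle just described, where "A --> B" means
# A waits for B:
#    d2 --soft--> e1 --hard--> d1 --soft--> e2 --hard--> d2 )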

# However ... it's not actually that well-defined whether the deadlock
# detector will prefer to unblock d1 or d2.  It depends on which backend
# is first to run DeadLockCheck after the deadlock condition is created:
# that backend will search outwards from its own wait condition, and will
# first find a loop involving the *other* lock.  By setting a shorter
# deadlock_timeout in session d2, we encourage that backend to be one of
# the d2a1 parallel workers, which will therefore unblock the d1a2
# workers.  But on
# slow machines, one or more d1a2 workers may not yet have reached their
# lock waits, so that they're not unblocked by the first DeadLockCheck.
# The next DeadLockCheck may choose to unblock the d2a1 workers instead,
# which would allow d2a1 to complete before d1a2, causing the test to
# freeze up because isolationtester isn't expecting that completion order.
# (In effect, we have an undetectable deadlock because d2 is waiting for
# d1's completion, but on the client side.)  To fix this, we introduce an
# additional lock (advisory lock 3), which d1 takes at the outset and
# d2a1 waits for after completing the main part of the test.  In this
# way, the deadlock detector can see that d1 must complete first,
# regardless of timing.
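#
# (A sketch of that extra ordering constraint, per the steps below:
# d1a1 takes advisory lock 3 exclusively, and d2a1 finishes by requesting
# it in shared mode, so d2a1 cannot return to the client until d1 commits
# at d1c and releases the lock, whatever the workers' timing.)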

setup
{
  create function lock_share(int,int) returns int language sql as
  'select pg_advisory_xact_lock_shared($1); select 1;' parallel safe;

  create function lock_excl(int,int) returns int language sql as
  'select pg_advisory_xact_lock($1); select 1;' parallel safe;

  create table bigt as select x from generate_series(1, 10000) x;
  analyze bigt;
}
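
# (A note on the function signatures, as a reading aid: the second
# parameter of lock_share and lock_excl is otherwise unused; presumably
# it ties each call to a scanned row of bigt, so that the locking is
# evaluated as part of the table scan itself --- hence inside the
# parallel workers --- rather than being hoisted elsewhere in the plan.)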

teardown
{
  drop function lock_share(int,int);
  drop function lock_excl(int,int);
  drop table bigt;
}

session d1
setup		{ BEGIN isolation level repeatable read;
			  SET force_parallel_mode = off;
			  SET deadlock_timeout = '10s';
}
# these locks will be taken in the leader, so they will persist:
step d1a1	{ SELECT lock_share(1,x), lock_excl(3,x) FROM bigt LIMIT 1; }
# this causes all the parallel workers to take locks:
step d1a2	{ SET force_parallel_mode = on;
			  SET parallel_setup_cost = 0;
			  SET parallel_tuple_cost = 0;
			  SET min_parallel_table_scan_size = 0;
			  SET parallel_leader_participation = off;
			  SET max_parallel_workers_per_gather = 3;
			  SELECT sum(lock_share(2,x)) FROM bigt; }
step d1c	{ COMMIT; }

session d2
setup		{ BEGIN isolation level repeatable read;
			  SET force_parallel_mode = off;
			  SET deadlock_timeout = '10ms';
}
# this lock will be taken in the leader, so it will persist:
step d2a2	{ SELECT lock_share(2,x) FROM bigt LIMIT 1; }
# this causes all the parallel workers to take locks;
# afterwards, the leader takes lock 3 to prevent a client-driven deadlock
step d2a1	{ SET force_parallel_mode = on;
			  SET parallel_setup_cost = 0;
			  SET parallel_tuple_cost = 0;
			  SET min_parallel_table_scan_size = 0;
			  SET parallel_leader_participation = off;
			  SET max_parallel_workers_per_gather = 3;
			  SELECT sum(lock_share(1,x)) FROM bigt;
			  SET force_parallel_mode = off;
			  RESET parallel_setup_cost;
			  RESET parallel_tuple_cost;
			  SELECT lock_share(3,x) FROM bigt LIMIT 1; }
step d2c	{ COMMIT; }

session e1
setup		{ BEGIN isolation level repeatable read;
			  SET force_parallel_mode = on;
			  SET deadlock_timeout = '10s';
}
# this lock will be taken in a parallel worker, but we don't need it to persist
step e1l	{ SELECT lock_excl(1,x) FROM bigt LIMIT 1; }
step e1c	{ COMMIT; }

session e2
setup		{ BEGIN isolation level repeatable read;
			  SET force_parallel_mode = on;
			  SET deadlock_timeout = '10s';
}
# this lock will be taken in a parallel worker, but we don't need it to persist
step e2l	{ SELECT lock_excl(2,x) FROM bigt LIMIT 1; }
step e2c	{ COMMIT; }

permutation d1a1 d2a2 e1l e2l d1a2 d2a1 d1c e1c d2c e2c
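
# (Expected flow, per the comments above: d1a1 and d2a2 take shared
# locks 1 and 2 in their leaders (d1a1 also takes exclusive lock 3);
# e1l and e2l queue exclusive requests behind them; d1a2 and d2a1 then
# add the soft edges, closing the cycle.  d2's short deadlock_timeout
# fires first, unblocking d1a2, so the sessions complete in the order
# d1, e1, d2, e2.)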