/*------------------------------------------------------------------------- * * deadlock.c * POSTGRES deadlock detection code * * See src/backend/storage/lmgr/README for a description of the deadlock * detection and resolution algorithms. * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/storage/lmgr/deadlock.c * * Interface: * * DeadLockCheck() * DeadLockReport() * RememberSimpleDeadLock() * InitDeadLockChecking() * *------------------------------------------------------------------------- */ #include "postgres.h" #include "miscadmin.h" #include "pg_trace.h" #include "pgstat.h" #include "storage/lmgr.h" #include "storage/proc.h" #include "utils/memutils.h" /* * One edge in the waits-for graph. * * waiter and blocker may or may not be members of a lock group, but if either * is, it will be the leader rather than any other member of the lock group. * The group leaders act as representatives of the whole group even though * those particular processes need not be waiting at all. There will be at * least one member of the waiter's lock group on the wait queue for the given * lock, maybe more. */ typedef struct { PGPROC *waiter; /* the leader of the waiting lock group */ PGPROC *blocker; /* the leader of the group it is waiting for */ LOCK *lock; /* the lock being waited for */ int pred; /* workspace for TopoSort */ int link; /* workspace for TopoSort */ } EDGE; /* One potential reordering of a lock's wait queue */ typedef struct { LOCK *lock; /* the lock whose wait queue is described */ PGPROC **procs; /* array of PGPROC *'s in new wait order */ int nProcs; } WAIT_ORDER; /* * Information saved about each edge in a detected deadlock cycle. This * is used to print a diagnostic message upon failure. * * Note: because we want to examine this info after releasing the lock * manager's partition locks, we can't just store LOCK and PGPROC pointers; * we must extract out all the info we want to be able to print. */ typedef struct { LOCKTAG locktag; /* ID of awaited lock object */ LOCKMODE lockmode; /* type of lock we're waiting for */ int pid; /* PID of blocked backend */ } DEADLOCK_INFO; static bool DeadLockCheckRecurse(PGPROC *proc); static int TestConfiguration(PGPROC *startProc); static bool FindLockCycle(PGPROC *checkProc, EDGE *softEdges, int *nSoftEdges); static bool FindLockCycleRecurse(PGPROC *checkProc, int depth, EDGE *softEdges, int *nSoftEdges); static bool FindLockCycleRecurseMember(PGPROC *checkProc, PGPROC *checkProcLeader, int depth, EDGE *softEdges, int *nSoftEdges); static bool ExpandConstraints(EDGE *constraints, int nConstraints); static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints, PGPROC **ordering); #ifdef DEBUG_DEADLOCK static void PrintLockQueue(LOCK *lock, const char *info); #endif /* * Working space for the deadlock detector */ /* Workspace for FindLockCycle */ static PGPROC **visitedProcs; /* Array of visited procs */ static int nVisitedProcs; /* Workspace for TopoSort */ static PGPROC **topoProcs; /* Array of not-yet-output procs */ static int *beforeConstraints; /* Counts of remaining before-constraints */ static int *afterConstraints; /* List head for after-constraints */ /* Output area for ExpandConstraints */ static WAIT_ORDER *waitOrders; /* Array of proposed queue rearrangements */ static int nWaitOrders; static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */ /* Current list of constraints being considered */ static EDGE *curConstraints; static int nCurConstraints; static int maxCurConstraints; /* Storage space for results from FindLockCycle */ static EDGE *possibleConstraints; static int nPossibleConstraints; static int maxPossibleConstraints; static DEADLOCK_INFO *deadlockDetails; static int nDeadlockDetails; /* PGPROC pointer of any blocking autovacuum worker found */ static PGPROC *blocking_autovacuum_proc = NULL; /* * InitDeadLockChecking -- initialize deadlock checker during backend startup * * This does per-backend initialization of the deadlock checker; primarily, * allocation of working memory for DeadLockCheck. We do this per-backend * since there's no percentage in making the kernel do copy-on-write * inheritance of workspace from the postmaster. We want to allocate the * space at startup because (a) the deadlock checker might be invoked when * there's no free memory left, and (b) the checker is normally run inside a * signal handler, which is a very dangerous place to invoke palloc from. */ void InitDeadLockChecking(void) { MemoryContext oldcxt; /* Make sure allocations are permanent */ oldcxt = MemoryContextSwitchTo(TopMemoryContext); /* * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and * deadlockDetails[]. */ visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *)); deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO)); /* * TopoSort needs to consider at most MaxBackends wait-queue entries, and * it needn't run concurrently with FindLockCycle. */ topoProcs = visitedProcs; /* re-use this space */ beforeConstraints = (int *) palloc(MaxBackends * sizeof(int)); afterConstraints = (int *) palloc(MaxBackends * sizeof(int)); /* * We need to consider rearranging at most MaxBackends/2 wait queues * (since it takes at least two waiters in a queue to create a soft edge), * and the expanded form of the wait queues can't involve more than * MaxBackends total waiters. */ waitOrders = (WAIT_ORDER *) palloc((MaxBackends / 2) * sizeof(WAIT_ORDER)); waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *)); /* * Allow at most MaxBackends distinct constraints in a configuration. (Is * this enough? In practice it seems it should be, but I don't quite see * how to prove it. If we run out, we might fail to find a workable wait * queue rearrangement even though one exists.) NOTE that this number * limits the maximum recursion depth of DeadLockCheckRecurse. Making it * really big might potentially allow a stack-overflow problem. */ maxCurConstraints = MaxBackends; curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE)); /* * Allow up to 3*MaxBackends constraints to be saved without having to * re-run TestConfiguration. (This is probably more than enough, but we * can survive if we run low on space by doing excess runs of * TestConfiguration to re-compute constraint lists each time needed.) The * last MaxBackends entries in possibleConstraints[] are reserved as * output workspace for FindLockCycle. */ maxPossibleConstraints = MaxBackends * 4; possibleConstraints = (EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE)); MemoryContextSwitchTo(oldcxt); } /* * DeadLockCheck -- Checks for deadlocks for a given process * * This code looks for deadlocks involving the given process. If any * are found, it tries to rearrange lock wait queues to resolve the * deadlock. If resolution is impossible, return DS_HARD_DEADLOCK --- * the caller is then expected to abort the given proc's transaction. * * Caller must already have locked all partitions of the lock tables. * * On failure, deadlock details are recorded in deadlockDetails[] for * subsequent printing by DeadLockReport(). That activity is separate * because (a) we don't want to do it while holding all those LWLocks, * and (b) we are typically invoked inside a signal handler. */ DeadLockState DeadLockCheck(PGPROC *proc) { int i, j; /* Initialize to "no constraints" */ nCurConstraints = 0; nPossibleConstraints = 0; nWaitOrders = 0; /* Initialize to not blocked by an autovacuum worker */ blocking_autovacuum_proc = NULL; /* Search for deadlocks and possible fixes */ if (DeadLockCheckRecurse(proc)) { /* * Call FindLockCycle one more time, to record the correct * deadlockDetails[] for the basic state with no rearrangements. */ int nSoftEdges; TRACE_POSTGRESQL_DEADLOCK_FOUND(); nWaitOrders = 0; if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges)) elog(FATAL, "deadlock seems to have disappeared"); return DS_HARD_DEADLOCK; /* cannot find a non-deadlocked state */ } /* Apply any needed rearrangements of wait queues */ for (i = 0; i < nWaitOrders; i++) { LOCK *lock = waitOrders[i].lock; PGPROC **procs = waitOrders[i].procs; int nProcs = waitOrders[i].nProcs; PROC_QUEUE *waitQueue = &(lock->waitProcs); Assert(nProcs == waitQueue->size); #ifdef DEBUG_DEADLOCK PrintLockQueue(lock, "DeadLockCheck:"); #endif /* Reset the queue and re-add procs in the desired order */ ProcQueueInit(waitQueue); for (j = 0; j < nProcs; j++) { SHMQueueInsertBefore(&(waitQueue->links), &(procs[j]->links)); waitQueue->size++; } #ifdef DEBUG_DEADLOCK PrintLockQueue(lock, "rearranged to:"); #endif /* See if any waiters for the lock can be woken up now */ ProcLockWakeup(GetLocksMethodTable(lock), lock); } /* Return code tells caller if we had to escape a deadlock or not */ if (nWaitOrders > 0) return DS_SOFT_DEADLOCK; else if (blocking_autovacuum_proc != NULL) return DS_BLOCKED_BY_AUTOVACUUM; else return DS_NO_DEADLOCK; } /* * Return the PGPROC of the autovacuum that's blocking a process. * * We reset the saved pointer as soon as we pass it back. */ PGPROC * GetBlockingAutoVacuumPgproc(void) { PGPROC *ptr; ptr = blocking_autovacuum_proc; blocking_autovacuum_proc = NULL; return ptr; } /* * DeadLockCheckRecurse -- recursively search for valid orderings * * curConstraints[] holds the current set of constraints being considered * by an outer level of recursion. Add to this each possible solution * constraint for any cycle detected at this level. * * Returns true if no solution exists. Returns false if a deadlock-free * state is attainable, in which case waitOrders[] shows the required * rearrangements of lock wait queues (if any). */ static bool DeadLockCheckRecurse(PGPROC *proc) { int nEdges; int oldPossibleConstraints; bool savedList; int i; nEdges = TestConfiguration(proc); if (nEdges < 0) return true; /* hard deadlock --- no solution */ if (nEdges == 0) return false; /* good configuration found */ if (nCurConstraints >= maxCurConstraints) return true; /* out of room for active constraints? */ oldPossibleConstraints = nPossibleConstraints; if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints) { /* We can save the edge list in possibleConstraints[] */ nPossibleConstraints += nEdges; savedList = true; } else { /* Not room; will need to regenerate the edges on-the-fly */ savedList = false; } /* * Try each available soft edge as an addition to the configuration. */ for (i = 0; i < nEdges; i++) { if (!savedList && i > 0) { /* Regenerate the list of possible added constraints */ if (nEdges != TestConfiguration(proc)) elog(FATAL, "inconsistent results during deadlock check"); } curConstraints[nCurConstraints] = possibleConstraints[oldPossibleConstraints + i]; nCurConstraints++; if (!DeadLockCheckRecurse(proc)) return false; /* found a valid solution! */ /* give up on that added constraint, try again */ nCurConstraints--; } nPossibleConstraints = oldPossibleConstraints; return true; /* no solution found */ } /*-------------------- * Test a configuration (current set of constraints) for validity. * * Returns: * 0: the configuration is good (no deadlocks) * -1: the configuration has a hard deadlock or is not self-consistent * >0: the configuration has one or more soft deadlocks * * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily * and a list of its soft edges is returned beginning at * possibleConstraints+nPossibleConstraints. The return value is the * number of soft edges. *-------------------- */ static int TestConfiguration(PGPROC *startProc) { int softFound = 0; EDGE *softEdges = possibleConstraints + nPossibleConstraints; int nSoftEdges; int i; /* * Make sure we have room for FindLockCycle's output. */ if (nPossibleConstraints + MaxBackends > maxPossibleConstraints) return -1; /* * Expand current constraint set into wait orderings. Fail if the * constraint set is not self-consistent. */ if (!ExpandConstraints(curConstraints, nCurConstraints)) return -1; /* * Check for cycles involving startProc or any of the procs mentioned in * constraints. We check startProc last because if it has a soft cycle * still to be dealt with, we want to deal with that first. */ for (i = 0; i < nCurConstraints; i++) { if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges)) { if (nSoftEdges == 0) return -1; /* hard deadlock detected */ softFound = nSoftEdges; } if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges)) { if (nSoftEdges == 0) return -1; /* hard deadlock detected */ softFound = nSoftEdges; } } if (FindLockCycle(startProc, softEdges, &nSoftEdges)) { if (nSoftEdges == 0) return -1; /* hard deadlock detected */ softFound = nSoftEdges; } return softFound; } /* * FindLockCycle -- basic check for deadlock cycles * * Scan outward from the given proc to see if there is a cycle in the * waits-for graph that includes this proc. Return true if a cycle * is found, else false. If a cycle is found, we return a list of * the "soft edges", if any, included in the cycle. These edges could * potentially be eliminated by rearranging wait queues. We also fill * deadlockDetails[] with information about the detected cycle; this info * is not used by the deadlock algorithm itself, only to print a useful * message after failing. * * Since we need to be able to check hypothetical configurations that would * exist after wait queue rearrangement, the routine pays attention to the * table of hypothetical queue orders in waitOrders[]. These orders will * be believed in preference to the actual ordering seen in the locktable. */ static bool FindLockCycle(PGPROC *checkProc, EDGE *softEdges, /* output argument */ int *nSoftEdges) /* output argument */ { nVisitedProcs = 0; nDeadlockDetails = 0; *nSoftEdges = 0; return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges); } static bool FindLockCycleRecurse(PGPROC *checkProc, int depth, EDGE *softEdges, /* output argument */ int *nSoftEdges) /* output argument */ { int i; dlist_iter iter; /* * If this process is a lock group member, check the leader instead. (Note * that we might be the leader, in which case this is a no-op.) */ if (checkProc->lockGroupLeader != NULL) checkProc = checkProc->lockGroupLeader; /* * Have we already seen this proc? */ for (i = 0; i < nVisitedProcs; i++) { if (visitedProcs[i] == checkProc) { /* If we return to starting point, we have a deadlock cycle */ if (i == 0) { /* * record total length of cycle --- outer levels will now fill * deadlockDetails[] */ Assert(depth <= MaxBackends); nDeadlockDetails = depth; return true; } /* * Otherwise, we have a cycle but it does not include the start * point, so say "no deadlock". */ return false; } } /* Mark proc as seen */ Assert(nVisitedProcs < MaxBackends); visitedProcs[nVisitedProcs++] = checkProc; /* * If the process is waiting, there is an outgoing waits-for edge to each * process that blocks it. */ if (checkProc->links.next != NULL && checkProc->waitLock != NULL && FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges, nSoftEdges)) return true; /* * If the process is not waiting, there could still be outgoing waits-for * edges if it is part of a lock group, because other members of the lock * group might be waiting even though this process is not. (Given lock * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2, * that is a deadlock even neither of B1 and A2 are waiting for anything.) */ dlist_foreach(iter, &checkProc->lockGroupMembers) { PGPROC *memberProc; memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur); if (memberProc->links.next != NULL && memberProc->waitLock != NULL && memberProc != checkProc && FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges, nSoftEdges)) return true; } return false; } static bool FindLockCycleRecurseMember(PGPROC *checkProc, PGPROC *checkProcLeader, int depth, EDGE *softEdges, /* output argument */ int *nSoftEdges) /* output argument */ { PGPROC *proc; LOCK *lock = checkProc->waitLock; PGXACT *pgxact; PROCLOCK *proclock; SHM_QUEUE *procLocks; LockMethod lockMethodTable; PROC_QUEUE *waitQueue; int queue_size; int conflictMask; int i; int numLockModes, lm; /* * The relation extension or page lock can never participate in actual * deadlock cycle. See Asserts in LockAcquireExtended. So, there is no * advantage in checking wait edges from them. */ if (LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND || (LOCK_LOCKTAG(*lock) == LOCKTAG_PAGE)) return false; lockMethodTable = GetLocksMethodTable(lock); numLockModes = lockMethodTable->numLockModes; conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode]; /* * Scan for procs that already hold conflicting locks. These are "hard" * edges in the waits-for graph. */ procLocks = &(lock->procLocks); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); while (proclock) { PGPROC *leader; proc = proclock->tag.myProc; pgxact = &ProcGlobal->allPgXact[proc->pgprocno]; leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader; /* A proc never blocks itself or any other lock group member */ if (leader != checkProcLeader) { for (lm = 1; lm <= numLockModes; lm++) { if ((proclock->holdMask & LOCKBIT_ON(lm)) && (conflictMask & LOCKBIT_ON(lm))) { /* This proc hard-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) { /* fill deadlockDetails[] */ DEADLOCK_INFO *info = &deadlockDetails[depth]; info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; return true; } /* * No deadlock here, but see if this proc is an autovacuum * that is directly hard-blocking our own proc. If so, * report it so that the caller can send a cancel signal * to it, if appropriate. If there's more than one such * proc, it's indeterminate which one will be reported. * * We don't touch autovacuums that are indirectly blocking * us; it's up to the direct blockee to take action. This * rule simplifies understanding the behavior and ensures * that an autovacuum won't be canceled with less than * deadlock_timeout grace period. * * Note we read vacuumFlags without any locking. This is * OK only for checking the PROC_IS_AUTOVACUUM flag, * because that flag is set at process start and never * reset. There is logic elsewhere to avoid canceling an * autovacuum that is working to prevent XID wraparound * problems (which needs to read a different vacuumFlag * bit), but we don't do that here to avoid grabbing * ProcArrayLock. */ if (checkProc == MyProc && pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) blocking_autovacuum_proc = proc; /* We're done looking at this proclock */ break; } } } proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, offsetof(PROCLOCK, lockLink)); } /* * Scan for procs that are ahead of this one in the lock's wait queue. * Those that have conflicting requests soft-block this one. This must be * done after the hard-block search, since if another proc both hard- and * soft-blocks this one, we want to call it a hard edge. * * If there is a proposed re-ordering of the lock's wait order, use that * rather than the current wait order. */ for (i = 0; i < nWaitOrders; i++) { if (waitOrders[i].lock == lock) break; } if (i < nWaitOrders) { /* Use the given hypothetical wait queue order */ PGPROC **procs = waitOrders[i].procs; queue_size = waitOrders[i].nProcs; for (i = 0; i < queue_size; i++) { PGPROC *leader; proc = procs[i]; leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader; /* * TopoSort will always return an ordering with group members * adjacent to each other in the wait queue (see comments * therein). So, as soon as we reach a process in the same lock * group as checkProc, we know we've found all the conflicts that * precede any member of the lock group lead by checkProcLeader. */ if (leader == checkProcLeader) break; /* Is there a conflict with this guy's request? */ if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0) { /* This proc soft-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) { /* fill deadlockDetails[] */ DEADLOCK_INFO *info = &deadlockDetails[depth]; info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; /* * Add this edge to the list of soft edges in the cycle */ Assert(*nSoftEdges < MaxBackends); softEdges[*nSoftEdges].waiter = checkProcLeader; softEdges[*nSoftEdges].blocker = leader; softEdges[*nSoftEdges].lock = lock; (*nSoftEdges)++; return true; } } } } else { PGPROC *lastGroupMember = NULL; /* Use the true lock wait queue order */ waitQueue = &(lock->waitProcs); /* * Find the last member of the lock group that is present in the wait * queue. Anything after this is not a soft lock conflict. If group * locking is not in use, then we know immediately which process we're * looking for, but otherwise we've got to search the wait queue to * find the last process actually present. */ if (checkProc->lockGroupLeader == NULL) lastGroupMember = checkProc; else { proc = (PGPROC *) waitQueue->links.next; queue_size = waitQueue->size; while (queue_size-- > 0) { if (proc->lockGroupLeader == checkProcLeader) lastGroupMember = proc; proc = (PGPROC *) proc->links.next; } Assert(lastGroupMember != NULL); } /* * OK, now rescan (or scan) the queue to identify the soft conflicts. */ queue_size = waitQueue->size; proc = (PGPROC *) waitQueue->links.next; while (queue_size-- > 0) { PGPROC *leader; leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader; /* Done when we reach the target proc */ if (proc == lastGroupMember) break; /* Is there a conflict with this guy's request? */ if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 && leader != checkProcLeader) { /* This proc soft-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) { /* fill deadlockDetails[] */ DEADLOCK_INFO *info = &deadlockDetails[depth]; info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; /* * Add this edge to the list of soft edges in the cycle */ Assert(*nSoftEdges < MaxBackends); softEdges[*nSoftEdges].waiter = checkProcLeader; softEdges[*nSoftEdges].blocker = leader; softEdges[*nSoftEdges].lock = lock; (*nSoftEdges)++; return true; } } proc = (PGPROC *) proc->links.next; } } /* * No conflict detected here. */ return false; } /* * ExpandConstraints -- expand a list of constraints into a set of * specific new orderings for affected wait queues * * Input is a list of soft edges to be reversed. The output is a list * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array * workspace in waitOrderProcs[]. * * Returns true if able to build an ordering that satisfies all the * constraints, false if not (there are contradictory constraints). */ static bool ExpandConstraints(EDGE *constraints, int nConstraints) { int nWaitOrderProcs = 0; int i, j; nWaitOrders = 0; /* * Scan constraint list backwards. This is because the last-added * constraint is the only one that could fail, and so we want to test it * for inconsistency first. */ for (i = nConstraints; --i >= 0;) { LOCK *lock = constraints[i].lock; /* Did we already make a list for this lock? */ for (j = nWaitOrders; --j >= 0;) { if (waitOrders[j].lock == lock) break; } if (j >= 0) continue; /* No, so allocate a new list */ waitOrders[nWaitOrders].lock = lock; waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs; waitOrders[nWaitOrders].nProcs = lock->waitProcs.size; nWaitOrderProcs += lock->waitProcs.size; Assert(nWaitOrderProcs <= MaxBackends); /* * Do the topo sort. TopoSort need not examine constraints after this * one, since they must be for different locks. */ if (!TopoSort(lock, constraints, i + 1, waitOrders[nWaitOrders].procs)) return false; nWaitOrders++; } return true; } /* * TopoSort -- topological sort of a wait queue * * Generate a re-ordering of a lock's wait queue that satisfies given * constraints about certain procs preceding others. (Each such constraint * is a fact of a partial ordering.) Minimize rearrangement of the queue * not needed to achieve the partial ordering. * * This is a lot simpler and slower than, for example, the topological sort * algorithm shown in Knuth's Volume 1. However, Knuth's method doesn't * try to minimize the damage to the existing order. In practice we are * not likely to be working with more than a few constraints, so the apparent * slowness of the algorithm won't really matter. * * The initial queue ordering is taken directly from the lock's wait queue. * The output is an array of PGPROC pointers, of length equal to the lock's * wait queue length (the caller is responsible for providing this space). * The partial order is specified by an array of EDGE structs. Each EDGE * is one that we need to reverse, therefore the "waiter" must appear before * the "blocker" in the output array. The EDGE array may well contain * edges associated with other locks; these should be ignored. * * Returns true if able to build an ordering that satisfies all the * constraints, false if not (there are contradictory constraints). */ static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints, PGPROC **ordering) /* output argument */ { PROC_QUEUE *waitQueue = &(lock->waitProcs); int queue_size = waitQueue->size; PGPROC *proc; int i, j, jj, k, kk, last; /* First, fill topoProcs[] array with the procs in their current order */ proc = (PGPROC *) waitQueue->links.next; for (i = 0; i < queue_size; i++) { topoProcs[i] = proc; proc = (PGPROC *) proc->links.next; } /* * Scan the constraints, and for each proc in the array, generate a count * of the number of constraints that say it must be before something else, * plus a list of the constraints that say it must be after something * else. The count for the j'th proc is stored in beforeConstraints[j], * and the head of its list in afterConstraints[j]. Each constraint * stores its list link in constraints[i].link (note any constraint will * be in just one list). The array index for the before-proc of the i'th * constraint is remembered in constraints[i].pred. * * Note that it's not necessarily the case that every constraint affects * this particular wait queue. Prior to group locking, a process could be * waiting for at most one lock. But a lock group can be waiting for * zero, one, or multiple locks. Since topoProcs[] is an array of the * processes actually waiting, while constraints[] is an array of group * leaders, we've got to scan through topoProcs[] for each constraint, * checking whether both a waiter and a blocker for that group are * present. If so, the constraint is relevant to this wait queue; if not, * it isn't. */ MemSet(beforeConstraints, 0, queue_size * sizeof(int)); MemSet(afterConstraints, 0, queue_size * sizeof(int)); for (i = 0; i < nConstraints; i++) { /* * Find a representative process that is on the lock queue and part of * the waiting lock group. This may or may not be the leader, which * may or may not be waiting at all. If there are any other processes * in the same lock group on the queue, set their number of * beforeConstraints to -1 to indicate that they should be emitted * with their groupmates rather than considered separately. * * In this loop and the similar one just below, it's critical that we * consistently select the same representative member of any one lock * group, so that all the constraints are associated with the same * proc, and the -1's are only associated with not-representative * members. We select the last one in the topoProcs array. */ proc = constraints[i].waiter; Assert(proc != NULL); jj = -1; for (j = queue_size; --j >= 0;) { PGPROC *waiter = topoProcs[j]; if (waiter == proc || waiter->lockGroupLeader == proc) { Assert(waiter->waitLock == lock); if (jj == -1) jj = j; else { Assert(beforeConstraints[j] <= 0); beforeConstraints[j] = -1; } } } /* If no matching waiter, constraint is not relevant to this lock. */ if (jj < 0) continue; /* * Similarly, find a representative process that is on the lock queue * and waiting for the blocking lock group. Again, this could be the * leader but does not need to be. */ proc = constraints[i].blocker; Assert(proc != NULL); kk = -1; for (k = queue_size; --k >= 0;) { PGPROC *blocker = topoProcs[k]; if (blocker == proc || blocker->lockGroupLeader == proc) { Assert(blocker->waitLock == lock); if (kk == -1) kk = k; else { Assert(beforeConstraints[k] <= 0); beforeConstraints[k] = -1; } } } /* If no matching blocker, constraint is not relevant to this lock. */ if (kk < 0) continue; Assert(beforeConstraints[jj] >= 0); beforeConstraints[jj]++; /* waiter must come before */ /* add this constraint to list of after-constraints for blocker */ constraints[i].pred = jj; constraints[i].link = afterConstraints[kk]; afterConstraints[kk] = i + 1; } /*-------------------- * Now scan the topoProcs array backwards. At each step, output the * last proc that has no remaining before-constraints plus any other * members of the same lock group; then decrease the beforeConstraints * count of each of the procs it was constrained against. * i = index of ordering[] entry we want to output this time * j = search index for topoProcs[] * k = temp for scanning constraint list for proc j * last = last non-null index in topoProcs (avoid redundant searches) *-------------------- */ last = queue_size - 1; for (i = queue_size - 1; i >= 0;) { int c; int nmatches = 0; /* Find next candidate to output */ while (topoProcs[last] == NULL) last--; for (j = last; j >= 0; j--) { if (topoProcs[j] != NULL && beforeConstraints[j] == 0) break; } /* If no available candidate, topological sort fails */ if (j < 0) return false; /* * Output everything in the lock group. There's no point in * outputting an ordering where members of the same lock group are not * consecutive on the wait queue: if some other waiter is between two * requests that belong to the same group, then either it conflicts * with both of them and is certainly not a solution; or it conflicts * with at most one of them and is thus isomorphic to an ordering * where the group members are consecutive. */ proc = topoProcs[j]; if (proc->lockGroupLeader != NULL) proc = proc->lockGroupLeader; Assert(proc != NULL); for (c = 0; c <= last; ++c) { if (topoProcs[c] == proc || (topoProcs[c] != NULL && topoProcs[c]->lockGroupLeader == proc)) { ordering[i - nmatches] = topoProcs[c]; topoProcs[c] = NULL; ++nmatches; } } Assert(nmatches > 0); i -= nmatches; /* Update beforeConstraints counts of its predecessors */ for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link) beforeConstraints[constraints[k - 1].pred]--; } /* Done */ return true; } #ifdef DEBUG_DEADLOCK static void PrintLockQueue(LOCK *lock, const char *info) { PROC_QUEUE *waitQueue = &(lock->waitProcs); int queue_size = waitQueue->size; PGPROC *proc; int i; printf("%s lock %p queue ", info, lock); proc = (PGPROC *) waitQueue->links.next; for (i = 0; i < queue_size; i++) { printf(" %d", proc->pid); proc = (PGPROC *) proc->links.next; } printf("\n"); fflush(stdout); } #endif /* * Report a detected deadlock, with available details. */ void DeadLockReport(void) { StringInfoData clientbuf; /* errdetail for client */ StringInfoData logbuf; /* errdetail for server log */ StringInfoData locktagbuf; int i; initStringInfo(&clientbuf); initStringInfo(&logbuf); initStringInfo(&locktagbuf); /* Generate the "waits for" lines sent to the client */ for (i = 0; i < nDeadlockDetails; i++) { DEADLOCK_INFO *info = &deadlockDetails[i]; int nextpid; /* The last proc waits for the first one... */ if (i < nDeadlockDetails - 1) nextpid = info[1].pid; else nextpid = deadlockDetails[0].pid; /* reset locktagbuf to hold next object description */ resetStringInfo(&locktagbuf); DescribeLockTag(&locktagbuf, &info->locktag); if (i > 0) appendStringInfoChar(&clientbuf, '\n'); appendStringInfo(&clientbuf, _("Process %d waits for %s on %s; blocked by process %d."), info->pid, GetLockmodeName(info->locktag.locktag_lockmethodid, info->lockmode), locktagbuf.data, nextpid); } /* Duplicate all the above for the server ... */ appendBinaryStringInfo(&logbuf, clientbuf.data, clientbuf.len); /* ... and add info about query strings */ for (i = 0; i < nDeadlockDetails; i++) { DEADLOCK_INFO *info = &deadlockDetails[i]; appendStringInfoChar(&logbuf, '\n'); appendStringInfo(&logbuf, _("Process %d: %s"), info->pid, pgstat_get_backend_current_activity(info->pid, false)); } pgstat_report_deadlock(); ereport(ERROR, (errcode(ERRCODE_T_R_DEADLOCK_DETECTED), errmsg("deadlock detected"), errdetail_internal("%s", clientbuf.data), errdetail_log("%s", logbuf.data), errhint("See server log for query details."))); } /* * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep * detects a trivial (two-way) deadlock. proc1 wants to block for lockmode * on lock, but proc2 is already waiting and would be blocked by proc1. */ void RememberSimpleDeadLock(PGPROC *proc1, LOCKMODE lockmode, LOCK *lock, PGPROC *proc2) { DEADLOCK_INFO *info = &deadlockDetails[0]; info->locktag = lock->tag; info->lockmode = lockmode; info->pid = proc1->pid; info++; info->locktag = proc2->waitLock->tag; info->lockmode = proc2->waitLockMode; info->pid = proc2->pid; nDeadlockDetails = 2; }