summaryrefslogtreecommitdiffstats
path: root/src/backend/storage/ipc/standby.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/ipc/standby.c')
-rw-r--r--src/backend/storage/ipc/standby.c1450
1 files changed, 1450 insertions, 0 deletions
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
new file mode 100644
index 0000000..687ce03
--- /dev/null
+++ b/src/backend/storage/ipc/standby.c
@@ -0,0 +1,1450 @@
+/*-------------------------------------------------------------------------
+ *
+ * standby.c
+ * Misc functions used in Hot Standby mode.
+ *
+ * All functions for handling RM_STANDBY_ID, which relate to
+ * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
+ * Plus conflict recovery processing.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/storage/ipc/standby.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "access/transam.h"
+#include "access/twophase.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "access/xloginsert.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/sinvaladt.h"
+#include "storage/standby.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
+#include "utils/ps_status.h"
+#include "utils/timeout.h"
+#include "utils/timestamp.h"
+
+/* User-settable GUC parameters */
+int vacuum_defer_cleanup_age;
+int max_standby_archive_delay = 30 * 1000;
+int max_standby_streaming_delay = 30 * 1000;
+bool log_recovery_conflict_waits = false;
+
+static HTAB *RecoveryLockLists;
+
+/* Flags set by timeout handlers */
+static volatile sig_atomic_t got_standby_deadlock_timeout = false;
+static volatile sig_atomic_t got_standby_delay_timeout = false;
+static volatile sig_atomic_t got_standby_lock_timeout = false;
+
+static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
+ ProcSignalReason reason,
+ uint32 wait_event_info,
+ bool report_waiting);
+static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
+static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
+static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
+static const char *get_recovery_conflict_desc(ProcSignalReason reason);
+
+/*
+ * Keep track of all the locks owned by a given transaction.
+ */
+typedef struct RecoveryLockListsEntry
+{
+ TransactionId xid;
+ List *locks;
+} RecoveryLockListsEntry;
+
+/*
+ * InitRecoveryTransactionEnvironment
+ * Initialize tracking of our primary's in-progress transactions.
+ *
+ * We need to issue shared invalidations and hold locks. Holding locks
+ * means others may want to wait on us, so we need to make a lock table
+ * vxact entry like a real transaction. We could create and delete
+ * lock table entries for each transaction but its simpler just to create
+ * one permanent entry and leave it there all the time. Locks are then
+ * acquired and released as needed. Yes, this means you can see the
+ * Startup process in pg_locks once we have run this.
+ */
+void
+InitRecoveryTransactionEnvironment(void)
+{
+ VirtualTransactionId vxid;
+ HASHCTL hash_ctl;
+
+ /*
+ * Initialize the hash table for tracking the list of locks held by each
+ * transaction.
+ */
+ hash_ctl.keysize = sizeof(TransactionId);
+ hash_ctl.entrysize = sizeof(RecoveryLockListsEntry);
+ RecoveryLockLists = hash_create("RecoveryLockLists",
+ 64,
+ &hash_ctl,
+ HASH_ELEM | HASH_BLOBS);
+
+ /*
+ * Initialize shared invalidation management for Startup process, being
+ * careful to register ourselves as a sendOnly process so we don't need to
+ * read messages, nor will we get signaled when the queue starts filling
+ * up.
+ */
+ SharedInvalBackendInit(true);
+
+ /*
+ * Lock a virtual transaction id for Startup process.
+ *
+ * We need to do GetNextLocalTransactionId() because
+ * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
+ * manager doesn't like that at all.
+ *
+ * Note that we don't need to run XactLockTableInsert() because nobody
+ * needs to wait on xids. That sounds a little strange, but table locks
+ * are held by vxids and row level locks are held by xids. All queries
+ * hold AccessShareLocks so never block while we write or lock new rows.
+ */
+ vxid.backendId = MyBackendId;
+ vxid.localTransactionId = GetNextLocalTransactionId();
+ VirtualXactLockTableInsert(vxid);
+
+ standbyState = STANDBY_INITIALIZED;
+}
+
+/*
+ * ShutdownRecoveryTransactionEnvironment
+ * Shut down transaction tracking
+ *
+ * Prepare to switch from hot standby mode to normal operation. Shut down
+ * recovery-time transaction tracking.
+ *
+ * This must be called even in shutdown of startup process if transaction
+ * tracking has been initialized. Otherwise some locks the tracked
+ * transactions were holding will not be released and and may interfere with
+ * the processes still running (but will exit soon later) at the exit of
+ * startup process.
+ */
+void
+ShutdownRecoveryTransactionEnvironment(void)
+{
+ /*
+ * Do nothing if RecoveryLockLists is NULL because which means that
+ * transaction tracking has not been yet initialized or has been already
+ * shutdowned. This prevents transaction tracking from being shutdowned
+ * unexpectedly more than once.
+ */
+ if (RecoveryLockLists == NULL)
+ return;
+
+ /* Mark all tracked in-progress transactions as finished. */
+ ExpireAllKnownAssignedTransactionIds();
+
+ /* Release all locks the tracked transactions were holding */
+ StandbyReleaseAllLocks();
+
+ /* Destroy the hash table of locks. */
+ hash_destroy(RecoveryLockLists);
+ RecoveryLockLists = NULL;
+
+ /* Cleanup our VirtualTransaction */
+ VirtualXactLockTableCleanup();
+}
+
+
+/*
+ * -----------------------------------------------------
+ * Standby wait timers and backend cancel logic
+ * -----------------------------------------------------
+ */
+
+/*
+ * Determine the cutoff time at which we want to start canceling conflicting
+ * transactions. Returns zero (a time safely in the past) if we are willing
+ * to wait forever.
+ */
+static TimestampTz
+GetStandbyLimitTime(void)
+{
+ TimestampTz rtime;
+ bool fromStream;
+
+ /*
+ * The cutoff time is the last WAL data receipt time plus the appropriate
+ * delay variable. Delay of -1 means wait forever.
+ */
+ GetXLogReceiptTime(&rtime, &fromStream);
+ if (fromStream)
+ {
+ if (max_standby_streaming_delay < 0)
+ return 0; /* wait forever */
+ return TimestampTzPlusMilliseconds(rtime, max_standby_streaming_delay);
+ }
+ else
+ {
+ if (max_standby_archive_delay < 0)
+ return 0; /* wait forever */
+ return TimestampTzPlusMilliseconds(rtime, max_standby_archive_delay);
+ }
+}
+
+#define STANDBY_INITIAL_WAIT_US 1000
+static int standbyWait_us = STANDBY_INITIAL_WAIT_US;
+
+/*
+ * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
+ * We wait here for a while then return. If we decide we can't wait any
+ * more then we return true, if we can wait some more return false.
+ */
+static bool
+WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
+{
+ TimestampTz ltime;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Are we past the limit time? */
+ ltime = GetStandbyLimitTime();
+ if (ltime && GetCurrentTimestamp() >= ltime)
+ return true;
+
+ /*
+ * Sleep a bit (this is essential to avoid busy-waiting).
+ */
+ pgstat_report_wait_start(wait_event_info);
+ pg_usleep(standbyWait_us);
+ pgstat_report_wait_end();
+
+ /*
+ * Progressively increase the sleep times, but not to more than 1s, since
+ * pg_usleep isn't interruptible on some platforms.
+ */
+ standbyWait_us *= 2;
+ if (standbyWait_us > 1000000)
+ standbyWait_us = 1000000;
+
+ return false;
+}
+
+/*
+ * Log the recovery conflict.
+ *
+ * wait_start is the timestamp when the caller started to wait.
+ * now is the timestamp when this function has been called.
+ * wait_list is the list of virtual transaction ids assigned to
+ * conflicting processes. still_waiting indicates whether
+ * the startup process is still waiting for the recovery conflict
+ * to be resolved or not.
+ */
+void
+LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
+ TimestampTz now, VirtualTransactionId *wait_list,
+ bool still_waiting)
+{
+ long secs;
+ int usecs;
+ long msecs;
+ StringInfoData buf;
+ int nprocs = 0;
+
+ /*
+ * There must be no conflicting processes when the recovery conflict has
+ * already been resolved.
+ */
+ Assert(still_waiting || wait_list == NULL);
+
+ TimestampDifference(wait_start, now, &secs, &usecs);
+ msecs = secs * 1000 + usecs / 1000;
+ usecs = usecs % 1000;
+
+ if (wait_list)
+ {
+ VirtualTransactionId *vxids;
+
+ /* Construct a string of list of the conflicting processes */
+ vxids = wait_list;
+ while (VirtualTransactionIdIsValid(*vxids))
+ {
+ PGPROC *proc = BackendIdGetProc(vxids->backendId);
+
+ /* proc can be NULL if the target backend is not active */
+ if (proc)
+ {
+ if (nprocs == 0)
+ {
+ initStringInfo(&buf);
+ appendStringInfo(&buf, "%d", proc->pid);
+ }
+ else
+ appendStringInfo(&buf, ", %d", proc->pid);
+
+ nprocs++;
+ }
+
+ vxids++;
+ }
+ }
+
+ /*
+ * If wait_list is specified, report the list of PIDs of active
+ * conflicting backends in a detail message. Note that if all the backends
+ * in the list are not active, no detail message is logged.
+ */
+ if (still_waiting)
+ {
+ ereport(LOG,
+ errmsg("recovery still waiting after %ld.%03d ms: %s",
+ msecs, usecs, get_recovery_conflict_desc(reason)),
+ nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
+ "Conflicting processes: %s.",
+ nprocs, buf.data) : 0);
+ }
+ else
+ {
+ ereport(LOG,
+ errmsg("recovery finished waiting after %ld.%03d ms: %s",
+ msecs, usecs, get_recovery_conflict_desc(reason)));
+ }
+
+ if (nprocs > 0)
+ pfree(buf.data);
+}
+
+/*
+ * This is the main executioner for any query backend that conflicts with
+ * recovery processing. Judgement has already been passed on it within
+ * a specific rmgr. Here we just issue the orders to the procs. The procs
+ * then throw the required error as instructed.
+ *
+ * If report_waiting is true, "waiting" is reported in PS display and the
+ * wait for recovery conflict is reported in the log, if necessary. If
+ * the caller is responsible for reporting them, report_waiting should be
+ * false. Otherwise, both the caller and this function report the same
+ * thing unexpectedly.
+ */
+static void
+ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
+ ProcSignalReason reason, uint32 wait_event_info,
+ bool report_waiting)
+{
+ TimestampTz waitStart = 0;
+ char *new_status = NULL;
+ bool logged_recovery_conflict = false;
+
+ /* Fast exit, to avoid a kernel call if there's no work to be done. */
+ if (!VirtualTransactionIdIsValid(*waitlist))
+ return;
+
+ /* Set the wait start timestamp for reporting */
+ if (report_waiting && (log_recovery_conflict_waits || update_process_title))
+ waitStart = GetCurrentTimestamp();
+
+ while (VirtualTransactionIdIsValid(*waitlist))
+ {
+ /* reset standbyWait_us for each xact we wait for */
+ standbyWait_us = STANDBY_INITIAL_WAIT_US;
+
+ /* wait until the virtual xid is gone */
+ while (!VirtualXactLock(*waitlist, false))
+ {
+ /* Is it time to kill it? */
+ if (WaitExceedsMaxStandbyDelay(wait_event_info))
+ {
+ pid_t pid;
+
+ /*
+ * Now find out who to throw out of the balloon.
+ */
+ Assert(VirtualTransactionIdIsValid(*waitlist));
+ pid = CancelVirtualTransaction(*waitlist, reason);
+
+ /*
+ * Wait a little bit for it to die so that we avoid flooding
+ * an unresponsive backend when system is heavily loaded.
+ */
+ if (pid != 0)
+ pg_usleep(5000L);
+ }
+
+ if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL))
+ {
+ TimestampTz now = 0;
+ bool maybe_log_conflict;
+ bool maybe_update_title;
+
+ maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
+ maybe_update_title = (update_process_title && new_status == NULL);
+
+ /* Get the current timestamp if not report yet */
+ if (maybe_log_conflict || maybe_update_title)
+ now = GetCurrentTimestamp();
+
+ /*
+ * Report via ps if we have been waiting for more than 500
+ * msec (should that be configurable?)
+ */
+ if (maybe_update_title &&
+ TimestampDifferenceExceeds(waitStart, now, 500))
+ {
+ const char *old_status;
+ int len;
+
+ old_status = get_ps_display(&len);
+ new_status = (char *) palloc(len + 8 + 1);
+ memcpy(new_status, old_status, len);
+ strcpy(new_status + len, " waiting");
+ set_ps_display(new_status);
+ new_status[len] = '\0'; /* truncate off " waiting" */
+ }
+
+ /*
+ * Emit the log message if the startup process is waiting
+ * longer than deadlock_timeout for recovery conflict.
+ */
+ if (maybe_log_conflict &&
+ TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
+ {
+ LogRecoveryConflict(reason, waitStart, now, waitlist, true);
+ logged_recovery_conflict = true;
+ }
+ }
+ }
+
+ /* The virtual transaction is gone now, wait for the next one */
+ waitlist++;
+ }
+
+ /*
+ * Emit the log message if recovery conflict was resolved but the startup
+ * process waited longer than deadlock_timeout for it.
+ */
+ if (logged_recovery_conflict)
+ LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
+ NULL, false);
+
+ /* Reset ps display if we changed it */
+ if (new_status)
+ {
+ set_ps_display(new_status);
+ pfree(new_status);
+ }
+}
+
+void
+ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node)
+{
+ VirtualTransactionId *backends;
+
+ /*
+ * If we get passed InvalidTransactionId then we do nothing (no conflict).
+ *
+ * This can happen when replaying already-applied WAL records after a
+ * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
+ * record that marks as frozen a page which was already all-visible. It's
+ * also quite common with records generated during index deletion
+ * (original execution of the deletion can reason that a recovery conflict
+ * which is sufficient for the deletion operation must take place before
+ * replay of the deletion record itself).
+ */
+ if (!TransactionIdIsValid(latestRemovedXid))
+ return;
+
+ backends = GetConflictingVirtualXIDs(latestRemovedXid,
+ node.dbNode);
+
+ ResolveRecoveryConflictWithVirtualXIDs(backends,
+ PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
+ WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
+ true);
+}
+
+/*
+ * Variant of ResolveRecoveryConflictWithSnapshot that works with
+ * FullTransactionId values
+ */
+void
+ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId latestRemovedFullXid,
+ RelFileNode node)
+{
+ /*
+ * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
+ * so truncate the logged FullTransactionId. If the logged value is very
+ * old, so that XID wrap-around already happened on it, there can't be any
+ * snapshots that still see it.
+ */
+ FullTransactionId nextXid = ReadNextFullTransactionId();
+ uint64 diff;
+
+ diff = U64FromFullTransactionId(nextXid) -
+ U64FromFullTransactionId(latestRemovedFullXid);
+ if (diff < MaxTransactionId / 2)
+ {
+ TransactionId latestRemovedXid;
+
+ latestRemovedXid = XidFromFullTransactionId(latestRemovedFullXid);
+ ResolveRecoveryConflictWithSnapshot(latestRemovedXid, node);
+ }
+}
+
+void
+ResolveRecoveryConflictWithTablespace(Oid tsid)
+{
+ VirtualTransactionId *temp_file_users;
+
+ /*
+ * Standby users may be currently using this tablespace for their
+ * temporary files. We only care about current users because
+ * temp_tablespace parameter will just ignore tablespaces that no longer
+ * exist.
+ *
+ * Ask everybody to cancel their queries immediately so we can ensure no
+ * temp files remain and we can remove the tablespace. Nuke the entire
+ * site from orbit, it's the only way to be sure.
+ *
+ * XXX: We could work out the pids of active backends using this
+ * tablespace by examining the temp filenames in the directory. We would
+ * then convert the pids into VirtualXIDs before attempting to cancel
+ * them.
+ *
+ * We don't wait for commit because drop tablespace is non-transactional.
+ */
+ temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId,
+ InvalidOid);
+ ResolveRecoveryConflictWithVirtualXIDs(temp_file_users,
+ PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
+ WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
+ true);
+}
+
+void
+ResolveRecoveryConflictWithDatabase(Oid dbid)
+{
+ /*
+ * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
+ * only waits for transactions and completely idle sessions would block
+ * us. This is rare enough that we do this as simply as possible: no wait,
+ * just force them off immediately.
+ *
+ * No locking is required here because we already acquired
+ * AccessExclusiveLock. Anybody trying to connect while we do this will
+ * block during InitPostgres() and then disconnect when they see the
+ * database has been removed.
+ */
+ while (CountDBBackends(dbid) > 0)
+ {
+ CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true);
+
+ /*
+ * Wait awhile for them to die so that we avoid flooding an
+ * unresponsive backend when system is heavily loaded.
+ */
+ pg_usleep(10000);
+ }
+}
+
+/*
+ * ResolveRecoveryConflictWithLock is called from ProcSleep()
+ * to resolve conflicts with other backends holding relation locks.
+ *
+ * The WaitLatch sleep normally done in ProcSleep()
+ * (when not InHotStandby) is performed here, for code clarity.
+ *
+ * We either resolve conflicts immediately or set a timeout to wake us at
+ * the limit of our patience.
+ *
+ * Resolve conflicts by canceling to all backends holding a conflicting
+ * lock. As we are already queued to be granted the lock, no new lock
+ * requests conflicting with ours will be granted in the meantime.
+ *
+ * We also must check for deadlocks involving the Startup process and
+ * hot-standby backend processes. If deadlock_timeout is reached in
+ * this function, all the backends holding the conflicting locks are
+ * requested to check themselves for deadlocks.
+ *
+ * logging_conflict should be true if the recovery conflict has not been
+ * logged yet even though logging is enabled. After deadlock_timeout is
+ * reached and the request for deadlock check is sent, we wait again to
+ * be signaled by the release of the lock if logging_conflict is false.
+ * Otherwise we return without waiting again so that the caller can report
+ * the recovery conflict. In this case, then, this function is called again
+ * with logging_conflict=false (because the recovery conflict has already
+ * been logged) and we will wait again for the lock to be released.
+ */
+void
+ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
+{
+ TimestampTz ltime;
+ TimestampTz now;
+
+ Assert(InHotStandby);
+
+ ltime = GetStandbyLimitTime();
+ now = GetCurrentTimestamp();
+
+ /*
+ * Update waitStart if first time through after the startup process
+ * started waiting for the lock. It should not be updated every time
+ * ResolveRecoveryConflictWithLock() is called during the wait.
+ *
+ * Use the current time obtained for comparison with ltime as waitStart
+ * (i.e., the time when this process started waiting for the lock). Since
+ * getting the current time newly can cause overhead, we reuse the
+ * already-obtained time to avoid that overhead.
+ *
+ * Note that waitStart is updated without holding the lock table's
+ * partition lock, to avoid the overhead by additional lock acquisition.
+ * This can cause "waitstart" in pg_locks to become NULL for a very short
+ * period of time after the wait started even though "granted" is false.
+ * This is OK in practice because we can assume that users are likely to
+ * look at "waitstart" when waiting for the lock for a long time.
+ */
+ if (pg_atomic_read_u64(&MyProc->waitStart) == 0)
+ pg_atomic_write_u64(&MyProc->waitStart, now);
+
+ if (now >= ltime && ltime != 0)
+ {
+ /*
+ * We're already behind, so clear a path as quickly as possible.
+ */
+ VirtualTransactionId *backends;
+
+ backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
+
+ /*
+ * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
+ * "waiting" in PS display by disabling its argument report_waiting
+ * because the caller, WaitOnLock(), has already reported that.
+ */
+ ResolveRecoveryConflictWithVirtualXIDs(backends,
+ PROCSIG_RECOVERY_CONFLICT_LOCK,
+ PG_WAIT_LOCK | locktag.locktag_type,
+ false);
+ }
+ else
+ {
+ /*
+ * Wait (or wait again) until ltime, and check for deadlocks as well
+ * if we will be waiting longer than deadlock_timeout
+ */
+ EnableTimeoutParams timeouts[2];
+ int cnt = 0;
+
+ if (ltime != 0)
+ {
+ got_standby_lock_timeout = false;
+ timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
+ timeouts[cnt].type = TMPARAM_AT;
+ timeouts[cnt].fin_time = ltime;
+ cnt++;
+ }
+
+ got_standby_deadlock_timeout = false;
+ timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+ timeouts[cnt].type = TMPARAM_AFTER;
+ timeouts[cnt].delay_ms = DeadlockTimeout;
+ cnt++;
+
+ enable_timeouts(timeouts, cnt);
+ }
+
+ /* Wait to be signaled by the release of the Relation Lock */
+ ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
+
+ /*
+ * Exit if ltime is reached. Then all the backends holding conflicting
+ * locks will be canceled in the next ResolveRecoveryConflictWithLock()
+ * call.
+ */
+ if (got_standby_lock_timeout)
+ goto cleanup;
+
+ if (got_standby_deadlock_timeout)
+ {
+ VirtualTransactionId *backends;
+
+ backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
+
+ /* Quick exit if there's no work to be done */
+ if (!VirtualTransactionIdIsValid(*backends))
+ goto cleanup;
+
+ /*
+ * Send signals to all the backends holding the conflicting locks, to
+ * ask them to check themselves for deadlocks.
+ */
+ while (VirtualTransactionIdIsValid(*backends))
+ {
+ SignalVirtualTransaction(*backends,
+ PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+ false);
+ backends++;
+ }
+
+ /*
+ * Exit if the recovery conflict has not been logged yet even though
+ * logging is enabled, so that the caller can log that. Then
+ * RecoveryConflictWithLock() is called again and we will wait again
+ * for the lock to be released.
+ */
+ if (logging_conflict)
+ goto cleanup;
+
+ /*
+ * Wait again here to be signaled by the release of the Relation Lock,
+ * to prevent the subsequent RecoveryConflictWithLock() from causing
+ * deadlock_timeout and sending a request for deadlocks check again.
+ * Otherwise the request continues to be sent every deadlock_timeout
+ * until the relation locks are released or ltime is reached.
+ */
+ got_standby_deadlock_timeout = false;
+ ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
+ }
+
+cleanup:
+
+ /*
+ * Clear any timeout requests established above. We assume here that the
+ * Startup process doesn't have any other outstanding timeouts than those
+ * used by this function. If that stops being true, we could cancel the
+ * timeouts individually, but that'd be slower.
+ */
+ disable_all_timeouts(false);
+ got_standby_lock_timeout = false;
+ got_standby_deadlock_timeout = false;
+}
+
+/*
+ * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
+ * to resolve conflicts with other backends holding buffer pins.
+ *
+ * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
+ * (when not InHotStandby) is performed here, for code clarity.
+ *
+ * We either resolve conflicts immediately or set a timeout to wake us at
+ * the limit of our patience.
+ *
+ * Resolve conflicts by sending a PROCSIG signal to all backends to check if
+ * they hold one of the buffer pins that is blocking Startup process. If so,
+ * those backends will take an appropriate error action, ERROR or FATAL.
+ *
+ * We also must check for deadlocks. Deadlocks occur because if queries
+ * wait on a lock, that must be behind an AccessExclusiveLock, which can only
+ * be cleared if the Startup process replays a transaction completion record.
+ * If Startup process is also waiting then that is a deadlock. The deadlock
+ * can occur if the query is waiting and then the Startup sleeps, or if
+ * Startup is sleeping and the query waits on a lock. We protect against
+ * only the former sequence here, the latter sequence is checked prior to
+ * the query sleeping, in CheckRecoveryConflictDeadlock().
+ *
+ * Deadlocks are extremely rare, and relatively expensive to check for,
+ * so we don't do a deadlock check right away ... only if we have had to wait
+ * at least deadlock_timeout.
+ */
+void
+ResolveRecoveryConflictWithBufferPin(void)
+{
+ TimestampTz ltime;
+
+ Assert(InHotStandby);
+
+ ltime = GetStandbyLimitTime();
+
+ if (GetCurrentTimestamp() >= ltime && ltime != 0)
+ {
+ /*
+ * We're already behind, so clear a path as quickly as possible.
+ */
+ SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+ }
+ else
+ {
+ /*
+ * Wake up at ltime, and check for deadlocks as well if we will be
+ * waiting longer than deadlock_timeout
+ */
+ EnableTimeoutParams timeouts[2];
+ int cnt = 0;
+
+ if (ltime != 0)
+ {
+ timeouts[cnt].id = STANDBY_TIMEOUT;
+ timeouts[cnt].type = TMPARAM_AT;
+ timeouts[cnt].fin_time = ltime;
+ cnt++;
+ }
+
+ got_standby_deadlock_timeout = false;
+ timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+ timeouts[cnt].type = TMPARAM_AFTER;
+ timeouts[cnt].delay_ms = DeadlockTimeout;
+ cnt++;
+
+ enable_timeouts(timeouts, cnt);
+ }
+
+ /*
+ * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
+ * by one of the timeouts established above.
+ *
+ * We assume that only UnpinBuffer() and the timeout requests established
+ * above can wake us up here. WakeupRecovery() called by walreceiver or
+ * SIGHUP signal handler, etc cannot do that because it uses the different
+ * latch from that ProcWaitForSignal() waits on.
+ */
+ ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
+
+ if (got_standby_delay_timeout)
+ SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+ else if (got_standby_deadlock_timeout)
+ {
+ /*
+ * Send out a request for hot-standby backends to check themselves for
+ * deadlocks.
+ *
+ * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
+ * to be signaled by UnpinBuffer() again and send a request for
+ * deadlocks check if deadlock_timeout happens. This causes the
+ * request to continue to be sent every deadlock_timeout until the
+ * buffer is unpinned or ltime is reached. This would increase the
+ * workload in the startup process and backends. In practice it may
+ * not be so harmful because the period that the buffer is kept pinned
+ * is basically no so long. But we should fix this?
+ */
+ SendRecoveryConflictWithBufferPin(
+ PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ }
+
+ /*
+ * Clear any timeout requests established above. We assume here that the
+ * Startup process doesn't have any other timeouts than what this function
+ * uses. If that stops being true, we could cancel the timeouts
+ * individually, but that'd be slower.
+ */
+ disable_all_timeouts(false);
+ got_standby_delay_timeout = false;
+ got_standby_deadlock_timeout = false;
+}
+
+static void
+SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
+{
+ Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
+ reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+
+ /*
+ * We send signal to all backends to ask them if they are holding the
+ * buffer pin which is delaying the Startup process. We must not set the
+ * conflict flag yet, since most backends will be innocent. Let the
+ * SIGUSR1 handling in each backend decide their own fate.
+ */
+ CancelDBBackends(InvalidOid, reason, false);
+}
+
+/*
+ * In Hot Standby perform early deadlock detection. We abort the lock
+ * wait if we are about to sleep while holding the buffer pin that Startup
+ * process is waiting for.
+ *
+ * Note: this code is pessimistic, because there is no way for it to
+ * determine whether an actual deadlock condition is present: the lock we
+ * need to wait for might be unrelated to any held by the Startup process.
+ * Sooner or later, this mechanism should get ripped out in favor of somehow
+ * accounting for buffer locks in DeadLockCheck(). However, errors here
+ * seem to be very low-probability in practice, so for now it's not worth
+ * the trouble.
+ */
+void
+CheckRecoveryConflictDeadlock(void)
+{
+ Assert(!InRecovery); /* do not call in Startup process */
+
+ if (!HoldingBufferPinThatDelaysRecovery())
+ return;
+
+ /*
+ * Error message should match ProcessInterrupts() but we avoid calling
+ * that because we aren't handling an interrupt at this point. Note that
+ * we only cancel the current transaction here, so if we are in a
+ * subtransaction and the pin is held by a parent, then the Startup
+ * process will continue to wait even though we have avoided deadlock.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
+ errmsg("canceling statement due to conflict with recovery"),
+ errdetail("User transaction caused buffer deadlock with recovery.")));
+}
+
+
+/* --------------------------------
+ * timeout handler routines
+ * --------------------------------
+ */
+
+/*
+ * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
+ * exceeded.
+ */
+void
+StandbyDeadLockHandler(void)
+{
+ got_standby_deadlock_timeout = true;
+}
+
+/*
+ * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
+ */
+void
+StandbyTimeoutHandler(void)
+{
+ got_standby_delay_timeout = true;
+}
+
+/*
+ * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
+ */
+void
+StandbyLockTimeoutHandler(void)
+{
+ got_standby_lock_timeout = true;
+}
+
+/*
+ * -----------------------------------------------------
+ * Locking in Recovery Mode
+ * -----------------------------------------------------
+ *
+ * All locks are held by the Startup process using a single virtual
+ * transaction. This implementation is both simpler and in some senses,
+ * more correct. The locks held mean "some original transaction held
+ * this lock, so query access is not allowed at this time". So the Startup
+ * process is the proxy by which the original locks are implemented.
+ *
+ * We only keep track of AccessExclusiveLocks, which are only ever held by
+ * one transaction on one relation.
+ *
+ * We keep a hash table of lists of locks in local memory keyed by xid,
+ * RecoveryLockLists, so we can keep track of the various entries made by
+ * the Startup process's virtual xid in the shared lock table.
+ *
+ * List elements use type xl_standby_lock, since the WAL record type exactly
+ * matches the information that we need to keep track of.
+ *
+ * We use session locks rather than normal locks so we don't need
+ * ResourceOwners.
+ */
+
+
+void
+StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
+{
+ RecoveryLockListsEntry *entry;
+ xl_standby_lock *newlock;
+ LOCKTAG locktag;
+ bool found;
+
+ /* Already processed? */
+ if (!TransactionIdIsValid(xid) ||
+ TransactionIdDidCommit(xid) ||
+ TransactionIdDidAbort(xid))
+ return;
+
+ elog(trace_recovery(DEBUG4),
+ "adding recovery lock: db %u rel %u", dbOid, relOid);
+
+ /* dbOid is InvalidOid when we are locking a shared relation. */
+ Assert(OidIsValid(relOid));
+
+ /* Create a new list for this xid, if we don't have one already. */
+ entry = hash_search(RecoveryLockLists, &xid, HASH_ENTER, &found);
+ if (!found)
+ {
+ entry->xid = xid;
+ entry->locks = NIL;
+ }
+
+ newlock = palloc(sizeof(xl_standby_lock));
+ newlock->xid = xid;
+ newlock->dbOid = dbOid;
+ newlock->relOid = relOid;
+ entry->locks = lappend(entry->locks, newlock);
+
+ SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
+
+ (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
+}
+
+static void
+StandbyReleaseLockList(List *locks)
+{
+ ListCell *lc;
+
+ foreach(lc, locks)
+ {
+ xl_standby_lock *lock = (xl_standby_lock *) lfirst(lc);
+ LOCKTAG locktag;
+
+ elog(trace_recovery(DEBUG4),
+ "releasing recovery lock: xid %u db %u rel %u",
+ lock->xid, lock->dbOid, lock->relOid);
+ SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
+ if (!LockRelease(&locktag, AccessExclusiveLock, true))
+ {
+ elog(LOG,
+ "RecoveryLockLists contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
+ lock->xid, lock->dbOid, lock->relOid);
+ Assert(false);
+ }
+ }
+
+ list_free_deep(locks);
+}
+
+static void
+StandbyReleaseLocks(TransactionId xid)
+{
+ RecoveryLockListsEntry *entry;
+
+ if (TransactionIdIsValid(xid))
+ {
+ if ((entry = hash_search(RecoveryLockLists, &xid, HASH_FIND, NULL)))
+ {
+ StandbyReleaseLockList(entry->locks);
+ hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL);
+ }
+ }
+ else
+ StandbyReleaseAllLocks();
+}
+
+/*
+ * Release locks for a transaction tree, starting at xid down, from
+ * RecoveryLockLists.
+ *
+ * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
+ * to remove any AccessExclusiveLocks requested by a transaction.
+ */
+void
+StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
+{
+ int i;
+
+ StandbyReleaseLocks(xid);
+
+ for (i = 0; i < nsubxids; i++)
+ StandbyReleaseLocks(subxids[i]);
+}
+
+/*
+ * Called at end of recovery and when we see a shutdown checkpoint.
+ */
+void
+StandbyReleaseAllLocks(void)
+{
+ HASH_SEQ_STATUS status;
+ RecoveryLockListsEntry *entry;
+
+ elog(trace_recovery(DEBUG2), "release all standby locks");
+
+ hash_seq_init(&status, RecoveryLockLists);
+ while ((entry = hash_seq_search(&status)))
+ {
+ StandbyReleaseLockList(entry->locks);
+ hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL);
+ }
+}
+
+/*
+ * StandbyReleaseOldLocks
+ * Release standby locks held by top-level XIDs that aren't running,
+ * as long as they're not prepared transactions.
+ */
+void
+StandbyReleaseOldLocks(TransactionId oldxid)
+{
+ HASH_SEQ_STATUS status;
+ RecoveryLockListsEntry *entry;
+
+ hash_seq_init(&status, RecoveryLockLists);
+ while ((entry = hash_seq_search(&status)))
+ {
+ Assert(TransactionIdIsValid(entry->xid));
+
+ /* Skip if prepared transaction. */
+ if (StandbyTransactionIdIsPrepared(entry->xid))
+ continue;
+
+ /* Skip if >= oldxid. */
+ if (!TransactionIdPrecedes(entry->xid, oldxid))
+ continue;
+
+ /* Remove all locks and hash table entry. */
+ StandbyReleaseLockList(entry->locks);
+ hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL);
+ }
+}
+
+/*
+ * --------------------------------------------------------------------
+ * Recovery handling for Rmgr RM_STANDBY_ID
+ *
+ * These record types will only be created if XLogStandbyInfoActive()
+ * --------------------------------------------------------------------
+ */
+
+void
+standby_redo(XLogReaderState *record)
+{
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+ /* Backup blocks are not used in standby records */
+ Assert(!XLogRecHasAnyBlockRefs(record));
+
+ /* Do nothing if we're not in hot standby mode */
+ if (standbyState == STANDBY_DISABLED)
+ return;
+
+ if (info == XLOG_STANDBY_LOCK)
+ {
+ xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
+ int i;
+
+ for (i = 0; i < xlrec->nlocks; i++)
+ StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid,
+ xlrec->locks[i].dbOid,
+ xlrec->locks[i].relOid);
+ }
+ else if (info == XLOG_RUNNING_XACTS)
+ {
+ xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
+ RunningTransactionsData running;
+
+ running.xcnt = xlrec->xcnt;
+ running.subxcnt = xlrec->subxcnt;
+ running.subxid_overflow = xlrec->subxid_overflow;
+ running.nextXid = xlrec->nextXid;
+ running.latestCompletedXid = xlrec->latestCompletedXid;
+ running.oldestRunningXid = xlrec->oldestRunningXid;
+ running.xids = xlrec->xids;
+
+ ProcArrayApplyRecoveryInfo(&running);
+ }
+ else if (info == XLOG_INVALIDATIONS)
+ {
+ xl_invalidations *xlrec = (xl_invalidations *) XLogRecGetData(record);
+
+ ProcessCommittedInvalidationMessages(xlrec->msgs,
+ xlrec->nmsgs,
+ xlrec->relcacheInitFileInval,
+ xlrec->dbId,
+ xlrec->tsId);
+ }
+ else
+ elog(PANIC, "standby_redo: unknown op code %u", info);
+}
+
+/*
+ * Log details of the current snapshot to WAL. This allows the snapshot state
+ * to be reconstructed on the standby and for logical decoding.
+ *
+ * This is used for Hot Standby as follows:
+ *
+ * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
+ * start from a shutdown checkpoint because we know nothing was running
+ * at that time and our recovery snapshot is known empty. In the more
+ * typical case of an online checkpoint we need to jump through a few
+ * hoops to get a correct recovery snapshot and this requires a two or
+ * sometimes a three stage process.
+ *
+ * The initial snapshot must contain all running xids and all current
+ * AccessExclusiveLocks at a point in time on the standby. Assembling
+ * that information while the server is running requires many and
+ * various LWLocks, so we choose to derive that information piece by
+ * piece and then re-assemble that info on the standby. When that
+ * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
+ *
+ * Since locking on the primary when we derive the information is not
+ * strict, we note that there is a time window between the derivation and
+ * writing to WAL of the derived information. That allows race conditions
+ * that we must resolve, since xids and locks may enter or leave the
+ * snapshot during that window. This creates the issue that an xid or
+ * lock may start *after* the snapshot has been derived yet *before* the
+ * snapshot is logged in the running xacts WAL record. We resolve this by
+ * starting to accumulate changes at a point just prior to when we derive
+ * the snapshot on the primary, then ignore duplicates when we later apply
+ * the snapshot from the running xacts record. This is implemented during
+ * CreateCheckpoint() where we use the logical checkpoint location as
+ * our starting point and then write the running xacts record immediately
+ * before writing the main checkpoint WAL record. Since we always start
+ * up from a checkpoint and are immediately at our starting point, we
+ * unconditionally move to STANDBY_INITIALIZED. After this point we
+ * must do 4 things:
+ * * move shared nextXid forwards as we see new xids
+ * * extend the clog and subtrans with each new xid
+ * * keep track of uncommitted known assigned xids
+ * * keep track of uncommitted AccessExclusiveLocks
+ *
+ * When we see a commit/abort we must remove known assigned xids and locks
+ * from the completing transaction. Attempted removals that cannot locate
+ * an entry are expected and must not cause an error when we are in state
+ * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
+ * KnownAssignedXidsRemove().
+ *
+ * Later, when we apply the running xact data we must be careful to ignore
+ * transactions already committed, since those commits raced ahead when
+ * making WAL entries.
+ *
+ * The loose timing also means that locks may be recorded that have a
+ * zero xid, since xids are removed from procs before locks are removed.
+ * So we must prune the lock list down to ensure we hold locks only for
+ * currently running xids, performed by StandbyReleaseOldLocks().
+ * Zero xids should no longer be possible, but we may be replaying WAL
+ * from a time when they were possible.
+ *
+ * For logical decoding only the running xacts information is needed;
+ * there's no need to look at the locking information, but it's logged anyway,
+ * as there's no independent knob to just enable logical decoding. For
+ * details of how this is used, check snapbuild.c's introductory comment.
+ *
+ *
+ * Returns the RecPtr of the last inserted record.
+ */
+XLogRecPtr
+LogStandbySnapshot(void)
+{
+ XLogRecPtr recptr;
+ RunningTransactions running;
+ xl_standby_lock *locks;
+ int nlocks;
+
+ Assert(XLogStandbyInfoActive());
+
+ /*
+ * Get details of any AccessExclusiveLocks being held at the moment.
+ */
+ locks = GetRunningTransactionLocks(&nlocks);
+ if (nlocks > 0)
+ LogAccessExclusiveLocks(nlocks, locks);
+ pfree(locks);
+
+ /*
+ * Log details of all in-progress transactions. This should be the last
+ * record we write, because standby will open up when it sees this.
+ */
+ running = GetRunningTransactionData();
+
+ /*
+ * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
+ * For Hot Standby this can be done before inserting the WAL record
+ * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
+ * the clog. For logical decoding, though, the lock can't be released
+ * early because the clog might be "in the future" from the POV of the
+ * historic snapshot. This would allow for situations where we're waiting
+ * for the end of a transaction listed in the xl_running_xacts record
+ * which, according to the WAL, has committed before the xl_running_xacts
+ * record. Fortunately this routine isn't executed frequently, and it's
+ * only a shared lock.
+ */
+ if (wal_level < WAL_LEVEL_LOGICAL)
+ LWLockRelease(ProcArrayLock);
+
+ recptr = LogCurrentRunningXacts(running);
+
+ /* Release lock if we kept it longer ... */
+ if (wal_level >= WAL_LEVEL_LOGICAL)
+ LWLockRelease(ProcArrayLock);
+
+ /* GetRunningTransactionData() acquired XidGenLock, we must release it */
+ LWLockRelease(XidGenLock);
+
+ return recptr;
+}
+
+/*
+ * Record an enhanced snapshot of running transactions into WAL.
+ *
+ * The definitions of RunningTransactionsData and xl_xact_running_xacts are
+ * similar. We keep them separate because xl_xact_running_xacts is a
+ * contiguous chunk of memory and never exists fully until it is assembled in
+ * WAL. The inserted records are marked as not being important for durability,
+ * to avoid triggering superfluous checkpoint / archiving activity.
+ */
+static XLogRecPtr
+LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
+{
+ xl_running_xacts xlrec;
+ XLogRecPtr recptr;
+
+ xlrec.xcnt = CurrRunningXacts->xcnt;
+ xlrec.subxcnt = CurrRunningXacts->subxcnt;
+ xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
+ xlrec.nextXid = CurrRunningXacts->nextXid;
+ xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
+ xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
+
+ /* Header */
+ XLogBeginInsert();
+ XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
+
+ /* array of TransactionIds */
+ if (xlrec.xcnt > 0)
+ XLogRegisterData((char *) CurrRunningXacts->xids,
+ (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
+
+ recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
+
+ if (CurrRunningXacts->subxid_overflow)
+ elog(trace_recovery(DEBUG2),
+ "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
+ CurrRunningXacts->xcnt,
+ LSN_FORMAT_ARGS(recptr),
+ CurrRunningXacts->oldestRunningXid,
+ CurrRunningXacts->latestCompletedXid,
+ CurrRunningXacts->nextXid);
+ else
+ elog(trace_recovery(DEBUG2),
+ "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
+ CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
+ LSN_FORMAT_ARGS(recptr),
+ CurrRunningXacts->oldestRunningXid,
+ CurrRunningXacts->latestCompletedXid,
+ CurrRunningXacts->nextXid);
+
+ /*
+ * Ensure running_xacts information is synced to disk not too far in the
+ * future. We don't want to stall anything though (i.e. use XLogFlush()),
+ * so we let the wal writer do it during normal operation.
+ * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
+ * and nudge the WALWriter into action if sleeping. Check
+ * XLogBackgroundFlush() for details why a record might not be flushed
+ * without it.
+ */
+ XLogSetAsyncXactLSN(recptr);
+
+ return recptr;
+}
+
+/*
+ * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
+ * logged, as described in backend/storage/lmgr/README.
+ */
+static void
+LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
+{
+ xl_standby_locks xlrec;
+
+ xlrec.nlocks = nlocks;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
+ XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
+ XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
+
+ (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
+}
+
+/*
+ * Individual logging of AccessExclusiveLocks for use during LockAcquire()
+ */
+void
+LogAccessExclusiveLock(Oid dbOid, Oid relOid)
+{
+ xl_standby_lock xlrec;
+
+ xlrec.xid = GetCurrentTransactionId();
+
+ xlrec.dbOid = dbOid;
+ xlrec.relOid = relOid;
+
+ LogAccessExclusiveLocks(1, &xlrec);
+ MyXactFlags |= XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK;
+}
+
+/*
+ * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
+ */
+void
+LogAccessExclusiveLockPrepare(void)
+{
+ /*
+ * Ensure that a TransactionId has been assigned to this transaction, for
+ * two reasons, both related to lock release on the standby. First, we
+ * must assign an xid so that RecordTransactionCommit() and
+ * RecordTransactionAbort() do not optimise away the transaction
+ * completion record which recovery relies upon to release locks. It's a
+ * hack, but for a corner case not worth adding code for into the main
+ * commit path. Second, we must assign an xid before the lock is recorded
+ * in shared memory, otherwise a concurrently executing
+ * GetRunningTransactionLocks() might see a lock associated with an
+ * InvalidTransactionId which we later assert cannot happen.
+ */
+ (void) GetCurrentTransactionId();
+}
+
+/*
+ * Emit WAL for invalidations. This currently is only used for commits without
+ * an xid but which contain invalidations.
+ */
+void
+LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
+ bool relcacheInitFileInval)
+{
+ xl_invalidations xlrec;
+
+ /* prepare record */
+ memset(&xlrec, 0, sizeof(xlrec));
+ xlrec.dbId = MyDatabaseId;
+ xlrec.tsId = MyDatabaseTableSpace;
+ xlrec.relcacheInitFileInval = relcacheInitFileInval;
+ xlrec.nmsgs = nmsgs;
+
+ /* perform insertion */
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfInvalidations);
+ XLogRegisterData((char *) msgs,
+ nmsgs * sizeof(SharedInvalidationMessage));
+ XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
+}
+
+/* Return the description of recovery conflict */
+static const char *
+get_recovery_conflict_desc(ProcSignalReason reason)
+{
+ const char *reasonDesc = _("unknown reason");
+
+ switch (reason)
+ {
+ case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ reasonDesc = _("recovery conflict on buffer pin");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ reasonDesc = _("recovery conflict on lock");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ reasonDesc = _("recovery conflict on tablespace");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ reasonDesc = _("recovery conflict on snapshot");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ reasonDesc = _("recovery conflict on buffer deadlock");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ reasonDesc = _("recovery conflict on database");
+ break;
+ default:
+ break;
+ }
+
+ return reasonDesc;
+}