From 293913568e6a7a86fd1479e1cff8e2ecb58d6568 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2024 15:44:03 +0200 Subject: Adding upstream version 16.2. Signed-off-by: Daniel Baumann --- src/include/storage/proc.h | 466 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 src/include/storage/proc.h (limited to 'src/include/storage/proc.h') diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h new file mode 100644 index 0000000..ef74f32 --- /dev/null +++ b/src/include/storage/proc.h @@ -0,0 +1,466 @@ +/*------------------------------------------------------------------------- + * + * proc.h + * per-process shared memory data structures + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/proc.h + * + *------------------------------------------------------------------------- + */ +#ifndef _PROC_H_ +#define _PROC_H_ + +#include "access/clog.h" +#include "access/xlogdefs.h" +#include "lib/ilist.h" +#include "storage/latch.h" +#include "storage/lock.h" +#include "storage/pg_sema.h" +#include "storage/proclist_types.h" + +/* + * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds + * for non-aborted subtransactions of its current top transaction. These + * have to be treated as running XIDs by other backends. + * + * We also keep track of whether the cache overflowed (ie, the transaction has + * generated at least one subtransaction that didn't fit in the cache). + * If none of the caches have overflowed, we can assume that an XID that's not + * listed anywhere in the PGPROC array is not a running transaction. Else we + * have to look at pg_subtrans. + * + * See src/test/isolation/specs/subxid-overflow.spec if you change this. + */ +#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */ + +typedef struct XidCacheStatus +{ + /* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS */ + uint8 count; + /* has PGPROC->subxids overflowed */ + bool overflowed; +} XidCacheStatus; + +struct XidCache +{ + TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; +}; + +/* + * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[] + */ +#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */ +#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */ +#define PROC_IN_SAFE_IC 0x04 /* currently running CREATE INDEX + * CONCURRENTLY or REINDEX + * CONCURRENTLY on non-expressional, + * non-partial index */ +#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */ +#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical + * decoding outside xact */ +#define PROC_AFFECTS_ALL_HORIZONS 0x20 /* this proc's xmin must be + * included in vacuum horizons + * in all databases */ + +/* flags reset at EOXact */ +#define PROC_VACUUM_STATE_MASK \ + (PROC_IN_VACUUM | PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND) + +/* + * Xmin-related flags. Make sure any flags that affect how the process' Xmin + * value is interpreted by VACUUM are included here. + */ +#define PROC_XMIN_FLAGS (PROC_IN_VACUUM | PROC_IN_SAFE_IC) + +/* + * We allow a small number of "weak" relation locks (AccessShareLock, + * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure + * rather than the main lock table. This eases contention on the lock + * manager LWLocks. See storage/lmgr/README for additional details. 
+ */ +#define FP_LOCK_SLOTS_PER_BACKEND 16 + +/* + * An invalid pgprocno. Must be larger than the maximum number of PGPROC + * structures we could possibly have. See comments for MAX_BACKENDS. + */ +#define INVALID_PGPROCNO PG_INT32_MAX + +/* + * Flags for PGPROC.delayChkptFlags + * + * These flags can be used to delay the start or completion of a checkpoint + * for short periods. A flag is in effect if the corresponding bit is set in + * the PGPROC of any backend. + * + * For our purposes here, a checkpoint has three phases: (1) determine the + * location to which the redo pointer will be moved, (2) write all the + * data durably to disk, and (3) WAL-log the checkpoint. + * + * Setting DELAY_CHKPT_START prevents the system from moving from phase 1 + * to phase 2. This is useful when we are performing a WAL-logged modification + * of data that will be flushed to disk in phase 2. By setting this flag + * before writing WAL and clearing it after we've both written WAL and + * performed the corresponding modification, we ensure that if the WAL record + * is inserted prior to the new redo point, the corresponding data changes will + * also be flushed to disk before the checkpoint can complete. (In the + * extremely common case where the data being modified is in shared buffers + * and we acquire an exclusive content lock on the relevant buffers before + * writing WAL, this mechanism is not needed, because phase 2 will block + * until we release the content lock and then flush the modified data to + * disk.) + * + * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2 + * to phase 3. This is useful if we are performing a WAL-logged operation that + * might invalidate buffers, such as relation truncation. In this case, we need + * to ensure that any buffers which were invalidated and thus not flushed by + * the checkpoint are actually destroyed on disk. Replay can cope with a file + * or block that doesn't exist, but not with a block that has the wrong + * contents. + */ +#define DELAY_CHKPT_START (1<<0) +#define DELAY_CHKPT_COMPLETE (1<<1) + +typedef enum +{ + PROC_WAIT_STATUS_OK, + PROC_WAIT_STATUS_WAITING, + PROC_WAIT_STATUS_ERROR, +} ProcWaitStatus; + +/* + * Each backend has a PGPROC struct in shared memory. There is also a list of + * currently-unused PGPROC structs that will be reallocated to new backends. + * + * links: list link for any list the PGPROC is in. When waiting for a lock, + * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC + * is linked into ProcGlobal's freeProcs list. + * + * Note: twophase.c also sets up a dummy PGPROC struct for each currently + * prepared transaction. These PGPROCs appear in the ProcArray data structure + * so that the prepared transactions appear to be still running and are + * correctly shown as holding locks. A prepared transaction PGPROC can be + * distinguished from a real one at need by the fact that it has pid == 0. + * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused, + * but its myProcLocks[] lists are valid. + * + * We allow many fields of this struct to be accessed without locks, such as + * delayChkptFlags and isBackgroundWorker. However, keep in mind that writing + * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in + * at least shared mode, so that pgxactoff does not change concurrently. + * + * Mirrored fields: + * + * Some fields in PGPROC (see "mirrored in ..." 
comment) are mirrored into an + * element of more densely packed ProcGlobal arrays. These arrays are indexed + * by PGPROC->pgxactoff. Both copies need to be maintained coherently. + * + * NB: The pgxactoff indexed value can *never* be accessed without holding + * locks. + * + * See PROC_HDR for details. + */ +struct PGPROC +{ + /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ + dlist_node links; /* list link if process is in a list */ + dlist_head *procgloballist; /* procglobal list that owns this PGPROC */ + + PGSemaphore sem; /* ONE semaphore to sleep on */ + ProcWaitStatus waitStatus; + + Latch procLatch; /* generic latch for process */ + + + TransactionId xid; /* id of top-level transaction currently being + * executed by this proc, if running and XID + * is assigned; else InvalidTransactionId. + * mirrored in ProcGlobal->xids[pgxactoff] */ + + TransactionId xmin; /* minimal running XID as it was when we were + * starting our xact, excluding LAZY VACUUM: + * vacuum must not remove tuples deleted by + * xid >= xmin ! */ + + LocalTransactionId lxid; /* local id of top-level transaction currently + * being executed by this proc, if running; + * else InvalidLocalTransactionId */ + int pid; /* Backend's process ID; 0 if prepared xact */ + + int pgxactoff; /* offset into various ProcGlobal->arrays with + * data mirrored from this PGPROC */ + + int pgprocno; /* Number of this PGPROC in + * ProcGlobal->allProcs array. This is set + * once by InitProcGlobal(). + * ProcGlobal->allProcs[n].pgprocno == n */ + + /* These fields are zero while a backend is still starting up: */ + BackendId backendId; /* This backend's backend ID (if assigned) */ + Oid databaseId; /* OID of database this backend is using */ + Oid roleId; /* OID of role using this backend */ + + Oid tempNamespaceId; /* OID of temp schema this backend is + * using */ + + bool isBackgroundWorker; /* true if background worker. */ + + /* + * While in hot standby mode, shows that a conflict signal has been sent + * for the current transaction. Set/cleared while holding ProcArrayLock, + * though not required. Accessed without lock, if needed. + */ + bool recoveryConflictPending; + + /* Info about LWLock the process is currently waiting for, if any. */ + uint8 lwWaiting; /* see LWLockWaitState */ + uint8 lwWaitMode; /* lwlock mode being waited for */ + proclist_node lwWaitLink; /* position in LW lock wait list */ + + /* Support for condition variables. */ + proclist_node cvWaitLink; /* position in CV wait list */ + + /* Info about lock the process is currently waiting for, if any. */ + /* waitLock and waitProcLock are NULL if not currently waiting. */ + LOCK *waitLock; /* Lock object we're sleeping on ... */ + PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */ + LOCKMODE waitLockMode; /* type of lock we're waiting for */ + LOCKMASK heldLocks; /* bitmask for lock types already held on this + * lock object by this backend */ + pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition + * started */ + + int delayChkptFlags; /* for DELAY_CHKPT_* flags */ + + uint8 statusFlags; /* this backend's status flags, see PROC_* + * above. mirrored in + * ProcGlobal->statusFlags[pgxactoff] */ + + /* + * Info to allow us to wait for synchronous replication, if needed. + * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend. + * syncRepState must not be touched except by owning process or WALSender. + * syncRepLinks used only while holding SyncRepLock. 
+ */ + XLogRecPtr waitLSN; /* waiting for this LSN or higher */ + int syncRepState; /* wait state for sync rep */ + dlist_node syncRepLinks; /* list link if process is in syncrep queue */ + + /* + * All PROCLOCK objects for locks held or awaited by this backend are + * linked into one of these lists, according to the partition number of + * their lock. + */ + dlist_head myProcLocks[NUM_LOCK_PARTITIONS]; + + XidCacheStatus subxidStatus; /* mirrored with + * ProcGlobal->subxidStates[i] */ + struct XidCache subxids; /* cache for subtransaction XIDs */ + + /* Support for group XID clearing. */ + /* true, if member of ProcArray group waiting for XID clear */ + bool procArrayGroupMember; + /* next ProcArray group member waiting for XID clear */ + pg_atomic_uint32 procArrayGroupNext; + + /* + * latest transaction id among the transaction's main XID and + * subtransactions + */ + TransactionId procArrayGroupMemberXid; + + uint32 wait_event_info; /* proc's wait information */ + + /* Support for group transaction status update. */ + bool clogGroupMember; /* true, if member of clog group */ + pg_atomic_uint32 clogGroupNext; /* next clog group member */ + TransactionId clogGroupMemberXid; /* transaction id of clog group member */ + XidStatus clogGroupMemberXidStatus; /* transaction status of clog + * group member */ + int clogGroupMemberPage; /* clog page corresponding to + * transaction id of clog group member */ + XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog + * group member */ + + /* Lock manager data, recording fast-path locks taken by this backend. */ + LWLock fpInfoLock; /* protects per-backend fast-path state */ + uint64 fpLockBits; /* lock modes held for each fast-path slot */ + Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ + bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ + LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID + * lock */ + + /* + * Support for lock groups. Use LockHashPartitionLockByProc on the group + * leader to get the LWLock protecting these fields. + */ + PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */ + dlist_head lockGroupMembers; /* list of members, if I'm a leader */ + dlist_node lockGroupLink; /* my member link, if I'm a member */ +}; + +/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ + + +extern PGDLLIMPORT PGPROC *MyProc; + +/* + * There is one ProcGlobal struct for the whole database cluster. + * + * Adding/Removing an entry into the procarray requires holding *both* + * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are + * needed because the dense arrays (see below) are accessed from + * GetNewTransactionId() and GetSnapshotData(), and we don't want to add + * further contention by both using the same lock. Adding/Removing a procarray + * entry is much less frequent. + * + * Some fields in PGPROC are mirrored into more densely packed arrays (e.g. + * xids), with one entry for each backend. These arrays only contain entries + * for PGPROCs that have been added to the shared array with ProcArrayAdd() + * (in contrast to PGPROC array which has unused PGPROCs interspersed). + * + * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent + * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray + * member to change. Therefore it is only safe to use PGPROC->pgxactoff to + * access the dense array while holding either ProcArrayLock or XidGenLock. 
+ *
+ * As long as a PGPROC is in the procarray, the mirrored values need to be
+ * maintained in both places in a coherent manner.
+ *
+ * The denser separate arrays are beneficial for three main reasons: First, to
+ * allow the loops accessing the data to be as tight as possible. Second, to
+ * prevent updates of frequently changing data (e.g. xmin) from invalidating
+ * cachelines also containing less frequently changing data (e.g. xid,
+ * statusFlags). Third, to condense frequently accessed data into as few
+ * cachelines as possible.
+ *
+ * There are two main reasons to have the data mirrored between these dense
+ * arrays and PGPROC. First, as explained above, a PGPROC's array entries can
+ * only be accessed with either ProcArrayLock or XidGenLock held, whereas the
+ * PGPROC entries do not require that (obviously there may still be locking
+ * requirements around the individual field, separate from the concerns
+ * here). That is particularly important for a backend to efficiently check
+ * its own values, which it often can safely do without locking. Second, the
+ * PGPROC fields allow us to avoid unnecessary accesses and modifications to
+ * the dense arrays. A backend's own PGPROC is more likely to be in a local
+ * cache, whereas the cachelines for the dense array will be modified by other
+ * backends (often removing it from the cache for other cores/sockets). At
+ * commit/abort time a check of the PGPROC value can avoid accessing/dirtying
+ * the corresponding array value.
+ *
+ * Basically it makes sense to access the PGPROC variable when checking a
+ * single backend's data, especially when already looking at the PGPROC for
+ * other reasons. It makes sense to look at the "dense" arrays if we need to
+ * look at many / most entries, because we then benefit from the reduced
+ * indirection and better cross-process cache-ability.
+ *
+ * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data
+ * in the dense arrays is initialized from the PGPROC; ProcArrayAdd() already
+ * holds ProcArrayLock at that point.
+ */
+typedef struct PROC_HDR
+{
+	/* Array of PGPROC structures (not including dummies for prepared txns) */
+	PGPROC	   *allProcs;
+
+	/* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
+	TransactionId *xids;
+
+	/*
+	 * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
+	 * procarray.
+	 */
+	XidCacheStatus *subxidStates;
+
+	/*
+	 * Array mirroring PGPROC.statusFlags for each PGPROC currently in the
+	 * procarray.
+ */ + uint8 *statusFlags; + + /* Length of allProcs array */ + uint32 allProcCount; + /* Head of list of free PGPROC structures */ + dlist_head freeProcs; + /* Head of list of autovacuum's free PGPROC structures */ + dlist_head autovacFreeProcs; + /* Head of list of bgworker free PGPROC structures */ + dlist_head bgworkerFreeProcs; + /* Head of list of walsender free PGPROC structures */ + dlist_head walsenderFreeProcs; + /* First pgproc waiting for group XID clear */ + pg_atomic_uint32 procArrayGroupFirst; + /* First pgproc waiting for group transaction status update */ + pg_atomic_uint32 clogGroupFirst; + /* WALWriter process's latch */ + Latch *walwriterLatch; + /* Checkpointer process's latch */ + Latch *checkpointerLatch; + /* Current shared estimate of appropriate spins_per_delay value */ + int spins_per_delay; + /* Buffer id of the buffer that Startup process waits for pin on, or -1 */ + int startupBufferPinWaitBufId; +} PROC_HDR; + +extern PGDLLIMPORT PROC_HDR *ProcGlobal; + +extern PGDLLIMPORT PGPROC *PreparedXactProcs; + +/* Accessor for PGPROC given a pgprocno. */ +#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)]) + +/* + * We set aside some extra PGPROC structures for auxiliary processes, + * ie things that aren't full-fledged backends but need shmem access. + * + * Background writer, checkpointer, WAL writer and archiver run during normal + * operation. Startup process and WAL receiver also consume 2 slots, but WAL + * writer is launched only after startup has exited, so we only need 5 slots. + */ +#define NUM_AUXILIARY_PROCS 5 + +/* configurable options */ +extern PGDLLIMPORT int DeadlockTimeout; +extern PGDLLIMPORT int StatementTimeout; +extern PGDLLIMPORT int LockTimeout; +extern PGDLLIMPORT int IdleInTransactionSessionTimeout; +extern PGDLLIMPORT int IdleSessionTimeout; +extern PGDLLIMPORT bool log_lock_waits; + + +/* + * Function Prototypes + */ +extern int ProcGlobalSemas(void); +extern Size ProcGlobalShmemSize(void); +extern void InitProcGlobal(void); +extern void InitProcess(void); +extern void InitProcessPhase2(void); +extern void InitAuxiliaryProcess(void); + +extern void SetStartupBufferPinWaitBufId(int bufid); +extern int GetStartupBufferPinWaitBufId(void); + +extern bool HaveNFreeProcs(int n, int *nfree); +extern void ProcReleaseLocks(bool isCommit); + +extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); +extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus); +extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); +extern void CheckDeadLockAlert(void); +extern bool IsWaitingForLock(void); +extern void LockErrorCleanup(void); + +extern void ProcWaitForSignal(uint32 wait_event_info); +extern void ProcSendSignal(int pgprocno); + +extern PGPROC *AuxiliaryPidGetProc(int pid); + +extern void BecomeLockGroupLeader(void); +extern bool BecomeLockGroupMember(PGPROC *leader, int pid); + +#endif /* _PROC_H_ */ -- cgit v1.2.3
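
The subtransaction-cache rule documented above PGPROC_MAX_CACHED_SUBXIDS can be made concrete with a short sketch. The following is illustrative only and not taken from the PostgreSQL tree: the real logic lives in procarray.c (TransactionIdIsInProgress), this simplified version ignores prepared transactions and several other refinements, and XidMightBeRunning() is a hypothetical helper that assumes the normal backend environment.

#include "postgres.h"

#include "access/transam.h"
#include "storage/lwlock.h"
#include "storage/proc.h"

/*
 * Hypothetical helper: conservatively decide whether "xid" might still be
 * running, using only the advertised top-level XIDs and the per-backend
 * subxid caches described in proc.h.
 */
static bool
XidMightBeRunning(TransactionId xid)
{
	bool		maybe_running = false;

	/* The PGPROC array and subxid caches must be read under ProcArrayLock. */
	LWLockAcquire(ProcArrayLock, LW_SHARED);

	for (uint32 i = 0; i < ProcGlobal->allProcCount && !maybe_running; i++)
	{
		PGPROC	   *proc = &ProcGlobal->allProcs[i];

		/* Top-level XID advertised by this backend? */
		if (TransactionIdEquals(proc->xid, xid))
			maybe_running = true;

		/* One of its cached, non-aborted subtransaction XIDs? */
		for (int j = 0; j < proc->subxidStatus.count; j++)
		{
			if (TransactionIdEquals(proc->subxids.xids[j], xid))
				maybe_running = true;
		}

		/*
		 * If this backend's cache overflowed, xid could be an unlisted
		 * subtransaction; the real code must then consult pg_subtrans, so
		 * answer conservatively here.
		 */
		if (proc->subxidStatus.overflowed)
			maybe_running = true;
	}

	LWLockRelease(ProcArrayLock);

	return maybe_running;
}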
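
The DELAY_CHKPT_START protocol described in the delayChkptFlags comment can be sketched as follows. This is a minimal illustration, not code from the tree: the WAL record construction is elided, LogAndApplyChange() is a hypothetical function, and real callers (e.g. in xact.c and twophase.c) embed the same flag manipulation in more elaborate sequences.

#include "postgres.h"

#include "miscadmin.h"
#include "storage/proc.h"

/*
 * Hypothetical example of the documented protocol: set DELAY_CHKPT_START
 * before inserting the WAL record, clear it only after both the record and
 * the corresponding data change are done, so a concurrent checkpoint cannot
 * pick a redo pointer between the two.
 */
static void
LogAndApplyChange(void)
{
	Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
	MyProc->delayChkptFlags |= DELAY_CHKPT_START;

	START_CRIT_SECTION();

	/* ... XLogInsert() the record and apply the matching data change ... */

	END_CRIT_SECTION();

	MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
}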
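
The fast-path lock fields (fpInfoLock, fpRelId[], fpLockBits) can be inspected by another backend as sketched below. Illustrative only: the per-slot encoding of fpLockBits is private to storage/lmgr/lock.c, so this hypothetical BackendHoldsFastPathSlotFor() only matches the relation OID and notes where the real code additionally checks the lock-mode bits.

#include "postgres.h"

#include "storage/lwlock.h"
#include "storage/proc.h"

/*
 * Hypothetical check: does "proc" have "relid" recorded in one of its
 * fast-path slots?  Per-backend fast-path state must be read under
 * fpInfoLock.
 */
static bool
BackendHoldsFastPathSlotFor(PGPROC *proc, Oid relid)
{
	bool		found = false;

	LWLockAcquire(&proc->fpInfoLock, LW_SHARED);

	for (int i = 0; i < FP_LOCK_SLOTS_PER_BACKEND; i++)
	{
		/*
		 * A matching OID is only meaningful if the slot's bits in fpLockBits
		 * are set; the real code in lock.c checks that as well.
		 */
		if (proc->fpRelId[i] == relid)
		{
			found = true;
			break;
		}
	}

	LWLockRelease(&proc->fpInfoLock);

	return found;
}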
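
The rule that PGPROC->pgxactoff may only be used to index the dense ProcGlobal arrays while holding ProcArrayLock or XidGenLock looks like this in practice. A minimal sketch with a hypothetical ReadMirroredXid(); it assumes the given proc is currently part of the procarray.

#include "postgres.h"

#include "storage/lwlock.h"
#include "storage/proc.h"

/*
 * Hypothetical accessor: read the mirrored copy of proc->xid out of the
 * dense array.  pgxactoff can change whenever the procarray changes, so the
 * lookup must happen under ProcArrayLock (or XidGenLock).
 */
static TransactionId
ReadMirroredXid(PGPROC *proc)
{
	TransactionId xid;

	LWLockAcquire(ProcArrayLock, LW_SHARED);
	xid = ProcGlobal->xids[proc->pgxactoff];
	LWLockRelease(ProcArrayLock);

	return xid;
}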
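
BecomeLockGroupLeader() and BecomeLockGroupMember() place a set of cooperating processes into one lock group, as the lockGroupLeader fields above describe. A rough sketch of the calling pattern, modeled on what parallel query does; everything except those two functions and proc_exit() is hypothetical, and error handling is elided.

#include "postgres.h"

#include "storage/ipc.h"
#include "storage/proc.h"

/* In the leader, before launching cooperating worker processes. */
static void
leader_setup(void)
{
	BecomeLockGroupLeader();
	/* ... register/launch workers, handing them MyProc and MyProcPid ... */
}

/* In each worker, once it has obtained the leader's PGPROC pointer and PID. */
static void
worker_join(PGPROC *leader, int leader_pid)
{
	/* Returns false if the leader has already exited; give up in that case. */
	if (!BecomeLockGroupMember(leader, leader_pid))
		proc_exit(1);
}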