summaryrefslogtreecommitdiffstats
path: root/src/include/utils/snapshot.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/utils/snapshot.h')
-rw-r--r--src/include/utils/snapshot.h219
1 files changed, 219 insertions, 0 deletions
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
new file mode 100644
index 0000000..4e96f1a
--- /dev/null
+++ b/src/include/utils/snapshot.h
@@ -0,0 +1,219 @@
+/*-------------------------------------------------------------------------
+ *
+ * snapshot.h
+ * POSTGRES snapshot definition
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/snapshot.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SNAPSHOT_H
+#define SNAPSHOT_H
+
+#include "access/htup.h"
+#include "access/xlogdefs.h"
+#include "datatype/timestamp.h"
+#include "lib/pairingheap.h"
+#include "storage/buf.h"
+
+
+/*
+ * The different snapshot types. We use SnapshotData structures to represent
+ * both "regular" (MVCC) snapshots and "special" snapshots that have non-MVCC
+ * semantics. The specific semantics of a snapshot are encoded by its type.
+ *
+ * The behaviour of each type of snapshot should be documented alongside its
+ * enum value, best in terms that are not specific to an individual table AM.
+ *
+ * The reason the snapshot type rather than a callback as it used to be is
+ * that that allows to use the same snapshot for different table AMs without
+ * having one callback per AM.
+ */
+typedef enum SnapshotType
+{
+ /*-------------------------------------------------------------------------
+ * A tuple is visible iff the tuple is valid for the given MVCC snapshot.
+ *
+ * Here, we consider the effects of:
+ * - all transactions committed as of the time of the given snapshot
+ * - previous commands of this transaction
+ *
+ * Does _not_ include:
+ * - transactions shown as in-progress by the snapshot
+ * - transactions started after the snapshot was taken
+ * - changes made by the current command
+ * -------------------------------------------------------------------------
+ */
+ SNAPSHOT_MVCC = 0,
+
+ /*-------------------------------------------------------------------------
+ * A tuple is visible iff the tuple is valid "for itself".
+ *
+ * Here, we consider the effects of:
+ * - all committed transactions (as of the current instant)
+ * - previous commands of this transaction
+ * - changes made by the current command
+ *
+ * Does _not_ include:
+ * - in-progress transactions (as of the current instant)
+ * -------------------------------------------------------------------------
+ */
+ SNAPSHOT_SELF,
+
+ /*
+ * Any tuple is visible.
+ */
+ SNAPSHOT_ANY,
+
+ /*
+ * A tuple is visible iff the tuple is valid as a TOAST row.
+ */
+ SNAPSHOT_TOAST,
+
+ /*-------------------------------------------------------------------------
+ * A tuple is visible iff the tuple is valid including effects of open
+ * transactions.
+ *
+ * Here, we consider the effects of:
+ * - all committed and in-progress transactions (as of the current instant)
+ * - previous commands of this transaction
+ * - changes made by the current command
+ *
+ * This is essentially like SNAPSHOT_SELF as far as effects of the current
+ * transaction and committed/aborted xacts are concerned. However, it
+ * also includes the effects of other xacts still in progress.
+ *
+ * A special hack is that when a snapshot of this type is used to
+ * determine tuple visibility, the passed-in snapshot struct is used as an
+ * output argument to return the xids of concurrent xacts that affected
+ * the tuple. snapshot->xmin is set to the tuple's xmin if that is
+ * another transaction that's still in progress; or to
+ * InvalidTransactionId if the tuple's xmin is committed good, committed
+ * dead, or my own xact. Similarly for snapshot->xmax and the tuple's
+ * xmax. If the tuple was inserted speculatively, meaning that the
+ * inserter might still back down on the insertion without aborting the
+ * whole transaction, the associated token is also returned in
+ * snapshot->speculativeToken. See also InitDirtySnapshot().
+ * -------------------------------------------------------------------------
+ */
+ SNAPSHOT_DIRTY,
+
+ /*
+ * A tuple is visible iff it follows the rules of SNAPSHOT_MVCC, but
+ * supports being called in timetravel context (for decoding catalog
+ * contents in the context of logical decoding).
+ */
+ SNAPSHOT_HISTORIC_MVCC,
+
+ /*
+ * A tuple is visible iff the tuple might be visible to some transaction;
+ * false if it's surely dead to everyone, i.e., vacuumable.
+ *
+ * For visibility checks snapshot->min must have been set up with the xmin
+ * horizon to use.
+ */
+ SNAPSHOT_NON_VACUUMABLE
+} SnapshotType;
+
+typedef struct SnapshotData *Snapshot;
+
+#define InvalidSnapshot ((Snapshot) NULL)
+
+/*
+ * Struct representing all kind of possible snapshots.
+ *
+ * There are several different kinds of snapshots:
+ * * Normal MVCC snapshots
+ * * MVCC snapshots taken during recovery (in Hot-Standby mode)
+ * * Historic MVCC snapshots used during logical decoding
+ * * snapshots passed to HeapTupleSatisfiesDirty()
+ * * snapshots passed to HeapTupleSatisfiesNonVacuumable()
+ * * snapshots used for SatisfiesAny, Toast, Self where no members are
+ * accessed.
+ *
+ * TODO: It's probably a good idea to split this struct using a NodeTag
+ * similar to how parser and executor nodes are handled, with one type for
+ * each different kind of snapshot to avoid overloading the meaning of
+ * individual fields.
+ */
+typedef struct SnapshotData
+{
+ SnapshotType snapshot_type; /* type of snapshot */
+
+ /*
+ * The remaining fields are used only for MVCC snapshots, and are normally
+ * just zeroes in special snapshots. (But xmin and xmax are used
+ * specially by HeapTupleSatisfiesDirty, and xmin is used specially by
+ * HeapTupleSatisfiesNonVacuumable.)
+ *
+ * An MVCC snapshot can never see the effects of XIDs >= xmax. It can see
+ * the effects of all older XIDs except those listed in the snapshot. xmin
+ * is stored as an optimization to avoid needing to search the XID arrays
+ * for most tuples.
+ */
+ TransactionId xmin; /* all XID < xmin are visible to me */
+ TransactionId xmax; /* all XID >= xmax are invisible to me */
+
+ /*
+ * For normal MVCC snapshot this contains the all xact IDs that are in
+ * progress, unless the snapshot was taken during recovery in which case
+ * it's empty. For historic MVCC snapshots, the meaning is inverted, i.e.
+ * it contains *committed* transactions between xmin and xmax.
+ *
+ * note: all ids in xip[] satisfy xmin <= xip[i] < xmax
+ */
+ TransactionId *xip;
+ uint32 xcnt; /* # of xact ids in xip[] */
+
+ /*
+ * For non-historic MVCC snapshots, this contains subxact IDs that are in
+ * progress (and other transactions that are in progress if taken during
+ * recovery). For historic snapshot it contains *all* xids assigned to the
+ * replayed transaction, including the toplevel xid.
+ *
+ * note: all ids in subxip[] are >= xmin, but we don't bother filtering
+ * out any that are >= xmax
+ */
+ TransactionId *subxip;
+ int32 subxcnt; /* # of xact ids in subxip[] */
+ bool suboverflowed; /* has the subxip array overflowed? */
+
+ bool takenDuringRecovery; /* recovery-shaped snapshot? */
+ bool copied; /* false if it's a static snapshot */
+
+ CommandId curcid; /* in my xact, CID < curcid are visible */
+
+ /*
+ * An extra return value for HeapTupleSatisfiesDirty, not used in MVCC
+ * snapshots.
+ */
+ uint32 speculativeToken;
+
+ /*
+ * For SNAPSHOT_NON_VACUUMABLE (and hopefully more in the future) this is
+ * used to determine whether row could be vacuumed.
+ */
+ struct GlobalVisState *vistest;
+
+ /*
+ * Book-keeping information, used by the snapshot manager
+ */
+ uint32 active_count; /* refcount on ActiveSnapshot stack */
+ uint32 regd_count; /* refcount on RegisteredSnapshots */
+ pairingheap_node ph_node; /* link in the RegisteredSnapshots heap */
+
+ TimestampTz whenTaken; /* timestamp when snapshot was taken */
+ XLogRecPtr lsn; /* position in the WAL stream when taken */
+
+ /*
+ * The transaction completion count at the time GetSnapshotData() built
+ * this snapshot. Allows to avoid re-computing static snapshots when no
+ * transactions completed since the last GetSnapshotData().
+ */
+ uint64 snapXactCompletionCount;
+} SnapshotData;
+
+#endif /* SNAPSHOT_H */