From 6eb9c5a5657d1fe77b55cc261450f3538d35a94d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:19:15 +0200 Subject: Adding upstream version 13.4. Signed-off-by: Daniel Baumann --- src/include/storage/.gitignore | 1 + src/include/storage/backendid.h | 37 + src/include/storage/barrier.h | 45 ++ src/include/storage/block.h | 121 ++++ src/include/storage/buf.h | 46 ++ src/include/storage/buf_internals.h | 341 ++++++++++ src/include/storage/buffile.h | 54 ++ src/include/storage/bufmgr.h | 292 ++++++++ src/include/storage/bufpage.h | 459 +++++++++++++ src/include/storage/checksum.h | 24 + src/include/storage/checksum_impl.h | 215 ++++++ src/include/storage/condition_variable.h | 62 ++ src/include/storage/copydir.h | 19 + src/include/storage/dsm.h | 61 ++ src/include/storage/dsm_impl.h | 75 +++ src/include/storage/fd.h | 168 +++++ src/include/storage/freespace.h | 39 ++ src/include/storage/fsm_internals.h | 72 ++ src/include/storage/indexfsm.h | 26 + src/include/storage/ipc.h | 81 +++ src/include/storage/item.h | 19 + src/include/storage/itemid.h | 184 +++++ src/include/storage/itemptr.h | 206 ++++++ src/include/storage/large_object.h | 100 +++ src/include/storage/latch.h | 190 ++++++ src/include/storage/lmgr.h | 114 ++++ src/include/storage/lock.h | 612 +++++++++++++++++ src/include/storage/lockdefs.h | 59 ++ src/include/storage/lwlock.h | 232 +++++++ src/include/storage/md.h | 52 ++ src/include/storage/off.h | 57 ++ src/include/storage/pg_sema.h | 61 ++ src/include/storage/pg_shmem.h | 90 +++ src/include/storage/pmsignal.h | 94 +++ src/include/storage/predicate.h | 87 +++ src/include/storage/predicate_internals.h | 493 ++++++++++++++ src/include/storage/proc.h | 333 +++++++++ src/include/storage/procarray.h | 130 ++++ src/include/storage/proclist.h | 219 ++++++ src/include/storage/proclist_types.h | 51 ++ src/include/storage/procsignal.h | 75 +++ src/include/storage/reinit.h | 28 + src/include/storage/relfilenode.h | 99 +++ src/include/storage/s_lock.h | 1047 +++++++++++++++++++++++++++++ src/include/storage/sharedfileset.h | 45 ++ src/include/storage/shm_mq.h | 85 +++ src/include/storage/shm_toc.h | 58 ++ src/include/storage/shmem.h | 81 +++ src/include/storage/sinval.h | 153 +++++ src/include/storage/sinvaladt.h | 43 ++ src/include/storage/smgr.h | 109 +++ src/include/storage/spin.h | 77 +++ src/include/storage/standby.h | 91 +++ src/include/storage/standbydefs.h | 74 ++ src/include/storage/sync.h | 62 ++ 55 files changed, 7748 insertions(+) create mode 100644 src/include/storage/.gitignore create mode 100644 src/include/storage/backendid.h create mode 100644 src/include/storage/barrier.h create mode 100644 src/include/storage/block.h create mode 100644 src/include/storage/buf.h create mode 100644 src/include/storage/buf_internals.h create mode 100644 src/include/storage/buffile.h create mode 100644 src/include/storage/bufmgr.h create mode 100644 src/include/storage/bufpage.h create mode 100644 src/include/storage/checksum.h create mode 100644 src/include/storage/checksum_impl.h create mode 100644 src/include/storage/condition_variable.h create mode 100644 src/include/storage/copydir.h create mode 100644 src/include/storage/dsm.h create mode 100644 src/include/storage/dsm_impl.h create mode 100644 src/include/storage/fd.h create mode 100644 src/include/storage/freespace.h create mode 100644 src/include/storage/fsm_internals.h create mode 100644 src/include/storage/indexfsm.h create mode 100644 src/include/storage/ipc.h create mode 100644 src/include/storage/item.h create 
mode 100644 src/include/storage/itemid.h create mode 100644 src/include/storage/itemptr.h create mode 100644 src/include/storage/large_object.h create mode 100644 src/include/storage/latch.h create mode 100644 src/include/storage/lmgr.h create mode 100644 src/include/storage/lock.h create mode 100644 src/include/storage/lockdefs.h create mode 100644 src/include/storage/lwlock.h create mode 100644 src/include/storage/md.h create mode 100644 src/include/storage/off.h create mode 100644 src/include/storage/pg_sema.h create mode 100644 src/include/storage/pg_shmem.h create mode 100644 src/include/storage/pmsignal.h create mode 100644 src/include/storage/predicate.h create mode 100644 src/include/storage/predicate_internals.h create mode 100644 src/include/storage/proc.h create mode 100644 src/include/storage/procarray.h create mode 100644 src/include/storage/proclist.h create mode 100644 src/include/storage/proclist_types.h create mode 100644 src/include/storage/procsignal.h create mode 100644 src/include/storage/reinit.h create mode 100644 src/include/storage/relfilenode.h create mode 100644 src/include/storage/s_lock.h create mode 100644 src/include/storage/sharedfileset.h create mode 100644 src/include/storage/shm_mq.h create mode 100644 src/include/storage/shm_toc.h create mode 100644 src/include/storage/shmem.h create mode 100644 src/include/storage/sinval.h create mode 100644 src/include/storage/sinvaladt.h create mode 100644 src/include/storage/smgr.h create mode 100644 src/include/storage/spin.h create mode 100644 src/include/storage/standby.h create mode 100644 src/include/storage/standbydefs.h create mode 100644 src/include/storage/sync.h (limited to 'src/include/storage') diff --git a/src/include/storage/.gitignore b/src/include/storage/.gitignore new file mode 100644 index 0000000..209c8be --- /dev/null +++ b/src/include/storage/.gitignore @@ -0,0 +1 @@ +/lwlocknames.h diff --git a/src/include/storage/backendid.h b/src/include/storage/backendid.h new file mode 100644 index 0000000..0c776a3 --- /dev/null +++ b/src/include/storage/backendid.h @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * + * backendid.h + * POSTGRES backend id communication definitions + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/backendid.h + * + *------------------------------------------------------------------------- + */ +#ifndef BACKENDID_H +#define BACKENDID_H + +/* ---------------- + * -cim 8/17/90 + * ---------------- + */ +typedef int BackendId; /* unique currently active backend identifier */ + +#define InvalidBackendId (-1) + +extern PGDLLIMPORT BackendId MyBackendId; /* backend id of this backend */ + +/* backend id of our parallel session leader, or InvalidBackendId if none */ +extern PGDLLIMPORT BackendId ParallelMasterBackendId; + +/* + * The BackendId to use for our session's temp relations is normally our own, + * but parallel workers should use their leader's ID. + */ +#define BackendIdForTempRelations() \ + (ParallelMasterBackendId == InvalidBackendId ? 
MyBackendId : ParallelMasterBackendId) + +#endif /* BACKENDID_H */ diff --git a/src/include/storage/barrier.h b/src/include/storage/barrier.h new file mode 100644 index 0000000..d71927c --- /dev/null +++ b/src/include/storage/barrier.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * barrier.h + * Barriers for synchronizing cooperating processes. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/barrier.h + * + *------------------------------------------------------------------------- + */ +#ifndef BARRIER_H +#define BARRIER_H + +/* + * For the header previously known as "barrier.h", please include + * "port/atomics.h", which deals with atomics, compiler barriers and memory + * barriers. + */ + +#include "storage/condition_variable.h" +#include "storage/spin.h" + +typedef struct Barrier +{ + slock_t mutex; + int phase; /* phase counter */ + int participants; /* the number of participants attached */ + int arrived; /* the number of participants that have + * arrived */ + int elected; /* highest phase elected */ + bool static_party; /* used only for assertions */ + ConditionVariable condition_variable; +} Barrier; + +extern void BarrierInit(Barrier *barrier, int num_workers); +extern bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info); +extern bool BarrierArriveAndDetach(Barrier *barrier); +extern int BarrierAttach(Barrier *barrier); +extern bool BarrierDetach(Barrier *barrier); +extern int BarrierPhase(Barrier *barrier); +extern int BarrierParticipants(Barrier *barrier); + +#endif /* BARRIER_H */ diff --git a/src/include/storage/block.h b/src/include/storage/block.h new file mode 100644 index 0000000..d73e393 --- /dev/null +++ b/src/include/storage/block.h @@ -0,0 +1,121 @@ +/*------------------------------------------------------------------------- + * + * block.h + * POSTGRES disk block definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/block.h + * + *------------------------------------------------------------------------- + */ +#ifndef BLOCK_H +#define BLOCK_H + +/* + * BlockNumber: + * + * each data file (heap or index) is divided into postgres disk blocks + * (which may be thought of as the unit of i/o -- a postgres buffer + * contains exactly one disk block). the blocks are numbered + * sequentially, 0 to 0xFFFFFFFE. + * + * InvalidBlockNumber is the same thing as P_NEW in bufmgr.h. + * + * the access methods, the buffer manager and the storage manager are + * more or less the only pieces of code that should be accessing disk + * blocks directly. + */ +typedef uint32 BlockNumber; + +#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF) + +#define MaxBlockNumber ((BlockNumber) 0xFFFFFFFE) + +/* + * BlockId: + * + * this is a storage type for BlockNumber. in other words, this type + * is used for on-disk structures (e.g., in HeapTupleData) whereas + * BlockNumber is the type on which calculations are performed (e.g., + * in access method code). + * + * there doesn't appear to be any reason to have separate types except + * for the fact that BlockIds can be SHORTALIGN'd (and therefore any + * structures that contains them, such as ItemPointerData, can also be + * SHORTALIGN'd). 
this is an important consideration for reducing the + * space requirements of the line pointer (ItemIdData) array on each + * page and the header of each heap or index tuple, so it doesn't seem + * wise to change this without good reason. + */ +typedef struct BlockIdData +{ + uint16 bi_hi; + uint16 bi_lo; +} BlockIdData; + +typedef BlockIdData *BlockId; /* block identifier */ + +/* ---------------- + * support macros + * ---------------- + */ + +/* + * BlockNumberIsValid + * True iff blockNumber is valid. + */ +#define BlockNumberIsValid(blockNumber) \ + ((bool) ((BlockNumber) (blockNumber) != InvalidBlockNumber)) + +/* + * BlockIdIsValid + * True iff the block identifier is valid. + */ +#define BlockIdIsValid(blockId) \ + ((bool) PointerIsValid(blockId)) + +/* + * BlockIdSet + * Sets a block identifier to the specified value. + */ +#define BlockIdSet(blockId, blockNumber) \ +( \ + AssertMacro(PointerIsValid(blockId)), \ + (blockId)->bi_hi = (blockNumber) >> 16, \ + (blockId)->bi_lo = (blockNumber) & 0xffff \ +) + +/* + * BlockIdCopy + * Copy a block identifier. + */ +#define BlockIdCopy(toBlockId, fromBlockId) \ +( \ + AssertMacro(PointerIsValid(toBlockId)), \ + AssertMacro(PointerIsValid(fromBlockId)), \ + (toBlockId)->bi_hi = (fromBlockId)->bi_hi, \ + (toBlockId)->bi_lo = (fromBlockId)->bi_lo \ +) + +/* + * BlockIdEquals + * Check for block number equality. + */ +#define BlockIdEquals(blockId1, blockId2) \ + ((blockId1)->bi_hi == (blockId2)->bi_hi && \ + (blockId1)->bi_lo == (blockId2)->bi_lo) + +/* + * BlockIdGetBlockNumber + * Retrieve the block number from a block identifier. + */ +#define BlockIdGetBlockNumber(blockId) \ +( \ + AssertMacro(BlockIdIsValid(blockId)), \ + (BlockNumber) (((blockId)->bi_hi << 16) | ((uint16) (blockId)->bi_lo)) \ +) + +#endif /* BLOCK_H */ diff --git a/src/include/storage/buf.h b/src/include/storage/buf.h new file mode 100644 index 0000000..dde87f8 --- /dev/null +++ b/src/include/storage/buf.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------- + * + * buf.h + * Basic buffer manager data types. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/buf.h + * + *------------------------------------------------------------------------- + */ +#ifndef BUF_H +#define BUF_H + +/* + * Buffer identifiers. + * + * Zero is invalid, positive is the index of a shared buffer (1..NBuffers), + * negative is the index of a local buffer (-1 .. -NLocBuffer). + */ +typedef int Buffer; + +#define InvalidBuffer 0 + +/* + * BufferIsInvalid + * True iff the buffer is invalid. + */ +#define BufferIsInvalid(buffer) ((buffer) == InvalidBuffer) + +/* + * BufferIsLocal + * True iff the buffer is local (not visible to other backends). + */ +#define BufferIsLocal(buffer) ((buffer) < 0) + +/* + * Buffer access strategy objects. + * + * BufferAccessStrategyData is private to freelist.c + */ +typedef struct BufferAccessStrategyData *BufferAccessStrategy; + +#endif /* BUF_H */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h new file mode 100644 index 0000000..3377fa5 --- /dev/null +++ b/src/include/storage/buf_internals.h @@ -0,0 +1,341 @@ +/*------------------------------------------------------------------------- + * + * buf_internals.h + * Internal definitions for buffer manager and the buffer replacement + * strategy. 
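/*
 * Illustrative sketch, not upstream PostgreSQL code: the BlockIdSet() and
 * BlockIdGetBlockNumber() macros in block.h above simply split a 32-bit
 * BlockNumber into two uint16 halves and reassemble it, which is what lets
 * BlockIdData (and structs embedding it, such as ItemPointerData) get away
 * with SHORTALIGN.  Equivalent plain functions would look like this:
 */
static inline void
blockid_set_sketch(BlockIdData *id, BlockNumber blkno)
{
    id->bi_hi = (uint16) (blkno >> 16);     /* high 16 bits */
    id->bi_lo = (uint16) (blkno & 0xffff);  /* low 16 bits */
}

static inline BlockNumber
blockid_get_sketch(const BlockIdData *id)
{
    return ((BlockNumber) id->bi_hi << 16) | id->bi_lo;
}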
+ * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/buf_internals.h + * + *------------------------------------------------------------------------- + */ +#ifndef BUFMGR_INTERNALS_H +#define BUFMGR_INTERNALS_H + +#include "port/atomics.h" +#include "storage/buf.h" +#include "storage/bufmgr.h" +#include "storage/latch.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/smgr.h" +#include "storage/spin.h" +#include "utils/relcache.h" + +/* + * Buffer state is a single 32-bit variable where following data is combined. + * + * - 18 bits refcount + * - 4 bits usage count + * - 10 bits of flags + * + * Combining these values allows to perform some operations without locking + * the buffer header, by modifying them together with a CAS loop. + * + * The definition of buffer state components is below. + */ +#define BUF_REFCOUNT_ONE 1 +#define BUF_REFCOUNT_MASK ((1U << 18) - 1) +#define BUF_USAGECOUNT_MASK 0x003C0000U +#define BUF_USAGECOUNT_ONE (1U << 18) +#define BUF_USAGECOUNT_SHIFT 18 +#define BUF_FLAG_MASK 0xFFC00000U + +/* Get refcount and usagecount from buffer state */ +#define BUF_STATE_GET_REFCOUNT(state) ((state) & BUF_REFCOUNT_MASK) +#define BUF_STATE_GET_USAGECOUNT(state) (((state) & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT) + +/* + * Flags for buffer descriptors + * + * Note: BM_TAG_VALID essentially means that there is a buffer hashtable + * entry associated with the buffer's tag. + */ +#define BM_LOCKED (1U << 22) /* buffer header is locked */ +#define BM_DIRTY (1U << 23) /* data needs writing */ +#define BM_VALID (1U << 24) /* data is valid */ +#define BM_TAG_VALID (1U << 25) /* tag is assigned */ +#define BM_IO_IN_PROGRESS (1U << 26) /* read or write in progress */ +#define BM_IO_ERROR (1U << 27) /* previous I/O failed */ +#define BM_JUST_DIRTIED (1U << 28) /* dirtied since write started */ +#define BM_PIN_COUNT_WAITER (1U << 29) /* have waiter for sole pin */ +#define BM_CHECKPOINT_NEEDED (1U << 30) /* must write for checkpoint */ +#define BM_PERMANENT (1U << 31) /* permanent buffer (not unlogged, + * or init fork) */ +/* + * The maximum allowed value of usage_count represents a tradeoff between + * accuracy and speed of the clock-sweep buffer management algorithm. A + * large value (comparable to NBuffers) would approximate LRU semantics. + * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of + * clock sweeps to find a free buffer, so in practice we don't want the + * value to be very large. + */ +#define BM_MAX_USAGE_COUNT 5 + +/* + * Buffer tag identifies which disk block the buffer contains. + * + * Note: the BufferTag data must be sufficient to determine where to write the + * block, without reference to pg_class or pg_tablespace entries. It's + * possible that the backend flushing the buffer doesn't even believe the + * relation is visible yet (its xact may have started before the xact that + * created the rel). The storage manager must be able to cope anyway. + * + * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have + * to be fixed to zero them, since this struct is used as a hash key. 
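/*
 * Illustrative sketch, not upstream PostgreSQL code: the masks defined
 * earlier carve the packed 32-bit buffer state word into three independent
 * fields.  A state holding refcount 3, usage count 2 and the BM_DIRTY flag
 * can be built and read back like this:
 */
static inline void
buf_state_packing_sketch(void)
{
    uint32      state = 3 | (2U << BUF_USAGECOUNT_SHIFT) | BM_DIRTY;

    Assert(BUF_STATE_GET_REFCOUNT(state) == 3);
    Assert(BUF_STATE_GET_USAGECOUNT(state) == 2);
    Assert((state & BM_DIRTY) != 0);
}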
+ */ +typedef struct buftag +{ + RelFileNode rnode; /* physical relation identifier */ + ForkNumber forkNum; + BlockNumber blockNum; /* blknum relative to begin of reln */ +} BufferTag; + +#define CLEAR_BUFFERTAG(a) \ +( \ + (a).rnode.spcNode = InvalidOid, \ + (a).rnode.dbNode = InvalidOid, \ + (a).rnode.relNode = InvalidOid, \ + (a).forkNum = InvalidForkNumber, \ + (a).blockNum = InvalidBlockNumber \ +) + +#define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \ +( \ + (a).rnode = (xx_rnode), \ + (a).forkNum = (xx_forkNum), \ + (a).blockNum = (xx_blockNum) \ +) + +#define BUFFERTAGS_EQUAL(a,b) \ +( \ + RelFileNodeEquals((a).rnode, (b).rnode) && \ + (a).blockNum == (b).blockNum && \ + (a).forkNum == (b).forkNum \ +) + +/* + * The shared buffer mapping table is partitioned to reduce contention. + * To determine which partition lock a given tag requires, compute the tag's + * hash code with BufTableHashCode(), then apply BufMappingPartitionLock(). + * NB: NUM_BUFFER_PARTITIONS must be a power of 2! + */ +#define BufTableHashPartition(hashcode) \ + ((hashcode) % NUM_BUFFER_PARTITIONS) +#define BufMappingPartitionLock(hashcode) \ + (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \ + BufTableHashPartition(hashcode)].lock) +#define BufMappingPartitionLockByIndex(i) \ + (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock) + +/* + * BufferDesc -- shared descriptor/state data for a single shared buffer. + * + * Note: Buffer header lock (BM_LOCKED flag) must be held to examine or change + * the tag, state or wait_backend_pid fields. In general, buffer header lock + * is a spinlock which is combined with flags, refcount and usagecount into + * single atomic variable. This layout allow us to do some operations in a + * single atomic operation, without actually acquiring and releasing spinlock; + * for instance, increase or decrease refcount. buf_id field never changes + * after initialization, so does not need locking. freeNext is protected by + * the buffer_strategy_lock not buffer header lock. The LWLock can take care + * of itself. The buffer header lock is *not* used to control access to the + * data in the buffer! + * + * It's assumed that nobody changes the state field while buffer header lock + * is held. Thus buffer header lock holder can do complex updates of the + * state variable in single write, simultaneously with lock release (cleaning + * BM_LOCKED flag). On the other hand, updating of state without holding + * buffer header lock is restricted to CAS, which insure that BM_LOCKED flag + * is not set. Atomic increment/decrement, OR/AND etc. are not allowed. + * + * An exception is that if we have the buffer pinned, its tag can't change + * underneath us, so we can examine the tag without locking the buffer header. + * Also, in places we do one-time reads of the flags without bothering to + * lock the buffer header; this is generally for situations where we don't + * expect the flag bit being tested to be changing. + * + * We can't physically remove items from a disk page if another backend has + * the buffer pinned. Hence, a backend may need to wait for all other pins + * to go away. This is signaled by storing its own PID into + * wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER. At present, + * there can be only one such waiter per buffer. + * + * We use this same struct for local buffer headers, but the locks are not + * used and not all of the flag bits are useful either. 
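/*
 * Illustrative sketch, not upstream PostgreSQL code: the lookup sequence
 * described above -- hash the tag, take the matching buffer mapping
 * partition lock in shared mode, and probe the buffer hash table.  The
 * BufTable* functions are declared later in this header and LWLockAcquire()
 * comes from lwlock.h; bufmgr.c follows this same pattern.
 */
static inline int
lookup_buffer_sketch(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum)
{
    BufferTag   tag;
    uint32      hash;
    LWLock     *partitionLock;
    int         buf_id;

    INIT_BUFFERTAG(tag, rnode, forkNum, blockNum);
    hash = BufTableHashCode(&tag);
    partitionLock = BufMappingPartitionLock(hash);

    LWLockAcquire(partitionLock, LW_SHARED);
    buf_id = BufTableLookup(&tag, hash);    /* -1 if not in the table */
    LWLockRelease(partitionLock);

    return buf_id;
}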
To avoid unnecessary + * overhead, manipulations of the state field should be done without actual + * atomic operations (i.e. only pg_atomic_read_u32() and + * pg_atomic_unlocked_write_u32()). + * + * Be careful to avoid increasing the size of the struct when adding or + * reordering members. Keeping it below 64 bytes (the most common CPU + * cache line size) is fairly important for performance. + */ +typedef struct BufferDesc +{ + BufferTag tag; /* ID of page contained in buffer */ + int buf_id; /* buffer's index number (from 0) */ + + /* state of the tag, containing flags, refcount and usagecount */ + pg_atomic_uint32 state; + + int wait_backend_pid; /* backend PID of pin-count waiter */ + int freeNext; /* link in freelist chain */ + + LWLock content_lock; /* to lock access to buffer contents */ +} BufferDesc; + +/* + * Concurrent access to buffer headers has proven to be more efficient if + * they're cache line aligned. So we force the start of the BufferDescriptors + * array to be on a cache line boundary and force the elements to be cache + * line sized. + * + * XXX: As this is primarily matters in highly concurrent workloads which + * probably all are 64bit these days, and the space wastage would be a bit + * more noticeable on 32bit systems, we don't force the stride to be cache + * line sized on those. If somebody does actual performance testing, we can + * reevaluate. + * + * Note that local buffer descriptors aren't forced to be aligned - as there's + * no concurrent access to those it's unlikely to be beneficial. + * + * We use a 64-byte cache line size here, because that's the most common + * size. Making it bigger would be a waste of memory. Even if running on a + * platform with either 32 or 128 byte line sizes, it's good to align to + * boundaries and avoid false sharing. + */ +#define BUFFERDESC_PAD_TO_SIZE (SIZEOF_VOID_P == 8 ? 64 : 1) + +typedef union BufferDescPadded +{ + BufferDesc bufferdesc; + char pad[BUFFERDESC_PAD_TO_SIZE]; +} BufferDescPadded; + +#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc) +#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)]) + +#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) + +#define BufferDescriptorGetIOLock(bdesc) \ + (&(BufferIOLWLockArray[(bdesc)->buf_id]).lock) +#define BufferDescriptorGetContentLock(bdesc) \ + ((LWLock*) (&(bdesc)->content_lock)) + +extern PGDLLIMPORT LWLockMinimallyPadded *BufferIOLWLockArray; + +/* + * The freeNext field is either the index of the next freelist entry, + * or one of these special values: + */ +#define FREENEXT_END_OF_LIST (-1) +#define FREENEXT_NOT_IN_LIST (-2) + +/* + * Functions for acquiring/releasing a shared buffer header's spinlock. Do + * not apply these to local buffers! + */ +extern uint32 LockBufHdr(BufferDesc *desc); +#define UnlockBufHdr(desc, s) \ + do { \ + pg_write_barrier(); \ + pg_atomic_write_u32(&(desc)->state, (s) & (~BM_LOCKED)); \ + } while (0) + + +/* + * The PendingWriteback & WritebackContext structure are used to keep + * information about pending flush requests to be issued to the OS. 
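/*
 * Illustrative sketch, not upstream PostgreSQL code: examining a shared
 * buffer header under the header spinlock using LockBufHdr()/UnlockBufHdr()
 * from above.  LockBufHdr() returns the state word with BM_LOCKED set; the
 * caller hands a state value back to UnlockBufHdr(), which clears BM_LOCKED
 * in the same atomic write.
 */
static inline bool
buffer_is_dirty_sketch(BufferDesc *bufHdr)
{
    uint32      buf_state;
    bool        dirty;

    buf_state = LockBufHdr(bufHdr);
    dirty = (buf_state & BM_DIRTY) != 0;
    UnlockBufHdr(bufHdr, buf_state);

    return dirty;
}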
+ */ +typedef struct PendingWriteback +{ + /* could store different types of pending flushes here */ + BufferTag tag; +} PendingWriteback; + +/* struct forward declared in bufmgr.h */ +typedef struct WritebackContext +{ + /* pointer to the max number of writeback requests to coalesce */ + int *max_pending; + + /* current number of pending writeback requests */ + int nr_pending; + + /* pending requests */ + PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]; +} WritebackContext; + +/* in buf_init.c */ +extern PGDLLIMPORT BufferDescPadded *BufferDescriptors; +extern PGDLLIMPORT WritebackContext BackendWritebackContext; + +/* in localbuf.c */ +extern BufferDesc *LocalBufferDescriptors; + +/* in bufmgr.c */ + +/* + * Structure to sort buffers per file on checkpoints. + * + * This structure is allocated per buffer in shared memory, so it should be + * kept as small as possible. + */ +typedef struct CkptSortItem +{ + Oid tsId; + Oid relNode; + ForkNumber forkNum; + BlockNumber blockNum; + int buf_id; +} CkptSortItem; + +extern CkptSortItem *CkptBufferIds; + +/* + * Internal buffer management routines + */ +/* bufmgr.c */ +extern void WritebackContextInit(WritebackContext *context, int *max_pending); +extern void IssuePendingWritebacks(WritebackContext *context); +extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag); + +/* freelist.c */ +extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, + uint32 *buf_state); +extern void StrategyFreeBuffer(BufferDesc *buf); +extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, + BufferDesc *buf); + +extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); +extern void StrategyNotifyBgWriter(int bgwprocno); + +extern Size StrategyShmemSize(void); +extern void StrategyInitialize(bool init); +extern bool have_free_buffer(void); + +/* buf_table.c */ +extern Size BufTableShmemSize(int size); +extern void InitBufTable(int size); +extern uint32 BufTableHashCode(BufferTag *tagPtr); +extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode); +extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id); +extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode); + +/* localbuf.c */ +extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, + ForkNumber forkNum, + BlockNumber blockNum); +extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, + BlockNumber blockNum, bool *foundPtr); +extern void MarkLocalBufferDirty(Buffer buffer); +extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, + BlockNumber firstDelBlock); +extern void DropRelFileNodeAllLocalBuffers(RelFileNode rnode); +extern void AtEOXact_LocalBuffers(bool isCommit); + +#endif /* BUFMGR_INTERNALS_H */ diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h new file mode 100644 index 0000000..60433f3 --- /dev/null +++ b/src/include/storage/buffile.h @@ -0,0 +1,54 @@ +/*------------------------------------------------------------------------- + * + * buffile.h + * Management of large buffered temporary files. + * + * The BufFile routines provide a partial replacement for stdio atop + * virtual file descriptors managed by fd.c. Currently they only support + * buffered access to a virtual file, without any of stdio's formatting + * features. That's enough for immediate needs, but the set of facilities + * could be expanded if necessary. 
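/*
 * Illustrative sketch, not upstream PostgreSQL code: typical use of the
 * BufFile functions declared below -- write to a backend-private temp file,
 * rewind, and read the data back.
 */
static inline void
buffile_roundtrip_sketch(void)
{
    BufFile    *file = BufFileCreateTemp(false);
    char        in[] = "example payload";
    char        out[sizeof(in)];

    if (BufFileWrite(file, in, sizeof(in)) != sizeof(in))
        elog(ERROR, "BufFileWrite failed");
    if (BufFileSeek(file, 0, 0L, SEEK_SET) != 0)
        elog(ERROR, "BufFileSeek failed");
    if (BufFileRead(file, out, sizeof(out)) != sizeof(out))
        elog(ERROR, "BufFileRead failed");
    BufFileClose(file);
}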
+ * + * BufFile also supports working with temporary files that exceed the OS + * file size limit and/or the largest offset representable in an int. + * It might be better to split that out as a separately accessible module, + * but currently we have no need for oversize temp files without buffered + * access. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/buffile.h + * + *------------------------------------------------------------------------- + */ + +#ifndef BUFFILE_H +#define BUFFILE_H + +#include "storage/sharedfileset.h" + +/* BufFile is an opaque type whose details are not known outside buffile.c. */ + +typedef struct BufFile BufFile; + +/* + * prototypes for functions in buffile.c + */ + +extern BufFile *BufFileCreateTemp(bool interXact); +extern void BufFileClose(BufFile *file); +extern size_t BufFileRead(BufFile *file, void *ptr, size_t size); +extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size); +extern int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence); +extern void BufFileTell(BufFile *file, int *fileno, off_t *offset); +extern int BufFileSeekBlock(BufFile *file, long blknum); +extern int64 BufFileSize(BufFile *file); +extern long BufFileAppend(BufFile *target, BufFile *source); + +extern BufFile *BufFileCreateShared(SharedFileSet *fileset, const char *name); +extern void BufFileExportShared(BufFile *file); +extern BufFile *BufFileOpenShared(SharedFileSet *fileset, const char *name); +extern void BufFileDeleteShared(SharedFileSet *fileset, const char *name); + +#endif /* BUFFILE_H */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h new file mode 100644 index 0000000..ee91b8f --- /dev/null +++ b/src/include/storage/bufmgr.h @@ -0,0 +1,292 @@ +/*------------------------------------------------------------------------- + * + * bufmgr.h + * POSTGRES buffer manager definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/bufmgr.h + * + *------------------------------------------------------------------------- + */ +#ifndef BUFMGR_H +#define BUFMGR_H + +#include "storage/block.h" +#include "storage/buf.h" +#include "storage/bufpage.h" +#include "storage/relfilenode.h" +#include "utils/relcache.h" +#include "utils/snapmgr.h" + +typedef void *Block; + +/* Possible arguments for GetAccessStrategy() */ +typedef enum BufferAccessStrategyType +{ + BAS_NORMAL, /* Normal random access */ + BAS_BULKREAD, /* Large read-only scan (hint bit updates are + * ok) */ + BAS_BULKWRITE, /* Large multi-block write (e.g. COPY IN) */ + BAS_VACUUM /* VACUUM */ +} BufferAccessStrategyType; + +/* Possible modes for ReadBufferExtended() */ +typedef enum +{ + RBM_NORMAL, /* Normal read */ + RBM_ZERO_AND_LOCK, /* Don't read from disk, caller will + * initialize. Also locks the page. */ + RBM_ZERO_AND_CLEANUP_LOCK, /* Like RBM_ZERO_AND_LOCK, but locks the page + * in "cleanup" mode */ + RBM_ZERO_ON_ERROR, /* Read, but return an all-zeros page on error */ + RBM_NORMAL_NO_LOG /* Don't log page as invalid during WAL + * replay; otherwise same as RBM_NORMAL */ +} ReadBufferMode; + +/* + * Type returned by PrefetchBuffer(). + */ +typedef struct PrefetchBufferResult +{ + Buffer recent_buffer; /* If valid, a hit (recheck needed!) 
*/ + bool initiated_io; /* If true, a miss resulting in async I/O */ +} PrefetchBufferResult; + +/* forward declared, to avoid having to expose buf_internals.h here */ +struct WritebackContext; + +/* forward declared, to avoid including smgr.h here */ +struct SMgrRelationData; + +/* in globals.c ... this duplicates miscadmin.h */ +extern PGDLLIMPORT int NBuffers; + +/* in bufmgr.c */ +extern bool zero_damaged_pages; +extern int bgwriter_lru_maxpages; +extern double bgwriter_lru_multiplier; +extern bool track_io_timing; +extern int effective_io_concurrency; +extern int maintenance_io_concurrency; + +extern int checkpoint_flush_after; +extern int backend_flush_after; +extern int bgwriter_flush_after; + +/* in buf_init.c */ +extern PGDLLIMPORT char *BufferBlocks; + +/* in localbuf.c */ +extern PGDLLIMPORT int NLocBuffer; +extern PGDLLIMPORT Block *LocalBufferBlockPointers; +extern PGDLLIMPORT int32 *LocalRefCount; + +/* upper limit for effective_io_concurrency */ +#define MAX_IO_CONCURRENCY 1000 + +/* special block number for ReadBuffer() */ +#define P_NEW InvalidBlockNumber /* grow the file to get a new page */ + +/* + * Buffer content lock modes (mode argument for LockBuffer()) + */ +#define BUFFER_LOCK_UNLOCK 0 +#define BUFFER_LOCK_SHARE 1 +#define BUFFER_LOCK_EXCLUSIVE 2 + +/* + * These routines are beaten on quite heavily, hence the macroization. + */ + +/* + * BufferIsValid + * True iff the given buffer number is valid (either as a shared + * or local buffer). + * + * Note: For a long time this was defined the same as BufferIsPinned, + * that is it would say False if you didn't hold a pin on the buffer. + * I believe this was bogus and served only to mask logic errors. + * Code should always know whether it has a buffer reference, + * independently of the pin state. + * + * Note: For a further long time this was not quite the inverse of the + * BufferIsInvalid() macro, in that it also did sanity checks to verify + * that the buffer number was in range. Most likely, this macro was + * originally intended only to be used in assertions, but its use has + * since expanded quite a bit, and the overhead of making those checks + * even in non-assert-enabled builds can be significant. Thus, we've + * now demoted the range checks to assertions within the macro itself. + */ +#define BufferIsValid(bufnum) \ +( \ + AssertMacro((bufnum) <= NBuffers && (bufnum) >= -NLocBuffer), \ + (bufnum) != InvalidBuffer \ +) + +/* + * BufferGetBlock + * Returns a reference to a disk page image associated with a buffer. + * + * Note: + * Assumes buffer is valid. + */ +#define BufferGetBlock(buffer) \ +( \ + AssertMacro(BufferIsValid(buffer)), \ + BufferIsLocal(buffer) ? \ + LocalBufferBlockPointers[-(buffer) - 1] \ + : \ + (Block) (BufferBlocks + ((Size) ((buffer) - 1)) * BLCKSZ) \ +) + +/* + * BufferGetPageSize + * Returns the page size within a buffer. + * + * Notes: + * Assumes buffer is valid. + * + * The buffer can be a raw disk block and need not contain a valid + * (formatted) disk page. + */ +/* XXX should dig out of buffer descriptor */ +#define BufferGetPageSize(buffer) \ +( \ + AssertMacro(BufferIsValid(buffer)), \ + (Size)BLCKSZ \ +) + +/* + * BufferGetPage + * Returns the page associated with a buffer. + * + * When this is called as part of a scan, there may be a need for a nearby + * call to TestForOldSnapshot(). See the definition of that for details. 
+ */ +#define BufferGetPage(buffer) ((Page)BufferGetBlock(buffer)) + +/* + * prototypes for functions in bufmgr.c + */ +extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln, + ForkNumber forkNum, + BlockNumber blockNum); +extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, + BlockNumber blockNum); +extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); +extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, + BlockNumber blockNum, ReadBufferMode mode, + BufferAccessStrategy strategy); +extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, + ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode, BufferAccessStrategy strategy); +extern void ReleaseBuffer(Buffer buffer); +extern void UnlockReleaseBuffer(Buffer buffer); +extern void MarkBufferDirty(Buffer buffer); +extern void IncrBufferRefCount(Buffer buffer); +extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, + BlockNumber blockNum); + +extern void InitBufferPool(void); +extern void InitBufferPoolAccess(void); +extern void InitBufferPoolBackend(void); +extern void AtEOXact_Buffers(bool isCommit); +extern void PrintBufferLeakWarning(Buffer buffer); +extern void CheckPointBuffers(int flags); +extern BlockNumber BufferGetBlockNumber(Buffer buffer); +extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, + ForkNumber forkNum); +extern void FlushOneBuffer(Buffer buffer); +extern void FlushRelationBuffers(Relation rel); +extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels); +extern void FlushDatabaseBuffers(Oid dbid); +extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum, + int nforks, BlockNumber *firstDelBlock); +extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes); +extern void DropDatabaseBuffers(Oid dbid); + +#define RelationGetNumberOfBlocks(reln) \ + RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM) + +extern bool BufferIsPermanent(Buffer buffer); +extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer); + +#ifdef NOT_USED +extern void PrintPinnedBufs(void); +#endif +extern Size BufferShmemSize(void); +extern void BufferGetTag(Buffer buffer, RelFileNode *rnode, + ForkNumber *forknum, BlockNumber *blknum); + +extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std); + +extern void UnlockBuffers(void); +extern void LockBuffer(Buffer buffer, int mode); +extern bool ConditionalLockBuffer(Buffer buffer); +extern void LockBufferForCleanup(Buffer buffer); +extern bool ConditionalLockBufferForCleanup(Buffer buffer); +extern bool IsBufferCleanupOK(Buffer buffer); +extern bool HoldingBufferPinThatDelaysRecovery(void); + +extern void AbortBufferIO(void); + +extern void BufmgrCommit(void); +extern bool BgBufferSync(struct WritebackContext *wb_context); + +extern void AtProcExit_LocalBuffers(void); + +extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation); + +/* in freelist.c */ +extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); +extern void FreeAccessStrategy(BufferAccessStrategy strategy); + + +/* inline functions */ + +/* + * Although this header file is nominally backend-only, certain frontend + * programs like pg_waldump include it. For compilers that emit static + * inline functions even when they're unused, that leads to unsatisfied + * external references; hence hide these with #ifndef FRONTEND. 
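/*
 * Illustrative sketch, not upstream PostgreSQL code: the canonical way a
 * caller combines the routines declared above to read and inspect one block
 * of a relation -- pin, share-lock, use the page, then release.
 */
static inline void
read_block_sketch(Relation rel, BlockNumber blkno)
{
    Buffer      buf;
    Page        page;

    buf = ReadBuffer(rel, blkno);           /* pins the buffer */
    LockBuffer(buf, BUFFER_LOCK_SHARE);     /* lock its contents */

    page = BufferGetPage(buf);
    /* ... examine the page contents here ... */
    (void) page;

    UnlockReleaseBuffer(buf);               /* drop lock and pin */
}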
+ */ + +#ifndef FRONTEND + +/* + * Check whether the given snapshot is too old to have safely read the given + * page from the given table. If so, throw a "snapshot too old" error. + * + * This test generally needs to be performed after every BufferGetPage() call + * that is executed as part of a scan. It is not needed for calls made for + * modifying the page (for example, to position to the right place to insert a + * new index tuple or for vacuuming). It may also be omitted where calls to + * lower-level functions will have already performed the test. + * + * Note that a NULL snapshot argument is allowed and causes a fast return + * without error; this is to support call sites which can be called from + * either scans or index modification areas. + * + * For best performance, keep the tests that are fastest and/or most likely to + * exclude a page from old snapshot testing near the front. + */ +static inline void +TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page) +{ + Assert(relation != NULL); + + if (old_snapshot_threshold >= 0 + && (snapshot) != NULL + && ((snapshot)->snapshot_type == SNAPSHOT_MVCC + || (snapshot)->snapshot_type == SNAPSHOT_TOAST) + && !XLogRecPtrIsInvalid((snapshot)->lsn) + && PageGetLSN(page) > (snapshot)->lsn) + TestForOldSnapshot_impl(snapshot, relation); +} + +#endif /* FRONTEND */ + +#endif /* BUFMGR_H */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h new file mode 100644 index 0000000..4d0ae4b --- /dev/null +++ b/src/include/storage/bufpage.h @@ -0,0 +1,459 @@ +/*------------------------------------------------------------------------- + * + * bufpage.h + * Standard POSTGRES buffer page definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/bufpage.h + * + *------------------------------------------------------------------------- + */ +#ifndef BUFPAGE_H +#define BUFPAGE_H + +#include "access/xlogdefs.h" +#include "storage/block.h" +#include "storage/item.h" +#include "storage/off.h" + +/* + * A postgres disk page is an abstraction layered on top of a postgres + * disk block (which is simply a unit of i/o, see block.h). + * + * specifically, while a disk block can be unformatted, a postgres + * disk page is always a slotted page of the form: + * + * +----------------+---------------------------------+ + * | PageHeaderData | linp1 linp2 linp3 ... | + * +-----------+----+---------------------------------+ + * | ... linpN | | + * +-----------+--------------------------------------+ + * | ^ pd_lower | + * | | + * | v pd_upper | + * +-------------+------------------------------------+ + * | | tupleN ... | + * +-------------+------------------+-----------------+ + * | ... tuple3 tuple2 tuple1 | "special space" | + * +--------------------------------+-----------------+ + * ^ pd_special + * + * a page is full when nothing can be added between pd_lower and + * pd_upper. + * + * all blocks written out by an access method must be disk pages. + * + * EXCEPTIONS: + * + * obviously, a page is not formatted before it is initialized by + * a call to PageInit. + * + * NOTES: + * + * linp1..N form an ItemId (line pointer) array. ItemPointers point + * to a physical block number and a logical offset (line pointer + * number) within that block/page. Note that OffsetNumbers + * conventionally start at 1, not 0. + * + * tuple1..N are added "backwards" on the page. 
Since an ItemPointer + * offset is used to access an ItemId entry rather than an actual + * byte-offset position, tuples can be physically shuffled on a page + * whenever the need arises. This indirection also keeps crash recovery + * relatively simple, because the low-level details of page space + * management can be controlled by standard buffer page code during + * logging, and during recovery. + * + * AM-generic per-page information is kept in PageHeaderData. + * + * AM-specific per-page data (if any) is kept in the area marked "special + * space"; each AM has an "opaque" structure defined somewhere that is + * stored as the page trailer. an access method should always + * initialize its pages with PageInit and then set its own opaque + * fields. + */ + +typedef Pointer Page; + + +/* + * location (byte offset) within a page. + * + * note that this is actually limited to 2^15 because we have limited + * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h). + */ +typedef uint16 LocationIndex; + + +/* + * For historical reasons, the 64-bit LSN value is stored as two 32-bit + * values. + */ +typedef struct +{ + uint32 xlogid; /* high bits */ + uint32 xrecoff; /* low bits */ +} PageXLogRecPtr; + +#define PageXLogRecPtrGet(val) \ + ((uint64) (val).xlogid << 32 | (val).xrecoff) +#define PageXLogRecPtrSet(ptr, lsn) \ + ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn)) + +/* + * disk page organization + * + * space management information generic to any page + * + * pd_lsn - identifies xlog record for last change to this page. + * pd_checksum - page checksum, if set. + * pd_flags - flag bits. + * pd_lower - offset to start of free space. + * pd_upper - offset to end of free space. + * pd_special - offset to start of special space. + * pd_pagesize_version - size in bytes and page layout version number. + * pd_prune_xid - oldest XID among potentially prunable tuples on page. + * + * The LSN is used by the buffer manager to enforce the basic rule of WAL: + * "thou shalt write xlog before data". A dirty buffer cannot be dumped + * to disk until xlog has been flushed at least as far as the page's LSN. + * + * pd_checksum stores the page checksum, if it has been set for this page; + * zero is a valid value for a checksum. If a checksum is not in use then + * we leave the field unset. This will typically mean the field is zero + * though non-zero values may also be present if databases have been + * pg_upgraded from releases prior to 9.3, when the same byte offset was + * used to store the current timelineid when the page was last updated. + * Note that there is no indication on a page as to whether the checksum + * is valid or not, a deliberate design choice which avoids the problem + * of relying on the page contents to decide whether to verify it. Hence + * there are no flag bits relating to checksums. + * + * pd_prune_xid is a hint field that helps determine whether pruning will be + * useful. It is currently unused in index pages. + * + * The page version number and page size are packed together into a single + * uint16 field. This is for historical reasons: before PostgreSQL 7.3, + * there was no concept of a page version number, and doing it this way + * lets us pretend that pre-7.3 databases have page version number zero. + * We constrain page sizes to be multiples of 256, leaving the low eight + * bits available for a version number. 
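/*
 * Illustrative sketch, not upstream PostgreSQL code: PageXLogRecPtrSet() and
 * PageXLogRecPtrGet() above just split a 64-bit LSN into the two 32-bit
 * halves stored on disk and put them back together; a round trip preserves
 * the value.
 */
static inline void
page_lsn_roundtrip_sketch(XLogRecPtr lsn)
{
    PageXLogRecPtr stored;

    PageXLogRecPtrSet(stored, lsn);
    Assert(stored.xlogid == (uint32) (lsn >> 32));  /* high half */
    Assert(stored.xrecoff == (uint32) lsn);         /* low half */
    Assert(PageXLogRecPtrGet(stored) == lsn);
}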
+ * + * Minimum possible page size is perhaps 64B to fit page header, opaque space + * and a minimal tuple; of course, in reality you want it much bigger, so + * the constraint on pagesize mod 256 is not an important restriction. + * On the high end, we can only support pages up to 32KB because lp_off/lp_len + * are 15 bits. + */ + +typedef struct PageHeaderData +{ + /* XXX LSN is member of *any* block, not only page-organized ones */ + PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog + * record for last change to this page */ + uint16 pd_checksum; /* checksum */ + uint16 pd_flags; /* flag bits, see below */ + LocationIndex pd_lower; /* offset to start of free space */ + LocationIndex pd_upper; /* offset to end of free space */ + LocationIndex pd_special; /* offset to start of special space */ + uint16 pd_pagesize_version; + TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ + ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ +} PageHeaderData; + +typedef PageHeaderData *PageHeader; + +/* + * pd_flags contains the following flag bits. Undefined bits are initialized + * to zero and may be used in the future. + * + * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before + * pd_lower. This should be considered a hint rather than the truth, since + * changes to it are not WAL-logged. + * + * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the + * page for its new tuple version; this suggests that a prune is needed. + * Again, this is just a hint. + */ +#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */ +#define PD_PAGE_FULL 0x0002 /* not enough free space for new tuple? */ +#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to + * everyone */ + +#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */ + +/* + * Page layout version number 0 is for pre-7.3 Postgres releases. + * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout. + * Release 8.0 uses 2; it changed the HeapTupleHeader layout again. + * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits. + * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and + * added the pd_flags field (by stealing some bits from pd_tli), + * as well as adding the pd_prune_xid field (which enlarges the header). + * + * As of Release 9.3, the checksum version must also be considered when + * handling pages. + */ +#define PG_PAGE_LAYOUT_VERSION 4 +#define PG_DATA_CHECKSUM_VERSION 1 + +/* ---------------------------------------------------------------- + * page support macros + * ---------------------------------------------------------------- + */ + +/* + * PageIsValid + * True iff page is valid. + */ +#define PageIsValid(page) PointerIsValid(page) + +/* + * line pointer(s) do not count as part of header + */ +#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp)) + +/* + * PageIsEmpty + * returns true iff no itemid has been allocated on the page + */ +#define PageIsEmpty(page) \ + (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData) + +/* + * PageIsNew + * returns true iff page has not been initialized (by PageInit) + */ +#define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0) + +/* + * PageGetItemId + * Returns an item identifier of a page. + */ +#define PageGetItemId(page, offsetNumber) \ + ((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1])) + +/* + * PageGetContents + * To be used in cases where the page does not contain line pointers. 
+ * + * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result. + * Now it is. Beware of old code that might think the offset to the contents + * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData). + */ +#define PageGetContents(page) \ + ((char *) (page) + MAXALIGN(SizeOfPageHeaderData)) + +/* ---------------- + * macros to access page size info + * ---------------- + */ + +/* + * PageSizeIsValid + * True iff the page size is valid. + */ +#define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ) + +/* + * PageGetPageSize + * Returns the page size of a page. + * + * this can only be called on a formatted page (unlike + * BufferGetPageSize, which can be called on an unformatted page). + * however, it can be called on a page that is not stored in a buffer. + */ +#define PageGetPageSize(page) \ + ((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00)) + +/* + * PageGetPageLayoutVersion + * Returns the page layout version of a page. + */ +#define PageGetPageLayoutVersion(page) \ + (((PageHeader) (page))->pd_pagesize_version & 0x00FF) + +/* + * PageSetPageSizeAndVersion + * Sets the page size and page layout version number of a page. + * + * We could support setting these two values separately, but there's + * no real need for it at the moment. + */ +#define PageSetPageSizeAndVersion(page, size, version) \ +( \ + AssertMacro(((size) & 0xFF00) == (size)), \ + AssertMacro(((version) & 0x00FF) == (version)), \ + ((PageHeader) (page))->pd_pagesize_version = (size) | (version) \ +) + +/* ---------------- + * page special data macros + * ---------------- + */ +/* + * PageGetSpecialSize + * Returns size of special space on a page. + */ +#define PageGetSpecialSize(page) \ + ((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special)) + +/* + * Using assertions, validate that the page special pointer is OK. + * + * This is intended to catch use of the pointer before page initialization. + * It is implemented as a function due to the limitations of the MSVC + * compiler, which choked on doing all these tests within another macro. We + * return true so that AssertMacro() can be used while still getting the + * specifics from the macro failure within this function. + */ +static inline bool +PageValidateSpecialPointer(Page page) +{ + Assert(PageIsValid(page)); + Assert(((PageHeader) (page))->pd_special <= BLCKSZ); + Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData); + + return true; +} + +/* + * PageGetSpecialPointer + * Returns pointer to special space on a page. + */ +#define PageGetSpecialPointer(page) \ +( \ + AssertMacro(PageValidateSpecialPointer(page)), \ + (char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \ +) + +/* + * PageGetItem + * Retrieves an item on the given page. + * + * Note: + * This does not change the status of any of the resources passed. + * The semantics may change in the future. + */ +#define PageGetItem(page, itemId) \ +( \ + AssertMacro(PageIsValid(page)), \ + AssertMacro(ItemIdHasStorage(itemId)), \ + (Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \ +) + +/* + * PageGetMaxOffsetNumber + * Returns the maximum offset number used by the given page. + * Since offset numbers are 1-based, this is also the number + * of items on the page. + * + * NOTE: if the page is not initialized (pd_lower == 0), we must + * return zero to ensure sane behavior. Accept double evaluation + * of the argument so that we can ensure this. 
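/*
 * Illustrative sketch, not upstream PostgreSQL code: an access method
 * typically reserves its per-page "special space" when it initializes the
 * page (PageInit() is declared further down in this header) and later
 * reaches it through PageGetSpecialPointer().  The opaque struct here is
 * hypothetical, not any real AM's.
 */
typedef struct ExampleOpaqueData
{
    BlockNumber example_next;   /* hypothetical sibling link */
    uint16      example_flags;
} ExampleOpaqueData;

static inline void
init_example_page_sketch(Page page)
{
    ExampleOpaqueData *opaque;

    PageInit(page, BLCKSZ, sizeof(ExampleOpaqueData));
    opaque = (ExampleOpaqueData *) PageGetSpecialPointer(page);
    opaque->example_next = InvalidBlockNumber;
    opaque->example_flags = 0;
}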
+ */ +#define PageGetMaxOffsetNumber(page) \ + (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \ + ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \ + / sizeof(ItemIdData))) + +/* + * Additional macros for access to page headers. (Beware multiple evaluation + * of the arguments!) + */ +#define PageGetLSN(page) \ + PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn) +#define PageSetLSN(page, lsn) \ + PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn) + +#define PageHasFreeLinePointers(page) \ + (((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES) +#define PageSetHasFreeLinePointers(page) \ + (((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES) +#define PageClearHasFreeLinePointers(page) \ + (((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES) + +#define PageIsFull(page) \ + (((PageHeader) (page))->pd_flags & PD_PAGE_FULL) +#define PageSetFull(page) \ + (((PageHeader) (page))->pd_flags |= PD_PAGE_FULL) +#define PageClearFull(page) \ + (((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL) + +#define PageIsAllVisible(page) \ + (((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE) +#define PageSetAllVisible(page) \ + (((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE) +#define PageClearAllVisible(page) \ + (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE) + +#define PageIsPrunable(page, oldestxmin) \ +( \ + AssertMacro(TransactionIdIsNormal(oldestxmin)), \ + TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \ + TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \ +) +#define PageSetPrunable(page, xid) \ +do { \ + Assert(TransactionIdIsNormal(xid)); \ + if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ + TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ + ((PageHeader) (page))->pd_prune_xid = (xid); \ +} while (0) +#define PageClearPrunable(page) \ + (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) + + +/* ---------------------------------------------------------------- + * extern declarations + * ---------------------------------------------------------------- + */ + +/* flags for PageAddItemExtended() */ +#define PAI_OVERWRITE (1 << 0) +#define PAI_IS_HEAP (1 << 1) + +/* flags for PageIsVerifiedExtended() */ +#define PIV_LOG_WARNING (1 << 0) +#define PIV_REPORT_STAT (1 << 1) + +#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \ + PageAddItemExtended(page, item, size, offsetNumber, \ + ((overwrite) ? PAI_OVERWRITE : 0) | \ + ((is_heap) ? PAI_IS_HEAP : 0)) + +/* + * Check that BLCKSZ is a multiple of sizeof(size_t). In + * PageIsVerifiedExtended(), it is much faster to check if a page is + * full of zeroes using the native word size. Note that this assertion + * is kept within a header to make sure that StaticAssertDecl() works + * across various combinations of platforms and compilers. 
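/*
 * Illustrative sketch, not upstream PostgreSQL code: the usual loop over a
 * page's line pointer array using PageGetMaxOffsetNumber(), PageGetItemId()
 * and PageGetItem() from this header (ItemIdIsUsed()/ItemIdHasStorage()
 * come from itemid.h, FirstOffsetNumber from off.h).
 */
static inline int
count_used_items_sketch(Page page)
{
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
    OffsetNumber offnum;
    int         nused = 0;

    for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
    {
        ItemId      itemId = PageGetItemId(page, offnum);

        if (ItemIdIsUsed(itemId))
            nused++;
        if (ItemIdHasStorage(itemId))
            (void) PageGetItem(page, itemId);   /* the stored tuple */
    }
    return nused;
}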
+ */ +StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), + "BLCKSZ has to be a multiple of sizeof(size_t)"); + +extern void PageInit(Page page, Size pageSize, Size specialSize); +extern bool PageIsVerified(Page page, BlockNumber blkno); +extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags); +extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, + OffsetNumber offsetNumber, int flags); +extern Page PageGetTempPage(Page page); +extern Page PageGetTempPageCopy(Page page); +extern Page PageGetTempPageCopySpecial(Page page); +extern void PageRestoreTempPage(Page tempPage, Page oldPage); +extern void PageRepairFragmentation(Page page); +extern Size PageGetFreeSpace(Page page); +extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups); +extern Size PageGetExactFreeSpace(Page page); +extern Size PageGetHeapFreeSpace(Page page); +extern void PageIndexTupleDelete(Page page, OffsetNumber offset); +extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems); +extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset); +extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, + Item newtup, Size newsize); +extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); +extern void PageSetChecksumInplace(Page page, BlockNumber blkno); + +#endif /* BUFPAGE_H */ diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h new file mode 100644 index 0000000..6e77744 --- /dev/null +++ b/src/include/storage/checksum.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * checksum.h + * Checksum implementation for data pages. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/checksum.h + * + *------------------------------------------------------------------------- + */ +#ifndef CHECKSUM_H +#define CHECKSUM_H + +#include "storage/block.h" + +/* + * Compute the checksum for a Postgres page. The page must be aligned on a + * 4-byte boundary. + */ +extern uint16 pg_checksum_page(char *page, BlockNumber blkno); + +#endif /* CHECKSUM_H */ diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h new file mode 100644 index 0000000..364acfa --- /dev/null +++ b/src/include/storage/checksum_impl.h @@ -0,0 +1,215 @@ +/*------------------------------------------------------------------------- + * + * checksum_impl.h + * Checksum implementation for data pages. + * + * This file exists for the benefit of external programs that may wish to + * check Postgres page checksums. They can #include this to get the code + * referenced by storage/checksum.h. (Note: you may need to redefine + * Assert() as empty to compile this successfully externally.) + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/checksum_impl.h + * + *------------------------------------------------------------------------- + */ + +/* + * The algorithm used to checksum pages is chosen for very fast calculation. + * Workloads where the database working set fits into OS file cache but not + * into shared buffers can read in pages at a very fast pace and the checksum + * algorithm itself can become the largest bottleneck. 
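/*
 * Illustrative sketch, not upstream PostgreSQL code: building a fresh page
 * with the bufpage.c routines declared above and stamping its checksum
 * before writing it out.  The caller is assumed to supply a MAXALIGNed,
 * BLCKSZ-sized buffer; the item payload is a placeholder.
 */
static inline void
build_page_sketch(Page page, BlockNumber blkno)
{
    char        payload[] = "example item";

    PageInit(page, BLCKSZ, 0);  /* no special space */
    if (PageAddItem(page, (Item) payload, sizeof(payload),
                    InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
        elog(ERROR, "failed to add item to page");

    /* only meaningful when data checksums are enabled */
    PageSetChecksumInplace(page, blkno);
}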
+ * + * The checksum algorithm itself is based on the FNV-1a hash (FNV is shorthand + * for Fowler/Noll/Vo). The primitive of a plain FNV-1a hash folds in data 1 + * byte at a time according to the formula: + * + * hash = (hash ^ value) * FNV_PRIME + * + * FNV-1a algorithm is described at http://www.isthe.com/chongo/tech/comp/fnv/ + * + * PostgreSQL doesn't use FNV-1a hash directly because it has bad mixing of + * high bits - high order bits in input data only affect high order bits in + * output data. To resolve this we xor in the value prior to multiplication + * shifted right by 17 bits. The number 17 was chosen because it doesn't + * have common denominator with set bit positions in FNV_PRIME and empirically + * provides the fastest mixing for high order bits of final iterations quickly + * avalanche into lower positions. For performance reasons we choose to combine + * 4 bytes at a time. The actual hash formula used as the basis is: + * + * hash = (hash ^ value) * FNV_PRIME ^ ((hash ^ value) >> 17) + * + * The main bottleneck in this calculation is the multiplication latency. To + * hide the latency and to make use of SIMD parallelism multiple hash values + * are calculated in parallel. The page is treated as a 32 column two + * dimensional array of 32 bit values. Each column is aggregated separately + * into a partial checksum. Each partial checksum uses a different initial + * value (offset basis in FNV terminology). The initial values actually used + * were chosen randomly, as the values themselves don't matter as much as that + * they are different and don't match anything in real data. After initializing + * partial checksums each value in the column is aggregated according to the + * above formula. Finally two more iterations of the formula are performed with + * value 0 to mix the bits of the last value added. + * + * The partial checksums are then folded together using xor to form a single + * 32-bit checksum. The caller can safely reduce the value to 16 bits + * using modulo 2^16-1. That will cause a very slight bias towards lower + * values but this is not significant for the performance of the + * checksum. + * + * The algorithm choice was based on what instructions are available in SIMD + * instruction sets. This meant that a fast and good algorithm needed to use + * multiplication as the main mixing operator. The simplest multiplication + * based checksum primitive is the one used by FNV. The prime used is chosen + * for good dispersion of values. It has no known simple patterns that result + * in collisions. Test of 5-bit differentials of the primitive over 64bit keys + * reveals no differentials with 3 or more values out of 100000 random keys + * colliding. Avalanche test shows that only high order bits of the last word + * have a bias. Tests of 1-4 uncorrelated bit errors, stray 0 and 0xFF bytes, + * overwriting page from random position to end with 0 bytes, and overwriting + * random segments of page with 0x00, 0xFF and random data all show optimal + * 2e-16 false positive rate within margin of error. + * + * Vectorization of the algorithm requires 32bit x 32bit -> 32bit integer + * multiplication instruction. As of 2013 the corresponding instruction is + * available on x86 SSE4.1 extensions (pmulld) and ARM NEON (vmul.i32). + * Vectorization requires a compiler to do the vectorization for us. For recent + * GCC versions the flags -msse4.1 -funroll-loops -ftree-vectorize are enough + * to achieve vectorization. 
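Stripped of the 32-way parallelism, one round of the mixing primitive is exactly the formula above. A scalar sketch (illustrative only; the real per-column round is the CHECKSUM_COMP macro defined below):

/* One scalar round: fold one 32-bit value into the running hash. */
static inline uint32
checksum_round(uint32 hash, uint32 value)
{
    uint32      tmp = hash ^ value;

    return tmp * 16777619 ^ (tmp >> 17);    /* multiply by FNV_PRIME, then shift-mix */
}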
+ * + * The optimal amount of parallelism to use depends on CPU specific instruction + * latency, SIMD instruction width, throughput and the amount of registers + * available to hold intermediate state. Generally, more parallelism is better + * up to the point that state doesn't fit in registers and extra load-store + * instructions are needed to swap values in/out. The number chosen is a fixed + * part of the algorithm because changing the parallelism changes the checksum + * result. + * + * The parallelism number 32 was chosen based on the fact that it is the + * largest state that fits into architecturally visible x86 SSE registers while + * leaving some free registers for intermediate values. For future processors + * with 256bit vector registers this will leave some performance on the table. + * When vectorization is not available it might be beneficial to restructure + * the computation to calculate a subset of the columns at a time and perform + * multiple passes to avoid register spilling. This optimization opportunity + * is not used. Current coding also assumes that the compiler has the ability + * to unroll the inner loop to avoid loop overhead and minimize register + * spilling. For less sophisticated compilers it might be beneficial to + * manually unroll the inner loop. + */ + +#include "storage/bufpage.h" + +/* number of checksums to calculate in parallel */ +#define N_SUMS 32 +/* prime multiplier of FNV-1a hash */ +#define FNV_PRIME 16777619 + +/* Use a union so that this code is valid under strict aliasing */ +typedef union +{ + PageHeaderData phdr; + uint32 data[BLCKSZ / (sizeof(uint32) * N_SUMS)][N_SUMS]; +} PGChecksummablePage; + +/* + * Base offsets to initialize each of the parallel FNV hashes into a + * different initial state. + */ +static const uint32 checksumBaseOffsets[N_SUMS] = { + 0x5B1F36E9, 0xB8525960, 0x02AB50AA, 0x1DE66D2A, + 0x79FF467A, 0x9BB9F8A3, 0x217E7CD2, 0x83E13D2C, + 0xF8D4474F, 0xE39EB970, 0x42C6AE16, 0x993216FA, + 0x7B093B5D, 0x98DAFF3C, 0xF718902A, 0x0B1C9CDB, + 0xE58F764B, 0x187636BC, 0x5D7B3BB1, 0xE73DE7DE, + 0x92BEC979, 0xCCA6C0B2, 0x304A0979, 0x85AA43D4, + 0x783125BB, 0x6CA8EAA2, 0xE407EAC6, 0x4B5CFC3E, + 0x9FBF8C76, 0x15CA20BE, 0xF2CA9FD3, 0x959BD756 +}; + +/* + * Calculate one round of the checksum. + */ +#define CHECKSUM_COMP(checksum, value) \ +do { \ + uint32 __tmp = (checksum) ^ (value); \ + (checksum) = __tmp * FNV_PRIME ^ (__tmp >> 17); \ +} while (0) + +/* + * Block checksum algorithm. The page must be adequately aligned + * (at least on 4-byte boundary). + */ +static uint32 +pg_checksum_block(const PGChecksummablePage *page) +{ + uint32 sums[N_SUMS]; + uint32 result = 0; + uint32 i, + j; + + /* ensure that the size is compatible with the algorithm */ + Assert(sizeof(PGChecksummablePage) == BLCKSZ); + + /* initialize partial checksums to their corresponding offsets */ + memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); + + /* main checksum calculation */ + for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) + for (j = 0; j < N_SUMS; j++) + CHECKSUM_COMP(sums[j], page->data[i][j]); + + /* finally add in two rounds of zeroes for additional mixing */ + for (i = 0; i < 2; i++) + for (j = 0; j < N_SUMS; j++) + CHECKSUM_COMP(sums[j], 0); + + /* xor fold partial checksums together */ + for (i = 0; i < N_SUMS; i++) + result ^= sums[i]; + + return result; +} + +/* + * Compute the checksum for a Postgres page. + * + * The page must be adequately aligned (at least on a 4-byte boundary). 
+ * Beware also that the checksum field of the page is transiently zeroed. + * + * The checksum includes the block number (to detect the case where a page is + * somehow moved to a different location), the page header (excluding the + * checksum itself), and the page data. + */ +uint16 +pg_checksum_page(char *page, BlockNumber blkno) +{ + PGChecksummablePage *cpage = (PGChecksummablePage *) page; + uint16 save_checksum; + uint32 checksum; + + /* We only calculate the checksum for properly-initialized pages */ + Assert(!PageIsNew(&cpage->phdr)); + + /* + * Save pd_checksum and temporarily set it to zero, so that the checksum + * calculation isn't affected by the old checksum stored on the page. + * Restore it after, because actually updating the checksum is NOT part of + * the API of this function. + */ + save_checksum = cpage->phdr.pd_checksum; + cpage->phdr.pd_checksum = 0; + checksum = pg_checksum_block(cpage); + cpage->phdr.pd_checksum = save_checksum; + + /* Mix in the block number to detect transposed pages */ + checksum ^= blkno; + + /* + * Reduce to a uint16 (to fit in the pd_checksum field) with an offset of + * one. That avoids checksums of zero, which seems like a good idea. + */ + return (uint16) ((checksum % 65535) + 1); +} diff --git a/src/include/storage/condition_variable.h b/src/include/storage/condition_variable.h new file mode 100644 index 0000000..c2be198 --- /dev/null +++ b/src/include/storage/condition_variable.h @@ -0,0 +1,62 @@ +/*------------------------------------------------------------------------- + * + * condition_variable.h + * Condition variables + * + * A condition variable is a method of waiting until a certain condition + * becomes true. Conventionally, a condition variable supports three + * operations: (1) sleep; (2) signal, which wakes up one process sleeping + * on the condition variable; and (3) broadcast, which wakes up every + * process sleeping on the condition variable. In our implementation, + * condition variables put a process into an interruptible sleep (so it + * can be canceled prior to the fulfillment of the condition) and do not + * use pointers internally (so that they are safe to use within DSMs). + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/condition_variable.h + * + *------------------------------------------------------------------------- + */ +#ifndef CONDITION_VARIABLE_H +#define CONDITION_VARIABLE_H + +#include "storage/proclist_types.h" +#include "storage/s_lock.h" + +typedef struct +{ + slock_t mutex; /* spinlock protecting the wakeup list */ + proclist_head wakeup; /* list of wake-able processes */ +} ConditionVariable; + +/* Initialize a condition variable. */ +extern void ConditionVariableInit(ConditionVariable *cv); + +/* + * To sleep on a condition variable, a process should use a loop which first + * checks the condition, exiting the loop if it is met, and then calls + * ConditionVariableSleep. Spurious wakeups are possible, but should be + * infrequent. After exiting the loop, ConditionVariableCancelSleep must + * be called to ensure that the process is no longer in the wait list for + * the condition variable. 
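Concretely, the test-and-sleep loop described above usually looks something like the following sketch. The shared structure, its flag, and the zero wait-event value are placeholders, not part of this header; a real caller passes a WAIT_EVENT_* identifier.

for (;;)
{
    if (shared->work_ready)         /* hypothetical flag maintained by the caller */
        break;
    ConditionVariableSleep(&shared->cv, 0);
}
ConditionVariableCancelSleep();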
+ */ +extern void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info); +extern bool ConditionVariableTimedSleep(ConditionVariable *cv, long timeout, + uint32 wait_event_info); +extern void ConditionVariableCancelSleep(void); + +/* + * Optionally, ConditionVariablePrepareToSleep can be called before entering + * the test-and-sleep loop described above. Doing so is more efficient if + * at least one sleep is needed, whereas not doing so is more efficient when + * no sleep is needed because the test condition is true the first time. + */ +extern void ConditionVariablePrepareToSleep(ConditionVariable *cv); + +/* Wake up a single waiter (via signal) or all waiters (via broadcast). */ +extern void ConditionVariableSignal(ConditionVariable *cv); +extern void ConditionVariableBroadcast(ConditionVariable *cv); + +#endif /* CONDITION_VARIABLE_H */ diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h new file mode 100644 index 0000000..5d28f59 --- /dev/null +++ b/src/include/storage/copydir.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * copydir.h + * Copy a directory. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/copydir.h + * + *------------------------------------------------------------------------- + */ +#ifndef COPYDIR_H +#define COPYDIR_H + +extern void copydir(char *fromdir, char *todir, bool recurse); +extern void copy_file(char *fromfile, char *tofile); + +#endif /* COPYDIR_H */ diff --git a/src/include/storage/dsm.h b/src/include/storage/dsm.h new file mode 100644 index 0000000..408c054 --- /dev/null +++ b/src/include/storage/dsm.h @@ -0,0 +1,61 @@ +/*------------------------------------------------------------------------- + * + * dsm.h + * manage dynamic shared memory segments + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/dsm.h + * + *------------------------------------------------------------------------- + */ +#ifndef DSM_H +#define DSM_H + +#include "storage/dsm_impl.h" + +typedef struct dsm_segment dsm_segment; + +#define DSM_CREATE_NULL_IF_MAXSEGMENTS 0x0001 + +/* A sentinel value for an invalid DSM handle. */ +#define DSM_HANDLE_INVALID 0 + +/* Startup and shutdown functions. */ +struct PGShmemHeader; /* avoid including pg_shmem.h */ +extern void dsm_cleanup_using_control_segment(dsm_handle old_control_handle); +extern void dsm_postmaster_startup(struct PGShmemHeader *); +extern void dsm_backend_shutdown(void); +extern void dsm_detach_all(void); + +#ifdef EXEC_BACKEND +extern void dsm_set_control_handle(dsm_handle h); +#endif + +/* Functions that create or remove mappings. */ +extern dsm_segment *dsm_create(Size size, int flags); +extern dsm_segment *dsm_attach(dsm_handle h); +extern void dsm_detach(dsm_segment *seg); + +/* Resource management functions. */ +extern void dsm_pin_mapping(dsm_segment *seg); +extern void dsm_unpin_mapping(dsm_segment *seg); +extern void dsm_pin_segment(dsm_segment *seg); +extern void dsm_unpin_segment(dsm_handle h); +extern dsm_segment *dsm_find_mapping(dsm_handle h); + +/* Informational functions. */ +extern void *dsm_segment_address(dsm_segment *seg); +extern Size dsm_segment_map_length(dsm_segment *seg); +extern dsm_handle dsm_segment_handle(dsm_segment *seg); + +/* Cleanup hooks. 
*/ +typedef void (*on_dsm_detach_callback) (dsm_segment *, Datum arg); +extern void on_dsm_detach(dsm_segment *seg, + on_dsm_detach_callback function, Datum arg); +extern void cancel_on_dsm_detach(dsm_segment *seg, + on_dsm_detach_callback function, Datum arg); +extern void reset_on_dsm_detach(void); + +#endif /* DSM_H */ diff --git a/src/include/storage/dsm_impl.h b/src/include/storage/dsm_impl.h new file mode 100644 index 0000000..562cb78 --- /dev/null +++ b/src/include/storage/dsm_impl.h @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------- + * + * dsm_impl.h + * low-level dynamic shared memory primitives + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/dsm_impl.h + * + *------------------------------------------------------------------------- + */ +#ifndef DSM_IMPL_H +#define DSM_IMPL_H + +/* Dynamic shared memory implementations. */ +#define DSM_IMPL_POSIX 1 +#define DSM_IMPL_SYSV 2 +#define DSM_IMPL_WINDOWS 3 +#define DSM_IMPL_MMAP 4 + +/* + * Determine which dynamic shared memory implementations will be supported + * on this platform, and which one will be the default. + */ +#ifdef WIN32 +#define USE_DSM_WINDOWS +#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_WINDOWS +#else +#ifdef HAVE_SHM_OPEN +#define USE_DSM_POSIX +#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_POSIX +#endif +#define USE_DSM_SYSV +#ifndef DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE +#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_SYSV +#endif +#define USE_DSM_MMAP +#endif + +/* GUC. */ +extern int dynamic_shared_memory_type; + +/* + * Directory for on-disk state. + * + * This is used by all implementations for crash recovery and by the mmap + * implementation for storage. + */ +#define PG_DYNSHMEM_DIR "pg_dynshmem" +#define PG_DYNSHMEM_MMAP_FILE_PREFIX "mmap." + +/* A "name" for a dynamic shared memory segment. */ +typedef uint32 dsm_handle; + +/* All the shared-memory operations we know about. */ +typedef enum +{ + DSM_OP_CREATE, + DSM_OP_ATTACH, + DSM_OP_DETACH, + DSM_OP_DESTROY +} dsm_op; + +/* Create, attach to, detach from, resize, or destroy a segment. */ +extern bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, + void **impl_private, void **mapped_address, Size *mapped_size, + int elevel); + +/* Implementation-dependent actions required to keep segment until shutdown. */ +extern void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, + void **impl_private_pm_handle); +extern void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private); + +#endif /* DSM_IMPL_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h new file mode 100644 index 0000000..8cd125d --- /dev/null +++ b/src/include/storage/fd.h @@ -0,0 +1,168 @@ +/*------------------------------------------------------------------------- + * + * fd.h + * Virtual file descriptor definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/fd.h + * + *------------------------------------------------------------------------- + */ + +/* + * calls: + * + * File {Close, Read, Write, Size, Sync} + * {Path Name Open, Allocate, Free} File + * + * These are NOT JUST RENAMINGS OF THE UNIX ROUTINES. + * Use them for all file activity... 
+ * + * File fd; + * fd = PathNameOpenFile("foo", O_RDONLY); + * + * AllocateFile(); + * FreeFile(); + * + * Use AllocateFile, not fopen, if you need a stdio file (FILE*); then + * use FreeFile, not fclose, to close it. AVOID using stdio for files + * that you intend to hold open for any length of time, since there is + * no way for them to share kernel file descriptors with other files. + * + * Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate + * open directories (DIR*), and OpenTransientFile/CloseTransientFile for an + * unbuffered file descriptor. + * + * If you really can't use any of the above, at least call AcquireExternalFD + * or ReserveExternalFD to report any file descriptors that are held for any + * length of time. Failure to do so risks unnecessary EMFILE errors. + */ +#ifndef FD_H +#define FD_H + +#include + + +typedef int File; + + +/* GUC parameter */ +extern PGDLLIMPORT int max_files_per_process; +extern PGDLLIMPORT bool data_sync_retry; + +/* + * This is private to fd.c, but exported for save/restore_backend_variables() + */ +extern int max_safe_fds; + +/* + * On Windows, we have to interpret EACCES as possibly meaning the same as + * ENOENT, because if a file is unlinked-but-not-yet-gone on that platform, + * that's what you get. Ugh. This code is designed so that we don't + * actually believe these cases are okay without further evidence (namely, + * a pending fsync request getting canceled ... see ProcessSyncRequests). + */ +#ifndef WIN32 +#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT) +#else +#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES) +#endif + +/* + * prototypes for functions in fd.c + */ + +/* Operations on virtual Files --- equivalent to Unix kernel file ops */ +extern File PathNameOpenFile(const char *fileName, int fileFlags); +extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode); +extern File OpenTemporaryFile(bool interXact); +extern void FileClose(File file); +extern int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info); +extern int FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info); +extern int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info); +extern int FileSync(File file, uint32 wait_event_info); +extern off_t FileSize(File file); +extern int FileTruncate(File file, off_t offset, uint32 wait_event_info); +extern void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info); +extern char *FilePathName(File file); +extern int FileGetRawDesc(File file); +extern int FileGetRawFlags(File file); +extern mode_t FileGetRawMode(File file); + +/* Operations used for sharing named temporary files */ +extern File PathNameCreateTemporaryFile(const char *name, bool error_on_failure); +extern File PathNameOpenTemporaryFile(const char *name); +extern bool PathNameDeleteTemporaryFile(const char *name, bool error_on_failure); +extern void PathNameCreateTemporaryDir(const char *base, const char *name); +extern void PathNameDeleteTemporaryDir(const char *name); +extern void TempTablespacePath(char *path, Oid tablespace); + +/* Operations that allow use of regular stdio --- USE WITH CAUTION */ +extern FILE *AllocateFile(const char *name, const char *mode); +extern int FreeFile(FILE *file); + +/* Operations that allow use of pipe streams (popen/pclose) */ +extern FILE *OpenPipeStream(const char *command, const char *mode); +extern int ClosePipeStream(FILE *file); + +/* Operations 
to allow use of the library routines */ +extern DIR *AllocateDir(const char *dirname); +extern struct dirent *ReadDir(DIR *dir, const char *dirname); +extern struct dirent *ReadDirExtended(DIR *dir, const char *dirname, + int elevel); +extern int FreeDir(DIR *dir); + +/* Operations to allow use of a plain kernel FD, with automatic cleanup */ +extern int OpenTransientFile(const char *fileName, int fileFlags); +extern int OpenTransientFilePerm(const char *fileName, int fileFlags, mode_t fileMode); +extern int CloseTransientFile(int fd); + +/* If you've really really gotta have a plain kernel FD, use this */ +extern int BasicOpenFile(const char *fileName, int fileFlags); +extern int BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode); + +/* Use these for other cases, and also for long-lived BasicOpenFile FDs */ +extern bool AcquireExternalFD(void); +extern void ReserveExternalFD(void); +extern void ReleaseExternalFD(void); + +/* Make a directory with default permissions */ +extern int MakePGDirectory(const char *directoryName); + +/* Miscellaneous support routines */ +extern void InitFileAccess(void); +extern void set_max_safe_fds(void); +extern void closeAllVfds(void); +extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces); +extern bool TempTablespacesAreSet(void); +extern int GetTempTablespaces(Oid *tableSpaces, int numSpaces); +extern Oid GetNextTempTableSpace(void); +extern void AtEOXact_Files(bool isCommit); +extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, + SubTransactionId parentSubid); +extern void RemovePgTempFiles(void); +extern void RemovePgTempFilesInDir(const char *tmpdirname, bool missing_ok, + bool unlink_all); +extern bool looks_like_temp_rel_name(const char *name); + +extern int pg_fsync(int fd); +extern int pg_fsync_no_writethrough(int fd); +extern int pg_fsync_writethrough(int fd); +extern int pg_fdatasync(int fd); +extern void pg_flush_data(int fd, off_t offset, off_t amount); +extern void fsync_fname(const char *fname, bool isdir); +extern int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel); +extern int durable_rename(const char *oldfile, const char *newfile, int loglevel); +extern int durable_unlink(const char *fname, int loglevel); +extern int durable_rename_excl(const char *oldfile, const char *newfile, int loglevel); +extern void SyncDataDirectory(void); +extern int data_sync_elevel(int elevel); + +/* Filename components */ +#define PG_TEMP_FILES_DIR "pgsql_tmp" +#define PG_TEMP_FILE_PREFIX "pgsql_tmp" + +#endif /* FD_H */ diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h new file mode 100644 index 0000000..81668ad --- /dev/null +++ b/src/include/storage/freespace.h @@ -0,0 +1,39 @@ +/*------------------------------------------------------------------------- + * + * freespace.h + * POSTGRES free space map for quickly finding free space in relations + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/freespace.h + * + *------------------------------------------------------------------------- + */ +#ifndef FREESPACE_H_ +#define FREESPACE_H_ + +#include "storage/block.h" +#include "storage/relfilenode.h" +#include "utils/relcache.h" + +/* prototypes for public functions in freespace.c */ +extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk); +extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded); 
+extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, + BlockNumber oldPage, + Size oldSpaceAvail, + Size spaceNeeded); +extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, + Size spaceAvail); +extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, + Size spaceAvail); + +extern BlockNumber FreeSpaceMapPrepareTruncateRel(Relation rel, + BlockNumber nblocks); +extern void FreeSpaceMapVacuum(Relation rel); +extern void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, + BlockNumber end); + +#endif /* FREESPACE_H_ */ diff --git a/src/include/storage/fsm_internals.h b/src/include/storage/fsm_internals.h new file mode 100644 index 0000000..48d25a1 --- /dev/null +++ b/src/include/storage/fsm_internals.h @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------- + * + * fsm_internals.h + * internal functions for free space map + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/fsm_internals.h + * + *------------------------------------------------------------------------- + */ +#ifndef FSM_INTERNALS_H +#define FSM_INTERNALS_H + +#include "storage/buf.h" +#include "storage/bufpage.h" + +/* + * Structure of a FSM page. See src/backend/storage/freespace/README for + * details. + */ +typedef struct +{ + /* + * fsm_search_avail() tries to spread the load of multiple backends by + * returning different pages to different backends in a round-robin + * fashion. fp_next_slot points to the next slot to be returned (assuming + * there's enough space on it for the request). It's defined as an int, + * because it's updated without an exclusive lock. uint16 would be more + * appropriate, but int is more likely to be atomically + * fetchable/storable. + */ + int fp_next_slot; + + /* + * fp_nodes contains the binary tree, stored in array. The first + * NonLeafNodesPerPage elements are upper nodes, and the following + * LeafNodesPerPage elements are leaf nodes. Unused nodes are zero. + */ + uint8 fp_nodes[FLEXIBLE_ARRAY_MEMBER]; +} FSMPageData; + +typedef FSMPageData *FSMPage; + +/* + * Number of non-leaf and leaf nodes, and nodes in total, on an FSM page. + * These definitions are internal to fsmpage.c. + */ +#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \ + offsetof(FSMPageData, fp_nodes)) + +#define NonLeafNodesPerPage (BLCKSZ / 2 - 1) +#define LeafNodesPerPage (NodesPerPage - NonLeafNodesPerPage) + +/* + * Number of FSM "slots" on a FSM page. This is what should be used + * outside fsmpage.c. 
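A worked example of the definitions above, assuming the default BLCKSZ of 8192 and a typical 64-bit platform (so MAXALIGN(SizeOfPageHeaderData) = 24 and offsetof(FSMPageData, fp_nodes) = 4):

/*
 * NodesPerPage        = 8192 - 24 - 4 = 8164
 * NonLeafNodesPerPage = 8192 / 2 - 1  = 4095
 * LeafNodesPerPage    = 8164 - 4095   = 4069
 *
 * so each FSM page has roughly four thousand leaf slots.
 */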
+ */ +#define SlotsPerFSMPage LeafNodesPerPage + +/* Prototypes for functions in fsmpage.c */ +extern int fsm_search_avail(Buffer buf, uint8 min_cat, bool advancenext, + bool exclusive_lock_held); +extern uint8 fsm_get_avail(Page page, int slot); +extern uint8 fsm_get_max_avail(Page page); +extern bool fsm_set_avail(Page page, int slot, uint8 value); +extern bool fsm_truncate_avail(Page page, int nslots); +extern bool fsm_rebuild_page(Page page); + +#endif /* FSM_INTERNALS_H */ diff --git a/src/include/storage/indexfsm.h b/src/include/storage/indexfsm.h new file mode 100644 index 0000000..fb896b4 --- /dev/null +++ b/src/include/storage/indexfsm.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * indexfsm.h + * POSTGRES free space map for quickly finding an unused page in index + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/indexfsm.h + * + *------------------------------------------------------------------------- + */ +#ifndef INDEXFSM_H_ +#define INDEXFSM_H_ + +#include "storage/block.h" +#include "utils/relcache.h" + +extern BlockNumber GetFreeIndexPage(Relation rel); +extern void RecordFreeIndexPage(Relation rel, BlockNumber page); +extern void RecordUsedIndexPage(Relation rel, BlockNumber page); + +extern void IndexFreeSpaceMapVacuum(Relation rel); + +#endif /* INDEXFSM_H_ */ diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h new file mode 100644 index 0000000..462fe46 --- /dev/null +++ b/src/include/storage/ipc.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * ipc.h + * POSTGRES inter-process communication definitions. + * + * This file is misnamed, as it no longer has much of anything directly + * to do with IPC. The functionality here is concerned with managing + * exit-time cleanup for either a postmaster or a backend. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/ipc.h + * + *------------------------------------------------------------------------- + */ +#ifndef IPC_H +#define IPC_H + +typedef void (*pg_on_exit_callback) (int code, Datum arg); +typedef void (*shmem_startup_hook_type) (void); + +/*---------- + * API for handling cleanup that must occur during either ereport(ERROR) + * or ereport(FATAL) exits from a block of code. (Typical examples are + * undoing transient changes to shared-memory state.) + * + * PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg); + * { + * ... code that might throw ereport(ERROR) or ereport(FATAL) ... + * } + * PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg); + * + * where the cleanup code is in a function declared per pg_on_exit_callback. + * The Datum value "arg" can carry any information the cleanup function + * needs. + * + * This construct ensures that cleanup_function() will be called during + * either ERROR or FATAL exits. It will not be called on successful + * exit from the controlled code. (If you want it to happen then too, + * call the function yourself from just after the construct.) + * + * Note: the macro arguments are multiply evaluated, so avoid side-effects. 
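To make the pattern concrete, a caller might look roughly like the sketch below. Everything named here is hypothetical (the cleanup function, the MyShared structure and its flag); the point is that the cleanup routine follows the pg_on_exit_callback signature and receives its state through the Datum argument.

/* Cleanup callback, declared per pg_on_exit_callback. */
static void
reset_my_shared_flag(int code, Datum arg)
{
    volatile bool *flag = (volatile bool *) DatumGetPointer(arg);

    *flag = false;
}

/* ... in the code that transiently sets the flag ... */
PG_ENSURE_ERROR_CLEANUP(reset_my_shared_flag, PointerGetDatum(&MyShared->flag));
{
    MyShared->flag = true;
    /* work that may ereport(ERROR) or ereport(FATAL) */
}
PG_END_ENSURE_ERROR_CLEANUP(reset_my_shared_flag, PointerGetDatum(&MyShared->flag));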
+ *---------- + */ +#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg) \ + do { \ + before_shmem_exit(cleanup_function, arg); \ + PG_TRY() + +#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg) \ + cancel_before_shmem_exit(cleanup_function, arg); \ + PG_CATCH(); \ + { \ + cancel_before_shmem_exit(cleanup_function, arg); \ + cleanup_function (0, arg); \ + PG_RE_THROW(); \ + } \ + PG_END_TRY(); \ + } while (0) + + +/* ipc.c */ +extern PGDLLIMPORT bool proc_exit_inprogress; +extern PGDLLIMPORT bool shmem_exit_inprogress; + +extern void proc_exit(int code) pg_attribute_noreturn(); +extern void shmem_exit(int code); +extern void on_proc_exit(pg_on_exit_callback function, Datum arg); +extern void on_shmem_exit(pg_on_exit_callback function, Datum arg); +extern void before_shmem_exit(pg_on_exit_callback function, Datum arg); +extern void cancel_before_shmem_exit(pg_on_exit_callback function, Datum arg); +extern void on_exit_reset(void); + +/* ipci.c */ +extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook; + +extern void CreateSharedMemoryAndSemaphores(void); + +#endif /* IPC_H */ diff --git a/src/include/storage/item.h b/src/include/storage/item.h new file mode 100644 index 0000000..43a4726 --- /dev/null +++ b/src/include/storage/item.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * item.h + * POSTGRES disk item definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/item.h + * + *------------------------------------------------------------------------- + */ +#ifndef ITEM_H +#define ITEM_H + +typedef Pointer Item; + +#endif /* ITEM_H */ diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h new file mode 100644 index 0000000..5b0229b --- /dev/null +++ b/src/include/storage/itemid.h @@ -0,0 +1,184 @@ +/*------------------------------------------------------------------------- + * + * itemid.h + * Standard POSTGRES buffer page item identifier/line pointer definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/itemid.h + * + *------------------------------------------------------------------------- + */ +#ifndef ITEMID_H +#define ITEMID_H + +/* + * A line pointer on a buffer page. See buffer page definitions and comments + * for an explanation of how line pointers are used. + * + * In some cases a line pointer is "in use" but does not have any associated + * storage on the page. By convention, lp_len == 0 in every line pointer + * that does not have storage, independently of its lp_flags state. + */ +typedef struct ItemIdData +{ + unsigned lp_off:15, /* offset to tuple (from start of page) */ + lp_flags:2, /* state of line pointer, see below */ + lp_len:15; /* byte length of tuple */ +} ItemIdData; + +typedef ItemIdData *ItemId; + +/* + * lp_flags has these possible states. An UNUSED line pointer is available + * for immediate re-use, the other states are not. + */ +#define LP_UNUSED 0 /* unused (should always have lp_len=0) */ +#define LP_NORMAL 1 /* used (should always have lp_len>0) */ +#define LP_REDIRECT 2 /* HOT redirect (should have lp_len=0) */ +#define LP_DEAD 3 /* dead, may or may not have storage */ + +/* + * Item offsets and lengths are represented by these types when + * they're not actually stored in an ItemIdData. 
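As a quick illustration of the struct and flag values above (a sketch; the function name is invented, and real code normally goes through the ItemId* macros defined just below):

/* Map a line pointer's state to a printable name, per the LP_* values. */
static const char *
lp_state_name(ItemIdData lp)
{
    switch (lp.lp_flags)
    {
        case LP_UNUSED:
            return "unused";
        case LP_NORMAL:
            return "normal";        /* lp_off/lp_len describe tuple storage */
        case LP_REDIRECT:
            return "redirect";      /* lp_off is really an offset number */
        case LP_DEAD:
            return lp.lp_len != 0 ? "dead (with storage)" : "dead";
    }
    return "corrupt";               /* unreachable for a 2-bit field */
}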
+ */ +typedef uint16 ItemOffset; +typedef uint16 ItemLength; + + +/* ---------------- + * support macros + * ---------------- + */ + +/* + * ItemIdGetLength + */ +#define ItemIdGetLength(itemId) \ + ((itemId)->lp_len) + +/* + * ItemIdGetOffset + */ +#define ItemIdGetOffset(itemId) \ + ((itemId)->lp_off) + +/* + * ItemIdGetFlags + */ +#define ItemIdGetFlags(itemId) \ + ((itemId)->lp_flags) + +/* + * ItemIdGetRedirect + * In a REDIRECT pointer, lp_off holds offset number for next line pointer + */ +#define ItemIdGetRedirect(itemId) \ + ((itemId)->lp_off) + +/* + * ItemIdIsValid + * True iff item identifier is valid. + * This is a pretty weak test, probably useful only in Asserts. + */ +#define ItemIdIsValid(itemId) PointerIsValid(itemId) + +/* + * ItemIdIsUsed + * True iff item identifier is in use. + */ +#define ItemIdIsUsed(itemId) \ + ((itemId)->lp_flags != LP_UNUSED) + +/* + * ItemIdIsNormal + * True iff item identifier is in state NORMAL. + */ +#define ItemIdIsNormal(itemId) \ + ((itemId)->lp_flags == LP_NORMAL) + +/* + * ItemIdIsRedirected + * True iff item identifier is in state REDIRECT. + */ +#define ItemIdIsRedirected(itemId) \ + ((itemId)->lp_flags == LP_REDIRECT) + +/* + * ItemIdIsDead + * True iff item identifier is in state DEAD. + */ +#define ItemIdIsDead(itemId) \ + ((itemId)->lp_flags == LP_DEAD) + +/* + * ItemIdHasStorage + * True iff item identifier has associated storage. + */ +#define ItemIdHasStorage(itemId) \ + ((itemId)->lp_len != 0) + +/* + * ItemIdSetUnused + * Set the item identifier to be UNUSED, with no storage. + * Beware of multiple evaluations of itemId! + */ +#define ItemIdSetUnused(itemId) \ +( \ + (itemId)->lp_flags = LP_UNUSED, \ + (itemId)->lp_off = 0, \ + (itemId)->lp_len = 0 \ +) + +/* + * ItemIdSetNormal + * Set the item identifier to be NORMAL, with the specified storage. + * Beware of multiple evaluations of itemId! + */ +#define ItemIdSetNormal(itemId, off, len) \ +( \ + (itemId)->lp_flags = LP_NORMAL, \ + (itemId)->lp_off = (off), \ + (itemId)->lp_len = (len) \ +) + +/* + * ItemIdSetRedirect + * Set the item identifier to be REDIRECT, with the specified link. + * Beware of multiple evaluations of itemId! + */ +#define ItemIdSetRedirect(itemId, link) \ +( \ + (itemId)->lp_flags = LP_REDIRECT, \ + (itemId)->lp_off = (link), \ + (itemId)->lp_len = 0 \ +) + +/* + * ItemIdSetDead + * Set the item identifier to be DEAD, with no storage. + * Beware of multiple evaluations of itemId! + */ +#define ItemIdSetDead(itemId) \ +( \ + (itemId)->lp_flags = LP_DEAD, \ + (itemId)->lp_off = 0, \ + (itemId)->lp_len = 0 \ +) + +/* + * ItemIdMarkDead + * Set the item identifier to be DEAD, keeping its existing storage. + * + * Note: in indexes, this is used as if it were a hint-bit mechanism; + * we trust that multiple processors can do this in parallel and get + * the same result. + */ +#define ItemIdMarkDead(itemId) \ +( \ + (itemId)->lp_flags = LP_DEAD \ +) + +#endif /* ITEMID_H */ diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h new file mode 100644 index 0000000..944f6fe --- /dev/null +++ b/src/include/storage/itemptr.h @@ -0,0 +1,206 @@ +/*------------------------------------------------------------------------- + * + * itemptr.h + * POSTGRES disk item pointer definitions. 
+ * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/itemptr.h + * + *------------------------------------------------------------------------- + */ +#ifndef ITEMPTR_H +#define ITEMPTR_H + +#include "storage/block.h" +#include "storage/off.h" + +/* + * ItemPointer: + * + * This is a pointer to an item within a disk page of a known file + * (for example, a cross-link from an index to its parent table). + * ip_blkid tells us which block, ip_posid tells us which entry in + * the linp (ItemIdData) array we want. + * + * Note: because there is an item pointer in each tuple header and index + * tuple header on disk, it's very important not to waste space with + * structure padding bytes. The struct is designed to be six bytes long + * (it contains three int16 fields) but a few compilers will pad it to + * eight bytes unless coerced. We apply appropriate persuasion where + * possible. If your compiler can't be made to play along, you'll waste + * lots of space. + */ +typedef struct ItemPointerData +{ + BlockIdData ip_blkid; + OffsetNumber ip_posid; +} + +/* If compiler understands packed and aligned pragmas, use those */ +#if defined(pg_attribute_packed) && defined(pg_attribute_aligned) + pg_attribute_packed() + pg_attribute_aligned(2) +#endif +ItemPointerData; + +typedef ItemPointerData *ItemPointer; + +/* ---------------- + * special values used in heap tuples (t_ctid) + * ---------------- + */ + +/* + * If a heap tuple holds a speculative insertion token rather than a real + * TID, ip_posid is set to SpecTokenOffsetNumber, and the token is stored in + * ip_blkid. SpecTokenOffsetNumber must be higher than MaxOffsetNumber, so + * that it can be distinguished from a valid offset number in a regular item + * pointer. + */ +#define SpecTokenOffsetNumber 0xfffe + +/* + * When a tuple is moved to a different partition by UPDATE, the t_ctid of + * the old tuple version is set to this magic value. + */ +#define MovedPartitionsOffsetNumber 0xfffd +#define MovedPartitionsBlockNumber InvalidBlockNumber + + +/* ---------------- + * support macros + * ---------------- + */ + +/* + * ItemPointerIsValid + * True iff the disk item pointer is not NULL. + */ +#define ItemPointerIsValid(pointer) \ + ((bool) (PointerIsValid(pointer) && ((pointer)->ip_posid != 0))) + +/* + * ItemPointerGetBlockNumberNoCheck + * Returns the block number of a disk item pointer. + */ +#define ItemPointerGetBlockNumberNoCheck(pointer) \ +( \ + BlockIdGetBlockNumber(&(pointer)->ip_blkid) \ +) + +/* + * ItemPointerGetBlockNumber + * As above, but verifies that the item pointer looks valid. + */ +#define ItemPointerGetBlockNumber(pointer) \ +( \ + AssertMacro(ItemPointerIsValid(pointer)), \ + ItemPointerGetBlockNumberNoCheck(pointer) \ +) + +/* + * ItemPointerGetOffsetNumberNoCheck + * Returns the offset number of a disk item pointer. + */ +#define ItemPointerGetOffsetNumberNoCheck(pointer) \ +( \ + (pointer)->ip_posid \ +) + +/* + * ItemPointerGetOffsetNumber + * As above, but verifies that the item pointer looks valid. + */ +#define ItemPointerGetOffsetNumber(pointer) \ +( \ + AssertMacro(ItemPointerIsValid(pointer)), \ + ItemPointerGetOffsetNumberNoCheck(pointer) \ +) + +/* + * ItemPointerSet + * Sets a disk item pointer to the specified block and offset. 
+ */ +#define ItemPointerSet(pointer, blockNumber, offNum) \ +( \ + AssertMacro(PointerIsValid(pointer)), \ + BlockIdSet(&((pointer)->ip_blkid), blockNumber), \ + (pointer)->ip_posid = offNum \ +) + +/* + * ItemPointerSetBlockNumber + * Sets a disk item pointer to the specified block. + */ +#define ItemPointerSetBlockNumber(pointer, blockNumber) \ +( \ + AssertMacro(PointerIsValid(pointer)), \ + BlockIdSet(&((pointer)->ip_blkid), blockNumber) \ +) + +/* + * ItemPointerSetOffsetNumber + * Sets a disk item pointer to the specified offset. + */ +#define ItemPointerSetOffsetNumber(pointer, offsetNumber) \ +( \ + AssertMacro(PointerIsValid(pointer)), \ + (pointer)->ip_posid = (offsetNumber) \ +) + +/* + * ItemPointerCopy + * Copies the contents of one disk item pointer to another. + * + * Should there ever be padding in an ItemPointer this would need to be handled + * differently as it's used as hash key. + */ +#define ItemPointerCopy(fromPointer, toPointer) \ +( \ + AssertMacro(PointerIsValid(toPointer)), \ + AssertMacro(PointerIsValid(fromPointer)), \ + *(toPointer) = *(fromPointer) \ +) + +/* + * ItemPointerSetInvalid + * Sets a disk item pointer to be invalid. + */ +#define ItemPointerSetInvalid(pointer) \ +( \ + AssertMacro(PointerIsValid(pointer)), \ + BlockIdSet(&((pointer)->ip_blkid), InvalidBlockNumber), \ + (pointer)->ip_posid = InvalidOffsetNumber \ +) + +/* + * ItemPointerIndicatesMovedPartitions + * True iff the block number indicates the tuple has moved to another + * partition. + */ +#define ItemPointerIndicatesMovedPartitions(pointer) \ +( \ + ItemPointerGetOffsetNumber(pointer) == MovedPartitionsOffsetNumber && \ + ItemPointerGetBlockNumberNoCheck(pointer) == MovedPartitionsBlockNumber \ +) + +/* + * ItemPointerSetMovedPartitions + * Indicate that the item referenced by the itempointer has moved into a + * different partition. + */ +#define ItemPointerSetMovedPartitions(pointer) \ + ItemPointerSet((pointer), MovedPartitionsBlockNumber, MovedPartitionsOffsetNumber) + +/* ---------------- + * externs + * ---------------- + */ + +extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2); +extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2); + +#endif /* ITEMPTR_H */ diff --git a/src/include/storage/large_object.h b/src/include/storage/large_object.h new file mode 100644 index 0000000..2623d1a --- /dev/null +++ b/src/include/storage/large_object.h @@ -0,0 +1,100 @@ +/*------------------------------------------------------------------------- + * + * large_object.h + * Declarations for PostgreSQL large objects. POSTGRES 4.2 supported + * zillions of large objects (internal, external, jaquith, inversion). + * Now we only support inversion. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/large_object.h + * + *------------------------------------------------------------------------- + */ +#ifndef LARGE_OBJECT_H +#define LARGE_OBJECT_H + +#include "utils/snapshot.h" + + +/*---------- + * Data about a currently-open large object. 
+ * + * id is the logical OID of the large object + * snapshot is the snapshot to use for read/write operations + * subid is the subtransaction that opened the desc (or currently owns it) + * offset is the current seek offset within the LO + * flags contains some flag bits + * + * NOTE: as of v11, permission checks are made when the large object is + * opened; therefore IFS_RDLOCK/IFS_WRLOCK indicate that read or write mode + * has been requested *and* the corresponding permission has been checked. + * + * NOTE: before 7.1, we also had to store references to the separate table + * and index of a specific large object. Now they all live in pg_largeobject + * and are accessed via a common relation descriptor. + *---------- + */ +typedef struct LargeObjectDesc +{ + Oid id; /* LO's identifier */ + Snapshot snapshot; /* snapshot to use */ + SubTransactionId subid; /* owning subtransaction ID */ + uint64 offset; /* current seek pointer */ + int flags; /* see flag bits below */ + +/* bits in flags: */ +#define IFS_RDLOCK (1 << 0) /* LO was opened for reading */ +#define IFS_WRLOCK (1 << 1) /* LO was opened for writing */ + +} LargeObjectDesc; + + +/* + * Each "page" (tuple) of a large object can hold this much data + * + * We could set this as high as BLCKSZ less some overhead, but it seems + * better to make it a smaller value, so that not as much space is used + * up when a page-tuple is updated. Note that the value is deliberately + * chosen large enough to trigger the tuple toaster, so that we will + * attempt to compress page tuples in-line. (But they won't be moved off + * unless the user creates a toast-table for pg_largeobject...) + * + * Also, it seems to be a smart move to make the page size be a power of 2, + * since clients will often be written to send data in power-of-2 blocks. + * This avoids unnecessary tuple updates caused by partial-page writes. + * + * NB: Changing LOBLKSIZE requires an initdb. + */ +#define LOBLKSIZE (BLCKSZ / 4) + +/* + * Maximum length in bytes for a large object. To make this larger, we'd + * have to widen pg_largeobject.pageno as well as various internal variables. + */ +#define MAX_LARGE_OBJECT_SIZE ((int64) INT_MAX * LOBLKSIZE) + + +/* + * GUC: backwards-compatibility flag to suppress LO permission checks + */ +extern bool lo_compat_privileges; + +/* + * Function definitions... + */ + +/* inversion stuff in inv_api.c */ +extern void close_lo_relation(bool isCommit); +extern Oid inv_create(Oid lobjId); +extern LargeObjectDesc *inv_open(Oid lobjId, int flags, MemoryContext mcxt); +extern void inv_close(LargeObjectDesc *obj_desc); +extern int inv_drop(Oid lobjId); +extern int64 inv_seek(LargeObjectDesc *obj_desc, int64 offset, int whence); +extern int64 inv_tell(LargeObjectDesc *obj_desc); +extern int inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes); +extern int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes); +extern void inv_truncate(LargeObjectDesc *obj_desc, int64 len); + +#endif /* LARGE_OBJECT_H */ diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h new file mode 100644 index 0000000..46ae56c --- /dev/null +++ b/src/include/storage/latch.h @@ -0,0 +1,190 @@ +/*------------------------------------------------------------------------- + * + * latch.h + * Routines for interprocess latches + * + * A latch is a boolean variable, with operations that let processes sleep + * until it is set. A latch can be set from another process, or a signal + * handler within the same process. 
+ * + * The latch interface is a reliable replacement for the common pattern of + * using pg_usleep() or select() to wait until a signal arrives, where the + * signal handler sets a flag variable. Because on some platforms an + * incoming signal doesn't interrupt sleep, and even on platforms where it + * does there is a race condition if the signal arrives just before + * entering the sleep, the common pattern must periodically wake up and + * poll the flag variable. The pselect() system call was invented to solve + * this problem, but it is not portable enough. Latches are designed to + * overcome these limitations, allowing you to sleep without polling and + * ensuring quick response to signals from other processes. + * + * There are two kinds of latches: local and shared. A local latch is + * initialized by InitLatch, and can only be set from the same process. + * A local latch can be used to wait for a signal to arrive, by calling + * SetLatch in the signal handler. A shared latch resides in shared memory, + * and must be initialized at postmaster startup by InitSharedLatch. Before + * a shared latch can be waited on, it must be associated with a process + * with OwnLatch. Only the process owning the latch can wait on it, but any + * process can set it. + * + * There are three basic operations on a latch: + * + * SetLatch - Sets the latch + * ResetLatch - Clears the latch, allowing it to be set again + * WaitLatch - Waits for the latch to become set + * + * WaitLatch includes a provision for timeouts (which should be avoided + * when possible, as they incur extra overhead) and a provision for + * postmaster child processes to wake up immediately on postmaster death. + * See latch.c for detailed specifications for the exported functions. + * + * The correct pattern to wait for event(s) is: + * + * for (;;) + * { + * ResetLatch(); + * if (work to do) + * Do Stuff(); + * WaitLatch(); + * } + * + * It's important to reset the latch *before* checking if there's work to + * do. Otherwise, if someone sets the latch between the check and the + * ResetLatch call, you will miss it and Wait will incorrectly block. + * + * Another valid coding pattern looks like: + * + * for (;;) + * { + * if (work to do) + * Do Stuff(); // in particular, exit loop if some condition satisfied + * WaitLatch(); + * ResetLatch(); + * } + * + * This is useful to reduce latch traffic if it's expected that the loop's + * termination condition will often be satisfied in the first iteration; + * the cost is an extra loop iteration before blocking when it is not. + * What must be avoided is placing any checks for asynchronous events after + * WaitLatch and before ResetLatch, as that creates a race condition. + * + * To wake up the waiter, you must first set a global flag or something + * else that the wait loop tests in the "if (work to do)" part, and call + * SetLatch *after* that. SetLatch is designed to return quickly if the + * latch is already set. + * + * On some platforms, signals will not interrupt the latch wait primitive + * by themselves. Therefore, it is critical that any signal handler that + * is meant to terminate a WaitLatch wait calls SetLatch. + * + * Note that use of the process latch (PGPROC.procLatch) is generally better + * than an ad-hoc shared latch for signaling auxiliary processes. This is + * because generic signal handlers will call SetLatch on the process latch + * only, so using any latch other than the process latch effectively precludes + * use of any generic handler. 
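Filled in with the functions and flags declared in this header, the recommended loop might look like the sketch below. MyLatch is the process latch; the shutdown flag, the work test, and the ten-second timeout are placeholders, and a real caller passes a WAIT_EVENT_* value instead of 0.

for (;;)
{
    ResetLatch(MyLatch);

    if (got_shutdown_request)       /* hypothetical flag set by a signal handler */
        break;
    if (work_available())           /* hypothetical */
        do_work();

    (void) WaitLatch(MyLatch,
                     WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                     10 * 1000L,    /* wake at least every 10 seconds */
                     0);
}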
+ * + * + * WaitEventSets allow to wait for latches being set and additional events - + * postmaster dying and socket readiness of several sockets currently - at the + * same time. On many platforms using a long lived event set is more + * efficient than using WaitLatch or WaitLatchOrSocket. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/latch.h + * + *------------------------------------------------------------------------- + */ +#ifndef LATCH_H +#define LATCH_H + +#include + +/* + * Latch structure should be treated as opaque and only accessed through + * the public functions. It is defined here to allow embedding Latches as + * part of bigger structs. + */ +typedef struct Latch +{ + sig_atomic_t is_set; + bool is_shared; + int owner_pid; +#ifdef WIN32 + HANDLE event; +#endif +} Latch; + +/* + * Bitmasks for events that may wake-up WaitLatch(), WaitLatchOrSocket(), or + * WaitEventSetWait(). + */ +#define WL_LATCH_SET (1 << 0) +#define WL_SOCKET_READABLE (1 << 1) +#define WL_SOCKET_WRITEABLE (1 << 2) +#define WL_TIMEOUT (1 << 3) /* not for WaitEventSetWait() */ +#define WL_POSTMASTER_DEATH (1 << 4) +#define WL_EXIT_ON_PM_DEATH (1 << 5) +#ifdef WIN32 +#define WL_SOCKET_CONNECTED (1 << 6) +#else +/* avoid having to deal with case on platforms not requiring it */ +#define WL_SOCKET_CONNECTED WL_SOCKET_WRITEABLE +#endif + +#define WL_SOCKET_MASK (WL_SOCKET_READABLE | \ + WL_SOCKET_WRITEABLE | \ + WL_SOCKET_CONNECTED) + +typedef struct WaitEvent +{ + int pos; /* position in the event data structure */ + uint32 events; /* triggered events */ + pgsocket fd; /* socket fd associated with event */ + void *user_data; /* pointer provided in AddWaitEventToSet */ +#ifdef WIN32 + bool reset; /* Is reset of the event required? */ +#endif +} WaitEvent; + +/* forward declaration to avoid exposing latch.c implementation details */ +typedef struct WaitEventSet WaitEventSet; + +/* + * prototypes for functions in latch.c + */ +extern void InitializeLatchSupport(void); +extern void InitLatch(Latch *latch); +extern void InitSharedLatch(Latch *latch); +extern void OwnLatch(Latch *latch); +extern void DisownLatch(Latch *latch); +extern void SetLatch(Latch *latch); +extern void ResetLatch(Latch *latch); + +extern WaitEventSet *CreateWaitEventSet(MemoryContext context, int nevents); +extern void FreeWaitEventSet(WaitEventSet *set); +extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, + Latch *latch, void *user_data); +extern void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch); + +extern int WaitEventSetWait(WaitEventSet *set, long timeout, + WaitEvent *occurred_events, int nevents, + uint32 wait_event_info); +extern int WaitLatch(Latch *latch, int wakeEvents, long timeout, + uint32 wait_event_info); +extern int WaitLatchOrSocket(Latch *latch, int wakeEvents, + pgsocket sock, long timeout, uint32 wait_event_info); + +/* + * Unix implementation uses SIGUSR1 for inter-process signaling. + * Win32 doesn't need this. 
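The WaitEventSet functions declared above are typically used to build one long-lived set per wait site. A rough sketch (the zero wait-event value is a placeholder; error handling and FreeWaitEventSet() are omitted):

WaitEventSet *wes = CreateWaitEventSet(CurrentMemoryContext, 2);
WaitEvent     event;

AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
AddWaitEventToSet(wes, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET, NULL, NULL);

for (;;)
{
    (void) WaitEventSetWait(wes, -1L /* no timeout */, &event, 1, 0);

    if (event.events & WL_LATCH_SET)
    {
        ResetLatch(MyLatch);
        /* ... re-check the condition this process is waiting for ... */
    }
}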
+ */ +#ifndef WIN32 +extern void latch_sigusr1_handler(void); +#else +#define latch_sigusr1_handler() ((void) 0) +#endif + +#endif /* LATCH_H */ diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h new file mode 100644 index 0000000..f7cabcb --- /dev/null +++ b/src/include/storage/lmgr.h @@ -0,0 +1,114 @@ +/*------------------------------------------------------------------------- + * + * lmgr.h + * POSTGRES lock manager definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/lmgr.h + * + *------------------------------------------------------------------------- + */ +#ifndef LMGR_H +#define LMGR_H + +#include "lib/stringinfo.h" +#include "storage/itemptr.h" +#include "storage/lock.h" +#include "utils/rel.h" + + +/* XactLockTableWait operations */ +typedef enum XLTW_Oper +{ + XLTW_None, + XLTW_Update, + XLTW_Delete, + XLTW_Lock, + XLTW_LockUpdated, + XLTW_InsertIndex, + XLTW_InsertIndexUnique, + XLTW_FetchUpdated, + XLTW_RecheckExclusionConstr +} XLTW_Oper; + +extern void RelationInitLockInfo(Relation relation); + +/* Lock a relation */ +extern void LockRelationOid(Oid relid, LOCKMODE lockmode); +extern bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode); +extern void UnlockRelationId(LockRelId *relid, LOCKMODE lockmode); +extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode); + +extern void LockRelation(Relation relation, LOCKMODE lockmode); +extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode); +extern void UnlockRelation(Relation relation, LOCKMODE lockmode); +extern bool CheckRelationLockedByMe(Relation relation, LOCKMODE lockmode, + bool orstronger); +extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode); + +extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); +extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); + +/* Lock a relation for extension */ +extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode); +extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode); +extern bool ConditionalLockRelationForExtension(Relation relation, + LOCKMODE lockmode); +extern int RelationExtensionLockWaiterCount(Relation relation); + +/* Lock to recompute pg_database.datfrozenxid in the current database */ +extern void LockDatabaseFrozenIds(LOCKMODE lockmode); + +/* Lock a page (currently only used within indexes) */ +extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); +extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); +extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); + +/* Lock a tuple (see heap_lock_tuple before assuming you understand this) */ +extern void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); +extern bool ConditionalLockTuple(Relation relation, ItemPointer tid, + LOCKMODE lockmode); +extern void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); + +/* Lock an XID (used to wait for a transaction to finish) */ +extern void XactLockTableInsert(TransactionId xid); +extern void XactLockTableDelete(TransactionId xid); +extern void XactLockTableWait(TransactionId xid, Relation rel, + ItemPointer ctid, XLTW_Oper oper); +extern bool ConditionalXactLockTableWait(TransactionId xid); + +/* Lock VXIDs, specified by conflicting locktags */ +extern void WaitForLockers(LOCKTAG 
heaplocktag, LOCKMODE lockmode, bool progress); +extern void WaitForLockersMultiple(List *locktags, LOCKMODE lockmode, bool progress); + +/* Lock an XID for tuple insertion (used to wait for an insertion to finish) */ +extern uint32 SpeculativeInsertionLockAcquire(TransactionId xid); +extern void SpeculativeInsertionLockRelease(TransactionId xid); +extern void SpeculativeInsertionWait(TransactionId xid, uint32 token); + +/* Lock a general object (other than a relation) of the current database */ +extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid, + LOCKMODE lockmode); +extern void UnlockDatabaseObject(Oid classid, Oid objid, uint16 objsubid, + LOCKMODE lockmode); + +/* Lock a shared-across-databases object (other than a relation) */ +extern void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, + LOCKMODE lockmode); +extern void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, + LOCKMODE lockmode); + +extern void LockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid, + LOCKMODE lockmode); +extern void UnlockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid, + LOCKMODE lockmode); + +/* Describe a locktag for error messages */ +extern void DescribeLockTag(StringInfo buf, const LOCKTAG *tag); + +extern const char *GetLockNameFromTagType(uint16 locktag_type); + +#endif /* LMGR_H */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h new file mode 100644 index 0000000..2987c5e --- /dev/null +++ b/src/include/storage/lock.h @@ -0,0 +1,612 @@ +/*------------------------------------------------------------------------- + * + * lock.h + * POSTGRES low-level lock mechanism + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/lock.h + * + *------------------------------------------------------------------------- + */ +#ifndef LOCK_H_ +#define LOCK_H_ + +#ifdef FRONTEND +#error "lock.h may not be included from frontend code" +#endif + +#include "storage/backendid.h" +#include "storage/lockdefs.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" + +/* struct PGPROC is declared in proc.h, but must forward-reference it */ +typedef struct PGPROC PGPROC; + +typedef struct PROC_QUEUE +{ + SHM_QUEUE links; /* head of list of PGPROC objects */ + int size; /* number of entries in list */ +} PROC_QUEUE; + +/* GUC variables */ +extern int max_locks_per_xact; + +#ifdef LOCK_DEBUG +extern int Trace_lock_oidmin; +extern bool Trace_locks; +extern bool Trace_userlocks; +extern int Trace_lock_table; +extern bool Debug_deadlocks; +#endif /* LOCK_DEBUG */ + + +/* + * Top-level transactions are identified by VirtualTransactionIDs comprising + * PGPROC fields backendId and lxid. For prepared transactions, the + * LocalTransactionId is an ordinary XID. These are guaranteed unique over + * the short term, but will be reused after a database restart or XID + * wraparound; hence they should never be stored on disk. + * + * Note that struct VirtualTransactionId can not be assumed to be atomically + * assignable as a whole. However, type LocalTransactionId is assumed to + * be atomically assignable, and the backend ID doesn't change often enough + * to be a problem, so we can fetch or assign the two fields separately. + * We deliberately refrain from using the struct within PGPROC, to prevent + * coding errors from trying to use struct assignment with it; instead use + * GET_VXID_FROM_PGPROC(). 
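+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header): a backend that needs its own VXID typically does
+ *
+ *     VirtualTransactionId vxid;
+ *
+ *     GET_VXID_FROM_PGPROC(vxid, *MyProc);
+ *     Assert(VirtualTransactionIdIsValid(vxid));
+ *
+ * where MyProc is the backend's own PGPROC entry (see storage/proc.h).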
+ */ +typedef struct +{ + BackendId backendId; /* backendId from PGPROC */ + LocalTransactionId localTransactionId; /* lxid from PGPROC */ +} VirtualTransactionId; + +#define InvalidLocalTransactionId 0 +#define LocalTransactionIdIsValid(lxid) ((lxid) != InvalidLocalTransactionId) +#define VirtualTransactionIdIsValid(vxid) \ + (LocalTransactionIdIsValid((vxid).localTransactionId)) +#define VirtualTransactionIdIsPreparedXact(vxid) \ + ((vxid).backendId == InvalidBackendId) +#define VirtualTransactionIdEquals(vxid1, vxid2) \ + ((vxid1).backendId == (vxid2).backendId && \ + (vxid1).localTransactionId == (vxid2).localTransactionId) +#define SetInvalidVirtualTransactionId(vxid) \ + ((vxid).backendId = InvalidBackendId, \ + (vxid).localTransactionId = InvalidLocalTransactionId) +#define GET_VXID_FROM_PGPROC(vxid, proc) \ + ((vxid).backendId = (proc).backendId, \ + (vxid).localTransactionId = (proc).lxid) + +/* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */ +#define MAX_LOCKMODES 10 + +#define LOCKBIT_ON(lockmode) (1 << (lockmode)) +#define LOCKBIT_OFF(lockmode) (~(1 << (lockmode))) + + +/* + * This data structure defines the locking semantics associated with a + * "lock method". The semantics specify the meaning of each lock mode + * (by defining which lock modes it conflicts with). + * All of this data is constant and is kept in const tables. + * + * numLockModes -- number of lock modes (READ,WRITE,etc) that + * are defined in this lock method. Must be less than MAX_LOCKMODES. + * + * conflictTab -- this is an array of bitmasks showing lock + * mode conflicts. conflictTab[i] is a mask with the j-th bit + * turned on if lock modes i and j conflict. Lock modes are + * numbered 1..numLockModes; conflictTab[0] is unused. + * + * lockModeNames -- ID strings for debug printouts. + * + * trace_flag -- pointer to GUC trace flag for this lock method. (The + * GUC variable is not constant, but we use "const" here to denote that + * it can't be changed through this reference.) + */ +typedef struct LockMethodData +{ + int numLockModes; + const LOCKMASK *conflictTab; + const char *const *lockModeNames; + const bool *trace_flag; +} LockMethodData; + +typedef const LockMethodData *LockMethod; + +/* + * Lock methods are identified by LOCKMETHODID. (Despite the declaration as + * uint16, we are constrained to 256 lockmethods by the layout of LOCKTAG.) + */ +typedef uint16 LOCKMETHODID; + +/* These identify the known lock methods */ +#define DEFAULT_LOCKMETHOD 1 +#define USER_LOCKMETHOD 2 + +/* + * LOCKTAG is the key information needed to look up a LOCK item in the + * lock hashtable. A LOCKTAG value uniquely identifies a lockable object. + * + * The LockTagType enum defines the different kinds of objects we can lock. + * We can handle up to 256 different LockTagTypes. 
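+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header): callers normally fill a LOCKTAG with one of the SET_LOCKTAG_*
+ * macros defined below and hand it to the lock manager, e.g.
+ *
+ *     LOCKTAG tag;
+ *
+ *     SET_LOCKTAG_RELATION(tag, MyDatabaseId, relid);
+ *     (void) LockAcquire(&tag, AccessShareLock, false, false);
+ *
+ * (most code goes through the wrappers in lmgr.h rather than calling
+ * LockAcquire directly; "relid" is a stand-in for some relation's OID).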
+ */ +typedef enum LockTagType +{ + LOCKTAG_RELATION, /* whole relation */ + LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */ + LOCKTAG_DATABASE_FROZEN_IDS, /* pg_database.datfrozenxid */ + LOCKTAG_PAGE, /* one page of a relation */ + LOCKTAG_TUPLE, /* one physical tuple */ + LOCKTAG_TRANSACTION, /* transaction (for waiting for xact done) */ + LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */ + LOCKTAG_SPECULATIVE_TOKEN, /* speculative insertion Xid and token */ + LOCKTAG_OBJECT, /* non-relation database object */ + LOCKTAG_USERLOCK, /* reserved for old contrib/userlock code */ + LOCKTAG_ADVISORY /* advisory user locks */ +} LockTagType; + +#define LOCKTAG_LAST_TYPE LOCKTAG_ADVISORY + +extern const char *const LockTagTypeNames[]; + +/* + * The LOCKTAG struct is defined with malice aforethought to fit into 16 + * bytes with no padding. Note that this would need adjustment if we were + * to widen Oid, BlockNumber, or TransactionId to more than 32 bits. + * + * We include lockmethodid in the locktag so that a single hash table in + * shared memory can store locks of different lockmethods. + */ +typedef struct LOCKTAG +{ + uint32 locktag_field1; /* a 32-bit ID field */ + uint32 locktag_field2; /* a 32-bit ID field */ + uint32 locktag_field3; /* a 32-bit ID field */ + uint16 locktag_field4; /* a 16-bit ID field */ + uint8 locktag_type; /* see enum LockTagType */ + uint8 locktag_lockmethodid; /* lockmethod indicator */ +} LOCKTAG; + +/* + * These macros define how we map logical IDs of lockable objects into + * the physical fields of LOCKTAG. Use these to set up LOCKTAG values, + * rather than accessing the fields directly. Note multiple eval of target! + */ + +/* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */ +#define SET_LOCKTAG_RELATION(locktag,dboid,reloid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_RELATION, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* same ID info as RELATION */ +#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* ID info for frozen IDs is DB OID */ +#define SET_LOCKTAG_DATABASE_FROZEN_IDS(locktag,dboid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = 0, \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_DATABASE_FROZEN_IDS, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* ID info for a page is RELATION info + BlockNumber */ +#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = (blocknum), \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_PAGE, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* ID info for a tuple is PAGE info + OffsetNumber */ +#define SET_LOCKTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = (blocknum), \ + (locktag).locktag_field4 = (offnum), \ + (locktag).locktag_type = LOCKTAG_TUPLE, \ + (locktag).locktag_lockmethodid = 
DEFAULT_LOCKMETHOD) + +/* ID info for a transaction is its TransactionId */ +#define SET_LOCKTAG_TRANSACTION(locktag,xid) \ + ((locktag).locktag_field1 = (xid), \ + (locktag).locktag_field2 = 0, \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_TRANSACTION, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* ID info for a virtual transaction is its VirtualTransactionId */ +#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ + ((locktag).locktag_field1 = (vxid).backendId, \ + (locktag).locktag_field2 = (vxid).localTransactionId, \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* + * ID info for a speculative insert is TRANSACTION info + + * its speculative insert counter. + */ +#define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ + ((locktag).locktag_field1 = (xid), \ + (locktag).locktag_field2 = (token), \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +/* + * ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID + * + * Note: object ID has same representation as in pg_depend and + * pg_description, but notice that we are constraining SUBID to 16 bits. + * Also, we use DB OID = 0 for shared objects such as tablespaces. + */ +#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (classoid), \ + (locktag).locktag_field3 = (objoid), \ + (locktag).locktag_field4 = (objsubid), \ + (locktag).locktag_type = LOCKTAG_OBJECT, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + +#define SET_LOCKTAG_ADVISORY(locktag,id1,id2,id3,id4) \ + ((locktag).locktag_field1 = (id1), \ + (locktag).locktag_field2 = (id2), \ + (locktag).locktag_field3 = (id3), \ + (locktag).locktag_field4 = (id4), \ + (locktag).locktag_type = LOCKTAG_ADVISORY, \ + (locktag).locktag_lockmethodid = USER_LOCKMETHOD) + + +/* + * Per-locked-object lock information: + * + * tag -- uniquely identifies the object being locked + * grantMask -- bitmask for all lock types currently granted on this object. + * waitMask -- bitmask for all lock types currently awaited on this object. + * procLocks -- list of PROCLOCK objects for this lock. + * waitProcs -- queue of processes waiting for this lock. + * requested -- count of each lock type currently requested on the lock + * (includes requests already granted!!). + * nRequested -- total requested locks of all types. + * granted -- count of each lock type currently granted on the lock. + * nGranted -- total granted locks of all types. + * + * Note: these counts count 1 for each backend. Internally to a backend, + * there may be multiple grabs on a particular lock, but this is not reflected + * into shared memory. + */ +typedef struct LOCK +{ + /* hash key */ + LOCKTAG tag; /* unique identifier of lockable object */ + + /* data */ + LOCKMASK grantMask; /* bitmask for lock types already granted */ + LOCKMASK waitMask; /* bitmask for lock types awaited */ + SHM_QUEUE procLocks; /* list of PROCLOCK objects assoc. 
with lock */ + PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on lock */ + int requested[MAX_LOCKMODES]; /* counts of requested locks */ + int nRequested; /* total of requested[] array */ + int granted[MAX_LOCKMODES]; /* counts of granted locks */ + int nGranted; /* total of granted[] array */ +} LOCK; + +#define LOCK_LOCKMETHOD(lock) ((LOCKMETHODID) (lock).tag.locktag_lockmethodid) +#define LOCK_LOCKTAG(lock) ((LockTagType) (lock).tag.locktag_type) + + +/* + * We may have several different backends holding or awaiting locks + * on the same lockable object. We need to store some per-holder/waiter + * information for each such holder (or would-be holder). This is kept in + * a PROCLOCK struct. + * + * PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the + * proclock hashtable. A PROCLOCKTAG value uniquely identifies the combination + * of a lockable object and a holder/waiter for that object. (We can use + * pointers here because the PROCLOCKTAG need only be unique for the lifespan + * of the PROCLOCK, and it will never outlive the lock or the proc.) + * + * Internally to a backend, it is possible for the same lock to be held + * for different purposes: the backend tracks transaction locks separately + * from session locks. However, this is not reflected in the shared-memory + * state: we only track which backend(s) hold the lock. This is OK since a + * backend can never block itself. + * + * The holdMask field shows the already-granted locks represented by this + * proclock. Note that there will be a proclock object, possibly with + * zero holdMask, for any lock that the process is currently waiting on. + * Otherwise, proclock objects whose holdMasks are zero are recycled + * as soon as convenient. + * + * releaseMask is workspace for LockReleaseAll(): it shows the locks due + * to be released during the current call. This must only be examined or + * set by the backend owning the PROCLOCK. + * + * Each PROCLOCK object is linked into lists for both the associated LOCK + * object and the owning PGPROC object. Note that the PROCLOCK is entered + * into these lists as soon as it is created, even if no lock has yet been + * granted. A PGPROC that is waiting for a lock to be granted will also be + * linked into the lock's waitProcs queue. + */ +typedef struct PROCLOCKTAG +{ + /* NB: we assume this struct contains no padding! */ + LOCK *myLock; /* link to per-lockable-object information */ + PGPROC *myProc; /* link to PGPROC of owning backend */ +} PROCLOCKTAG; + +typedef struct PROCLOCK +{ + /* tag */ + PROCLOCKTAG tag; /* unique identifier of proclock object */ + + /* data */ + PGPROC *groupLeader; /* proc's lock group leader, or proc itself */ + LOCKMASK holdMask; /* bitmask for lock types currently held */ + LOCKMASK releaseMask; /* bitmask for lock types to be released */ + SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */ + SHM_QUEUE procLink; /* list link in PGPROC's list of proclocks */ +} PROCLOCK; + +#define PROCLOCK_LOCKMETHOD(proclock) \ + LOCK_LOCKMETHOD(*((proclock).tag.myLock)) + +/* + * Each backend also maintains a local hash table with information about each + * lock it is currently interested in. In particular the local table counts + * the number of times that lock has been acquired. This allows multiple + * requests for the same lock to be executed without additional accesses to + * shared memory. 
We also track the number of lock acquisitions per + * ResourceOwner, so that we can release just those locks belonging to a + * particular ResourceOwner. + * + * When holding a lock taken "normally", the lock and proclock fields always + * point to the associated objects in shared memory. However, if we acquired + * the lock via the fast-path mechanism, the lock and proclock fields are set + * to NULL, since there probably aren't any such objects in shared memory. + * (If the lock later gets promoted to normal representation, we may eventually + * update our locallock's lock/proclock fields after finding the shared + * objects.) + * + * Caution: a locallock object can be left over from a failed lock acquisition + * attempt. In this case its lock/proclock fields are untrustworthy, since + * the shared lock object is neither held nor awaited, and hence is available + * to be reclaimed. If nLocks > 0 then these pointers must either be valid or + * NULL, but when nLocks == 0 they should be considered garbage. + */ +typedef struct LOCALLOCKTAG +{ + LOCKTAG lock; /* identifies the lockable object */ + LOCKMODE mode; /* lock mode for this table entry */ +} LOCALLOCKTAG; + +typedef struct LOCALLOCKOWNER +{ + /* + * Note: if owner is NULL then the lock is held on behalf of the session; + * otherwise it is held on behalf of my current transaction. + * + * Must use a forward struct reference to avoid circularity. + */ + struct ResourceOwnerData *owner; + int64 nLocks; /* # of times held by this owner */ +} LOCALLOCKOWNER; + +typedef struct LOCALLOCK +{ + /* tag */ + LOCALLOCKTAG tag; /* unique identifier of locallock entry */ + + /* data */ + uint32 hashcode; /* copy of LOCKTAG's hash value */ + LOCK *lock; /* associated LOCK object, if any */ + PROCLOCK *proclock; /* associated PROCLOCK object, if any */ + int64 nLocks; /* total number of times lock is held */ + int numLockOwners; /* # of relevant ResourceOwners */ + int maxLockOwners; /* allocated size of array */ + LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */ + bool holdsStrongLockCount; /* bumped FastPathStrongRelationLocks */ + bool lockCleared; /* we read all sinval msgs for lock */ +} LOCALLOCK; + +#define LOCALLOCK_LOCKMETHOD(llock) ((llock).tag.lock.locktag_lockmethodid) +#define LOCALLOCK_LOCKTAG(llock) ((LockTagType) (llock).tag.lock.locktag_type) + + +/* + * These structures hold information passed from lmgr internals to the lock + * listing user-level functions (in lockfuncs.c). + */ + +typedef struct LockInstanceData +{ + LOCKTAG locktag; /* tag for locked object */ + LOCKMASK holdMask; /* locks held by this PGPROC */ + LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */ + BackendId backend; /* backend ID of this PGPROC */ + LocalTransactionId lxid; /* local transaction ID of this PGPROC */ + int pid; /* pid of this PGPROC */ + int leaderPid; /* pid of group leader; = pid if no group */ + bool fastpath; /* taken via fastpath? 
*/ +} LockInstanceData; + +typedef struct LockData +{ + int nelements; /* The length of the array */ + LockInstanceData *locks; /* Array of per-PROCLOCK information */ +} LockData; + +typedef struct BlockedProcData +{ + int pid; /* pid of a blocked PGPROC */ + /* Per-PROCLOCK information about PROCLOCKs of the lock the pid awaits */ + /* (these fields refer to indexes in BlockedProcsData.locks[]) */ + int first_lock; /* index of first relevant LockInstanceData */ + int num_locks; /* number of relevant LockInstanceDatas */ + /* PIDs of PGPROCs that are ahead of "pid" in the lock's wait queue */ + /* (these fields refer to indexes in BlockedProcsData.waiter_pids[]) */ + int first_waiter; /* index of first preceding waiter */ + int num_waiters; /* number of preceding waiters */ +} BlockedProcData; + +typedef struct BlockedProcsData +{ + BlockedProcData *procs; /* Array of per-blocked-proc information */ + LockInstanceData *locks; /* Array of per-PROCLOCK information */ + int *waiter_pids; /* Array of PIDs of other blocked PGPROCs */ + int nprocs; /* # of valid entries in procs[] array */ + int maxprocs; /* Allocated length of procs[] array */ + int nlocks; /* # of valid entries in locks[] array */ + int maxlocks; /* Allocated length of locks[] array */ + int npids; /* # of valid entries in waiter_pids[] array */ + int maxpids; /* Allocated length of waiter_pids[] array */ +} BlockedProcsData; + + +/* Result codes for LockAcquire() */ +typedef enum +{ + LOCKACQUIRE_NOT_AVAIL, /* lock not available, and dontWait=true */ + LOCKACQUIRE_OK, /* lock successfully acquired */ + LOCKACQUIRE_ALREADY_HELD, /* incremented count for lock already held */ + LOCKACQUIRE_ALREADY_CLEAR /* incremented count for lock already clear */ +} LockAcquireResult; + +/* Deadlock states identified by DeadLockCheck() */ +typedef enum +{ + DS_NOT_YET_CHECKED, /* no deadlock check has run yet */ + DS_NO_DEADLOCK, /* no deadlock detected */ + DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */ + DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */ + DS_BLOCKED_BY_AUTOVACUUM /* no deadlock; queue blocked by autovacuum + * worker */ +} DeadLockState; + +/* + * The lockmgr's shared hash tables are partitioned to reduce contention. + * To determine which partition a given locktag belongs to, compute the tag's + * hash code with LockTagHashCode(), then apply one of these macros. + * NB: NUM_LOCK_PARTITIONS must be a power of 2! + */ +#define LockHashPartition(hashcode) \ + ((hashcode) % NUM_LOCK_PARTITIONS) +#define LockHashPartitionLock(hashcode) \ + (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + \ + LockHashPartition(hashcode)].lock) +#define LockHashPartitionLockByIndex(i) \ + (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock) + +/* + * The deadlock detector needs to be able to access lockGroupLeader and + * related fields in the PGPROC, so we arrange for those fields to be protected + * by one of the lock hash partition locks. Since the deadlock detector + * acquires all such locks anyway, this makes it safe for it to access these + * fields without doing anything extra. To avoid contention as much as + * possible, we map different PGPROCs to different partition locks. The lock + * used for a given lock group is determined by the group leader's pgprocno. 
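+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header) of this style of access:
+ *
+ *     PGPROC *leader = (proc->lockGroupLeader != NULL) ?
+ *         proc->lockGroupLeader : proc;
+ *     LWLock *leader_lwlock = LockHashPartitionLockByProc(leader);
+ *
+ *     LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
+ *     ... read or update the lock-group fields of interest ...
+ *     LWLockRelease(leader_lwlock);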
+ */ +#define LockHashPartitionLockByProc(leader_pgproc) \ + LockHashPartitionLock((leader_pgproc)->pgprocno) + +/* + * function prototypes + */ +extern void InitLocks(void); +extern LockMethod GetLocksMethodTable(const LOCK *lock); +extern LockMethod GetLockTagsMethodTable(const LOCKTAG *locktag); +extern uint32 LockTagHashCode(const LOCKTAG *locktag); +extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2); +extern LockAcquireResult LockAcquire(const LOCKTAG *locktag, + LOCKMODE lockmode, + bool sessionLock, + bool dontWait); +extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag, + LOCKMODE lockmode, + bool sessionLock, + bool dontWait, + bool reportMemoryError, + LOCALLOCK **locallockp); +extern void AbortStrongLockAcquire(void); +extern void MarkLockClear(LOCALLOCK *locallock); +extern bool LockRelease(const LOCKTAG *locktag, + LOCKMODE lockmode, bool sessionLock); +extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks); +extern void LockReleaseSession(LOCKMETHODID lockmethodid); +extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks); +extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks); +extern bool LockHeldByMe(const LOCKTAG *locktag, LOCKMODE lockmode); +#ifdef USE_ASSERT_CHECKING +extern HTAB *GetLockMethodLocalHash(void); +#endif +extern bool LockHasWaiters(const LOCKTAG *locktag, + LOCKMODE lockmode, bool sessionLock); +extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, + LOCKMODE lockmode, int *countp); +extern void AtPrepare_Locks(void); +extern void PostPrepare_Locks(TransactionId xid); +extern bool LockCheckConflicts(LockMethod lockMethodTable, + LOCKMODE lockmode, + LOCK *lock, PROCLOCK *proclock); +extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode); +extern void GrantAwaitedLock(void); +extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode); +extern Size LockShmemSize(void); +extern LockData *GetLockStatusData(void); +extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); + +extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks); +extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode); + +extern void lock_twophase_recover(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void lock_twophase_postcommit(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void lock_twophase_postabort(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void lock_twophase_standby_recover(TransactionId xid, uint16 info, + void *recdata, uint32 len); + +extern DeadLockState DeadLockCheck(PGPROC *proc); +extern PGPROC *GetBlockingAutoVacuumPgproc(void); +extern void DeadLockReport(void) pg_attribute_noreturn(); +extern void RememberSimpleDeadLock(PGPROC *proc1, + LOCKMODE lockmode, + LOCK *lock, + PGPROC *proc2); +extern void InitDeadLockChecking(void); + +extern int LockWaiterCount(const LOCKTAG *locktag); + +#ifdef LOCK_DEBUG +extern void DumpLocks(PGPROC *proc); +extern void DumpAllLocks(void); +#endif + +/* Lock a VXID (used to wait for a transaction to finish) */ +extern void VirtualXactLockTableInsert(VirtualTransactionId vxid); +extern void VirtualXactLockTableCleanup(void); +extern bool VirtualXactLock(VirtualTransactionId vxid, bool wait); + +#endif /* LOCK_H_ */ diff --git a/src/include/storage/lockdefs.h b/src/include/storage/lockdefs.h new file mode 100644 index 0000000..ef0daf7 --- /dev/null +++ b/src/include/storage/lockdefs.h @@ -0,0 +1,59 @@ 
+/*------------------------------------------------------------------------- + * + * lockdefs.h + * Frontend exposed parts of postgres' low level lock mechanism + * + * The split between lockdefs.h and lock.h is not very principled. This file + * contains definition that have to (indirectly) be available when included by + * FRONTEND code. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/lockdefs.h + * + *------------------------------------------------------------------------- + */ +#ifndef LOCKDEFS_H_ +#define LOCKDEFS_H_ + +/* + * LOCKMODE is an integer (1..N) indicating a lock type. LOCKMASK is a bit + * mask indicating a set of held or requested lock types (the bit 1<lwWaitMode, + * when waiting for lock to become free. Not + * to be used as LWLockAcquire argument */ +} LWLockMode; + + +#ifdef LOCK_DEBUG +extern bool Trace_lwlocks; +#endif + +extern bool LWLockAcquire(LWLock *lock, LWLockMode mode); +extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode); +extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode); +extern void LWLockRelease(LWLock *lock); +extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val); +extern void LWLockReleaseAll(void); +extern bool LWLockHeldByMe(LWLock *lock); +extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode); + +extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval); +extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value); + +extern Size LWLockShmemSize(void); +extern void CreateLWLocks(void); +extern void InitLWLockAccess(void); + +extern const char *GetLWLockIdentifier(uint32 classId, uint16 eventId); + +/* + * Extensions (or core code) can obtain an LWLocks by calling + * RequestNamedLWLockTranche() during postmaster startup. Subsequently, + * call GetNamedLWLockTranche() to obtain a pointer to an array containing + * the number of LWLocks requested. + */ +extern void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks); +extern LWLockPadded *GetNamedLWLockTranche(const char *tranche_name); + +/* + * There is another, more flexible method of obtaining lwlocks. First, call + * LWLockNewTrancheId just once to obtain a tranche ID; this allocates from + * a shared counter. Next, each individual process using the tranche should + * call LWLockRegisterTranche() to associate that tranche ID with a name. + * Finally, LWLockInitialize should be called just once per lwlock, passing + * the tranche ID as an argument. + * + * It may seem strange that each process using the tranche must register it + * separately, but dynamic shared memory segments aren't guaranteed to be + * mapped at the same address in all coordinating backends, so storing the + * registration in the main shared memory segment wouldn't work for that case. + */ +extern int LWLockNewTrancheId(void); +extern void LWLockRegisterTranche(int tranche_id, const char *tranche_name); +extern void LWLockInitialize(LWLock *lock, int tranche_id); + +/* + * Every tranche ID less than NUM_INDIVIDUAL_LWLOCKS is reserved; also, + * we reserve additional tranche IDs for builtin tranches not included in + * the set of individual LWLocks. A call to LWLockNewTrancheId will never + * return a value less than LWTRANCHE_FIRST_USER_DEFINED. 
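+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header) of the LWLockNewTrancheId / LWLockRegisterTranche /
+ * LWLockInitialize sequence, assuming an extension keeps a struct
+ * ("shared") in a DSM segment:
+ *
+ *     In the process that creates the segment:
+ *         shared->tranche_id = LWLockNewTrancheId();
+ *         LWLockInitialize(&shared->lock, shared->tranche_id);
+ *
+ *     In every process that uses the lock (creator included):
+ *         LWLockRegisterTranche(shared->tranche_id, "my_extension");
+ *         LWLockAcquire(&shared->lock, LW_EXCLUSIVE);
+ *         ...
+ *         LWLockRelease(&shared->lock);
+ *
+ * ("shared" and "my_extension" are stand-in names for this example.)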
+ */ +typedef enum BuiltinTrancheIds +{ + LWTRANCHE_XACT_BUFFER = NUM_INDIVIDUAL_LWLOCKS, + LWTRANCHE_COMMITTS_BUFFER, + LWTRANCHE_SUBTRANS_BUFFER, + LWTRANCHE_MULTIXACTOFFSET_BUFFER, + LWTRANCHE_MULTIXACTMEMBER_BUFFER, + LWTRANCHE_NOTIFY_BUFFER, + LWTRANCHE_SERIAL_BUFFER, + LWTRANCHE_WAL_INSERT, + LWTRANCHE_BUFFER_CONTENT, + LWTRANCHE_BUFFER_IO, + LWTRANCHE_REPLICATION_ORIGIN_STATE, + LWTRANCHE_REPLICATION_SLOT_IO, + LWTRANCHE_LOCK_FASTPATH, + LWTRANCHE_BUFFER_MAPPING, + LWTRANCHE_LOCK_MANAGER, + LWTRANCHE_PREDICATE_LOCK_MANAGER, + LWTRANCHE_PARALLEL_HASH_JOIN, + LWTRANCHE_PARALLEL_QUERY_DSA, + LWTRANCHE_PER_SESSION_DSA, + LWTRANCHE_PER_SESSION_RECORD_TYPE, + LWTRANCHE_PER_SESSION_RECORD_TYPMOD, + LWTRANCHE_SHARED_TUPLESTORE, + LWTRANCHE_SHARED_TIDBITMAP, + LWTRANCHE_PARALLEL_APPEND, + LWTRANCHE_PER_XACT_PREDICATE_LIST, + LWTRANCHE_FIRST_USER_DEFINED +} BuiltinTrancheIds; + +/* + * Prior to PostgreSQL 9.4, we used an enum type called LWLockId to refer + * to LWLocks. New code should instead use LWLock *. However, for the + * convenience of third-party code, we include the following typedef. + */ +typedef LWLock *LWLockId; + +#endif /* LWLOCK_H */ diff --git a/src/include/storage/md.h b/src/include/storage/md.h new file mode 100644 index 0000000..07fd1bb --- /dev/null +++ b/src/include/storage/md.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * md.h + * magnetic disk storage manager public interface declarations. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/md.h + * + *------------------------------------------------------------------------- + */ +#ifndef MD_H +#define MD_H + +#include "storage/block.h" +#include "storage/relfilenode.h" +#include "storage/smgr.h" +#include "storage/sync.h" + +/* md storage manager functionality */ +extern void mdinit(void); +extern void mdopen(SMgrRelation reln); +extern void mdclose(SMgrRelation reln, ForkNumber forknum); +extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern bool mdexists(SMgrRelation reln, ForkNumber forknum); +extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); +extern void mdextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); +extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer); +extern void mdwrite(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern void mdwriteback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); +extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum); +extern void mdtruncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks); +extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum); + +extern void ForgetDatabaseSyncRequests(Oid dbid); +extern void DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo); + +/* md sync callbacks */ +extern int mdsyncfiletag(const FileTag *ftag, char *path); +extern int mdunlinkfiletag(const FileTag *ftag, char *path); +extern bool mdfiletagmatches(const FileTag *ftag, const FileTag *candidate); + +#endif /* MD_H */ diff --git a/src/include/storage/off.h b/src/include/storage/off.h new file mode 100644 index 0000000..9bfdec8 --- 
/dev/null +++ b/src/include/storage/off.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------- + * + * off.h + * POSTGRES disk "offset" definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/off.h + * + *------------------------------------------------------------------------- + */ +#ifndef OFF_H +#define OFF_H + +#include "storage/itemid.h" +/* + * OffsetNumber: + * + * this is a 1-based index into the linp (ItemIdData) array in the + * header of each disk page. + */ +typedef uint16 OffsetNumber; + +#define InvalidOffsetNumber ((OffsetNumber) 0) +#define FirstOffsetNumber ((OffsetNumber) 1) +#define MaxOffsetNumber ((OffsetNumber) (BLCKSZ / sizeof(ItemIdData))) + +/* ---------------- + * support macros + * ---------------- + */ + +/* + * OffsetNumberIsValid + * True iff the offset number is valid. + */ +#define OffsetNumberIsValid(offsetNumber) \ + ((bool) ((offsetNumber != InvalidOffsetNumber) && \ + (offsetNumber <= MaxOffsetNumber))) + +/* + * OffsetNumberNext + * OffsetNumberPrev + * Increments/decrements the argument. These macros look pointless + * but they help us disambiguate the different manipulations on + * OffsetNumbers (e.g., sometimes we subtract one from an + * OffsetNumber to move back, and sometimes we do so to form a + * real C array index). + */ +#define OffsetNumberNext(offsetNumber) \ + ((OffsetNumber) (1 + (offsetNumber))) +#define OffsetNumberPrev(offsetNumber) \ + ((OffsetNumber) (-1 + (offsetNumber))) + +#endif /* OFF_H */ diff --git a/src/include/storage/pg_sema.h b/src/include/storage/pg_sema.h new file mode 100644 index 0000000..ef8a7b4 --- /dev/null +++ b/src/include/storage/pg_sema.h @@ -0,0 +1,61 @@ +/*------------------------------------------------------------------------- + * + * pg_sema.h + * Platform-independent API for semaphores. + * + * PostgreSQL requires counting semaphores (the kind that keep track of + * multiple unlock operations, and will allow an equal number of subsequent + * lock operations before blocking). The underlying implementation is + * not the same on every platform. This file defines the API that must + * be provided by each port. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/pg_sema.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_SEMA_H +#define PG_SEMA_H + +/* + * struct PGSemaphoreData and pointer type PGSemaphore are the data structure + * representing an individual semaphore. The contents of PGSemaphoreData vary + * across implementations and must never be touched by platform-independent + * code; hence, PGSemaphoreData is declared as an opaque struct here. + * + * However, Windows is sufficiently unlike our other ports that it doesn't + * seem worth insisting on ABI compatibility for Windows too. Hence, on + * that platform just define PGSemaphore as HANDLE. 
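+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header) of the typical call sequence, using the functions declared below:
+ *
+ *     PGSemaphore sema = PGSemaphoreCreate();    (initial count is 1)
+ *
+ *     PGSemaphoreLock(sema);                     (decrement; may block)
+ *     ... protected work ...
+ *     PGSemaphoreUnlock(sema);                   (increment; may wake a waiter)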
+ */ +#ifndef USE_WIN32_SEMAPHORES +typedef struct PGSemaphoreData *PGSemaphore; +#else +typedef HANDLE PGSemaphore; +#endif + + +/* Report amount of shared memory needed */ +extern Size PGSemaphoreShmemSize(int maxSemas); + +/* Module initialization (called during postmaster start or shmem reinit) */ +extern void PGReserveSemaphores(int maxSemas); + +/* Allocate a PGSemaphore structure with initial count 1 */ +extern PGSemaphore PGSemaphoreCreate(void); + +/* Reset a previously-initialized PGSemaphore to have count 0 */ +extern void PGSemaphoreReset(PGSemaphore sema); + +/* Lock a semaphore (decrement count), blocking if count would be < 0 */ +extern void PGSemaphoreLock(PGSemaphore sema); + +/* Unlock a semaphore (increment count) */ +extern void PGSemaphoreUnlock(PGSemaphore sema); + +/* Lock a semaphore only if able to do so without blocking */ +extern bool PGSemaphoreTryLock(PGSemaphore sema); + +#endif /* PG_SEMA_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h new file mode 100644 index 0000000..0de26b3 --- /dev/null +++ b/src/include/storage/pg_shmem.h @@ -0,0 +1,90 @@ +/*------------------------------------------------------------------------- + * + * pg_shmem.h + * Platform-independent API for shared memory support. + * + * Every port is expected to support shared memory with approximately + * SysV-ish semantics; in particular, a memory block is not anonymous + * but has an ID, and we must be able to tell whether there are any + * remaining processes attached to a block of a specified ID. + * + * To simplify life for the SysV implementation, the ID is assumed to + * consist of two unsigned long values (these are key and ID in SysV + * terms). Other platforms may ignore the second value if they need + * only one ID number. 
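+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header): the two-value ID surfaces in the API declared below, e.g.
+ *
+ *     if (PGSharedMemoryIsInUse(key, shmid))
+ *         ... another process is still attached to that segment ...
+ *
+ * where "key" and "shmid" stand for the two unsigned long values (ports
+ * that need only one number ignore the second).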
+ * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/pg_shmem.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_SHMEM_H +#define PG_SHMEM_H + +#include "storage/dsm_impl.h" + +typedef struct PGShmemHeader /* standard header for all Postgres shmem */ +{ + int32 magic; /* magic # to identify Postgres segments */ +#define PGShmemMagic 679834894 + pid_t creatorPID; /* PID of creating process (set but unread) */ + Size totalsize; /* total size of segment */ + Size freeoffset; /* offset to first free space */ + dsm_handle dsm_control; /* ID of dynamic shared memory control seg */ + void *index; /* pointer to ShmemIndex table */ +#ifndef WIN32 /* Windows doesn't have useful inode#s */ + dev_t device; /* device data directory is on */ + ino_t inode; /* inode number of data directory */ +#endif +} PGShmemHeader; + +/* GUC variables */ +extern int shared_memory_type; +extern int huge_pages; + +/* Possible values for huge_pages */ +typedef enum +{ + HUGE_PAGES_OFF, + HUGE_PAGES_ON, + HUGE_PAGES_TRY +} HugePagesType; + +/* Possible values for shared_memory_type */ +typedef enum +{ + SHMEM_TYPE_WINDOWS, + SHMEM_TYPE_SYSV, + SHMEM_TYPE_MMAP +} PGShmemType; + +#ifndef WIN32 +extern unsigned long UsedShmemSegID; +#else +extern HANDLE UsedShmemSegID; +extern void *ShmemProtectiveRegion; +#endif +extern void *UsedShmemSegAddr; + +#if !defined(WIN32) && !defined(EXEC_BACKEND) +#define DEFAULT_SHARED_MEMORY_TYPE SHMEM_TYPE_MMAP +#elif !defined(WIN32) +#define DEFAULT_SHARED_MEMORY_TYPE SHMEM_TYPE_SYSV +#else +#define DEFAULT_SHARED_MEMORY_TYPE SHMEM_TYPE_WINDOWS +#endif + +#ifdef EXEC_BACKEND +extern void PGSharedMemoryReAttach(void); +extern void PGSharedMemoryNoReAttach(void); +#endif + +extern PGShmemHeader *PGSharedMemoryCreate(Size size, + PGShmemHeader **shim); +extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2); +extern void PGSharedMemoryDetach(void); + +#endif /* PG_SHMEM_H */ diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h new file mode 100644 index 0000000..56c5ec4 --- /dev/null +++ b/src/include/storage/pmsignal.h @@ -0,0 +1,94 @@ +/*------------------------------------------------------------------------- + * + * pmsignal.h + * routines for signaling the postmaster from its child processes + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/pmsignal.h + * + *------------------------------------------------------------------------- + */ +#ifndef PMSIGNAL_H +#define PMSIGNAL_H + +#include + +#ifdef HAVE_SYS_PRCTL_H +#include "sys/prctl.h" +#endif + +#ifdef HAVE_SYS_PROCCTL_H +#include "sys/procctl.h" +#endif + +/* + * Reasons for signaling the postmaster. We can cope with simultaneous + * signals for different reasons. If the same reason is signaled multiple + * times in quick succession, however, the postmaster is likely to observe + * only one notification of it. This is okay for the present uses. 
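+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header): a child process requests an action by setting a flag and
+ * signaling, and the postmaster polls the flags from its SIGUSR1 handler,
+ * roughly
+ *
+ *     In a child process:
+ *         SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+ *
+ *     In the postmaster's signal handler:
+ *         if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER))
+ *             ... start the autovacuum launcher ...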
+ */ +typedef enum +{ + PMSIGNAL_RECOVERY_STARTED, /* recovery has started */ + PMSIGNAL_BEGIN_HOT_STANDBY, /* begin Hot Standby */ + PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */ + PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */ + PMSIGNAL_START_AUTOVAC_LAUNCHER, /* start an autovacuum launcher */ + PMSIGNAL_START_AUTOVAC_WORKER, /* start an autovacuum worker */ + PMSIGNAL_BACKGROUND_WORKER_CHANGE, /* background worker state change */ + PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */ + PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */ + + NUM_PMSIGNALS /* Must be last value of enum! */ +} PMSignalReason; + +/* PMSignalData is an opaque struct, details known only within pmsignal.c */ +typedef struct PMSignalData PMSignalData; + +/* + * prototypes for functions in pmsignal.c + */ +extern Size PMSignalShmemSize(void); +extern void PMSignalShmemInit(void); +extern void SendPostmasterSignal(PMSignalReason reason); +extern bool CheckPostmasterSignal(PMSignalReason reason); +extern int AssignPostmasterChildSlot(void); +extern bool ReleasePostmasterChildSlot(int slot); +extern bool IsPostmasterChildWalSender(int slot); +extern void MarkPostmasterChildActive(void); +extern void MarkPostmasterChildInactive(void); +extern void MarkPostmasterChildWalSender(void); +extern bool PostmasterIsAliveInternal(void); +extern void PostmasterDeathSignalInit(void); + + +/* + * Do we have a way to ask for a signal on parent death? + * + * If we do, pmsignal.c will set up a signal handler, that sets a flag when + * the parent dies. Checking the flag first makes PostmasterIsAlive() a lot + * cheaper in usual case that the postmaster is alive. + */ +#if (defined(HAVE_SYS_PRCTL_H) && defined(PR_SET_PDEATHSIG)) || \ + (defined(HAVE_SYS_PROCCTL_H) && defined(PROC_PDEATHSIG_CTL)) +#define USE_POSTMASTER_DEATH_SIGNAL +#endif + +#ifdef USE_POSTMASTER_DEATH_SIGNAL +extern volatile sig_atomic_t postmaster_possibly_dead; + +static inline bool +PostmasterIsAlive(void) +{ + if (likely(!postmaster_possibly_dead)) + return true; + return PostmasterIsAliveInternal(); +} +#else +#define PostmasterIsAlive() PostmasterIsAliveInternal() +#endif + +#endif /* PMSIGNAL_H */ diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h new file mode 100644 index 0000000..86e756d --- /dev/null +++ b/src/include/storage/predicate.h @@ -0,0 +1,87 @@ +/*------------------------------------------------------------------------- + * + * predicate.h + * POSTGRES public predicate locking definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/predicate.h + * + *------------------------------------------------------------------------- + */ +#ifndef PREDICATE_H +#define PREDICATE_H + +#include "storage/lock.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" + + +/* + * GUC variables + */ +extern int max_predicate_locks_per_xact; +extern int max_predicate_locks_per_relation; +extern int max_predicate_locks_per_page; + + +/* Number of SLRU buffers to use for Serial SLRU */ +#define NUM_SERIAL_BUFFERS 16 + +/* + * A handle used for sharing SERIALIZABLEXACT objects between the participants + * in a parallel query. 
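+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header):
+ *
+ *     In the parallel leader, while setting up shared state:
+ *         SerializableXactHandle handle = ShareSerializableXact();
+ *
+ *     In each worker, during startup:
+ *         AttachSerializableXact(handle);
+ *
+ * so that every participant operates on the leader's SERIALIZABLEXACT.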
+ */ +typedef void *SerializableXactHandle; + +/* + * function prototypes + */ + +/* housekeeping for shared memory predicate lock structures */ +extern void InitPredicateLocks(void); +extern Size PredicateLockShmemSize(void); + +extern void CheckPointPredicate(void); + +/* predicate lock reporting */ +extern bool PageIsPredicateLocked(Relation relation, BlockNumber blkno); + +/* predicate lock maintenance */ +extern Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot); +extern void SetSerializableTransactionSnapshot(Snapshot snapshot, + VirtualTransactionId *sourcevxid, + int sourcepid); +extern void RegisterPredicateLockingXid(TransactionId xid); +extern void PredicateLockRelation(Relation relation, Snapshot snapshot); +extern void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot); +extern void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, + TransactionId insert_xid); +extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno); +extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno); +extern void TransferPredicateLocksToHeapRelation(Relation relation); +extern void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe); + +/* conflict detection (may also trigger rollback) */ +extern bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot); +extern void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot); +extern void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno); +extern void CheckTableForSerializableConflictIn(Relation relation); + +/* final rollback checking */ +extern void PreCommit_CheckForSerializationFailure(void); + +/* two-phase commit support */ +extern void AtPrepare_PredicateLocks(void); +extern void PostPrepare_PredicateLocks(TransactionId xid); +extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); +extern void predicatelock_twophase_recover(TransactionId xid, uint16 info, + void *recdata, uint32 len); + +/* parallel query support */ +extern SerializableXactHandle ShareSerializableXact(void); +extern void AttachSerializableXact(SerializableXactHandle handle); + +#endif /* PREDICATE_H */ diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h new file mode 100644 index 0000000..cf9694d --- /dev/null +++ b/src/include/storage/predicate_internals.h @@ -0,0 +1,493 @@ +/*------------------------------------------------------------------------- + * + * predicate_internals.h + * POSTGRES internal predicate locking definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/predicate_internals.h + * + *------------------------------------------------------------------------- + */ +#ifndef PREDICATE_INTERNALS_H +#define PREDICATE_INTERNALS_H + +#include "storage/lock.h" +#include "storage/lwlock.h" + +/* + * Commit number. 
+ */ +typedef uint64 SerCommitSeqNo; + +/* + * Reserved commit sequence numbers: + * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be + * used as a SerCommitSeqNo, even an invalid one + * - InvalidSerCommitSeqNo is used to indicate a transaction that + * hasn't committed yet, so use a number greater than all valid + * ones to make comparison do the expected thing + * - RecoverySerCommitSeqNo is used to refer to transactions that + * happened before a crash/recovery, since we restart the sequence + * at that point. It's earlier than all normal sequence numbers, + * and is only used by recovered prepared transactions + */ +#define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX) +#define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1) +#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2) + +/* + * The SERIALIZABLEXACT struct contains information needed for each + * serializable database transaction to support SSI techniques. + * + * A home-grown list is maintained in shared memory to manage these. + * An entry is used when the serializable transaction acquires a snapshot. + * Unless the transaction is rolled back, this entry must generally remain + * until all concurrent transactions have completed. (There are special + * optimizations for READ ONLY transactions which often allow them to be + * cleaned up earlier.) A transaction which is rolled back is cleaned up + * as soon as possible. + * + * Eligibility for cleanup of committed transactions is generally determined + * by comparing the transaction's finishedBefore field to + * SxactGlobalXmin. + */ +typedef struct SERIALIZABLEXACT +{ + VirtualTransactionId vxid; /* The executing process always has one of + * these. */ + + /* + * We use two numbers to track the order that transactions commit. Before + * commit, a transaction is marked as prepared, and prepareSeqNo is set. + * Shortly after commit, it's marked as committed, and commitSeqNo is set. + * This doesn't give a strict commit order, but these two values together + * are good enough for us, as we can always err on the safe side and + * assume that there's a conflict, if we can't be sure of the exact + * ordering of two commits. + * + * Note that a transaction is marked as prepared for a short period during + * commit processing, even if two-phase commit is not used. But with + * two-phase commit, a transaction can stay in prepared state for some + * time. + */ + SerCommitSeqNo prepareSeqNo; + SerCommitSeqNo commitSeqNo; + + /* these values are not both interesting at the same time */ + union + { + SerCommitSeqNo earliestOutConflictCommit; /* when committed with + * conflict out */ + SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or + * no conflict out */ + } SeqNo; + SHM_QUEUE outConflicts; /* list of write transactions whose data we + * couldn't read. */ + SHM_QUEUE inConflicts; /* list of read transactions which couldn't + * see our write. */ + SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */ + SHM_QUEUE finishedLink; /* list link in + * FinishedSerializableTransactions */ + + /* + * perXactPredicateListLock is only used in parallel queries: it protects + * this SERIALIZABLEXACT's predicate lock list against other workers of + * the same session. 
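+ *
+ * (Editorial note, not part of the upstream header: a participant would
+ * typically bracket updates of that list with
+ * LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE) and
+ * LWLockRelease(&sxact->perXactPredicateListLock).)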
+ */ + LWLock perXactPredicateListLock; + + /* + * for r/o transactions: list of concurrent r/w transactions that we could + * potentially have conflicts with, and vice versa for r/w transactions + */ + SHM_QUEUE possibleUnsafeConflicts; + + TransactionId topXid; /* top level xid for the transaction, if one + * exists; else invalid */ + TransactionId finishedBefore; /* invalid means still running; else the + * struct expires when no serializable + * xids are before this. */ + TransactionId xmin; /* the transaction's snapshot xmin */ + uint32 flags; /* OR'd combination of values defined below */ + int pid; /* pid of associated process */ +} SERIALIZABLEXACT; + +#define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */ +#define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */ +#define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */ +#define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */ +/* + * The following flag actually means that the flagged transaction has a + * conflict out *to a transaction which committed ahead of it*. It's hard + * to get that into a name of a reasonable length. + */ +#define SXACT_FLAG_CONFLICT_OUT 0x00000010 +#define SXACT_FLAG_READ_ONLY 0x00000020 +#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040 +#define SXACT_FLAG_RO_SAFE 0x00000080 +#define SXACT_FLAG_RO_UNSAFE 0x00000100 +#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200 +#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400 +/* + * The following flag means the transaction has been partially released + * already, but is being preserved because parallel workers might have a + * reference to it. It'll be recycled by the leader at end-of-transaction. + */ +#define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800 + +/* + * The following types are used to provide an ad hoc list for holding + * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to + * access these by key -- there are direct pointers to these objects where + * needed. If a shared memory list is created, these types can probably be + * eliminated in favor of using the general solution. + */ +typedef struct PredXactListElementData +{ + SHM_QUEUE link; + SERIALIZABLEXACT sxact; +} PredXactListElementData; + +typedef struct PredXactListElementData *PredXactListElement; + +#define PredXactListElementDataSize \ + ((Size)MAXALIGN(sizeof(PredXactListElementData))) + +typedef struct PredXactListData +{ + SHM_QUEUE availableList; + SHM_QUEUE activeList; + + /* + * These global variables are maintained when registering and cleaning up + * serializable transactions. They must be global across all backends, + * but are not needed outside the predicate.c source file. Protected by + * SerializableXactHashLock. + */ + TransactionId SxactGlobalXmin; /* global xmin for active serializable + * transactions */ + int SxactGlobalXminCount; /* how many active serializable + * transactions have this xmin */ + int WritableSxactCount; /* how many non-read-only serializable + * transactions are active */ + SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically + * increasing number for commits + * of serializable transactions */ + /* Protected by SerializableXactHashLock. */ + SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and + * inConflicts for committed + * transactions through this seq + * no */ + /* Protected by SerializableFinishedListLock. 
*/ + SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this + * seq no */ + SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */ + + PredXactListElement element; +} PredXactListData; + +typedef struct PredXactListData *PredXactList; + +#define PredXactListDataSize \ + ((Size)MAXALIGN(sizeof(PredXactListData))) + + +/* + * The following types are used to provide lists of rw-conflicts between + * pairs of transactions. Since exactly the same information is needed, + * they are also used to record possible unsafe transaction relationships + * for purposes of identifying safe snapshots for read-only transactions. + * + * When a RWConflictData is not in use to record either type of relationship + * between a pair of transactions, it is kept on an "available" list. The + * outLink field is used for maintaining that list. + */ +typedef struct RWConflictData +{ + SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */ + SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */ + SERIALIZABLEXACT *sxactOut; + SERIALIZABLEXACT *sxactIn; +} RWConflictData; + +typedef struct RWConflictData *RWConflict; + +#define RWConflictDataSize \ + ((Size)MAXALIGN(sizeof(RWConflictData))) + +typedef struct RWConflictPoolHeaderData +{ + SHM_QUEUE availableList; + RWConflict element; +} RWConflictPoolHeaderData; + +typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader; + +#define RWConflictPoolHeaderDataSize \ + ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData))) + + +/* + * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable + * transaction or any of its subtransactions. + */ +typedef struct SERIALIZABLEXIDTAG +{ + TransactionId xid; +} SERIALIZABLEXIDTAG; + +/* + * The SERIALIZABLEXID struct provides a link from a TransactionId for a + * serializable transaction to the related SERIALIZABLEXACT record, even if + * the transaction has completed and its connection has been closed. + * + * These are created as new top level transaction IDs are first assigned to + * transactions which are participating in predicate locking. This may + * never happen for a particular transaction if it doesn't write anything. + * They are removed with their related serializable transaction objects. + * + * The SubTransGetTopmostTransaction method is used where necessary to get + * from an XID which might be from a subtransaction to the top level XID. + */ +typedef struct SERIALIZABLEXID +{ + /* hash key */ + SERIALIZABLEXIDTAG tag; + + /* data */ + SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */ +} SERIALIZABLEXID; + + +/* + * The PREDICATELOCKTARGETTAG struct identifies a database object which can + * be the target of predicate locks. + * + * Note that the hash function being used doesn't properly respect tag + * length -- if the length of the structure isn't a multiple of four bytes it + * will go to a four byte boundary past the end of the tag. If you change + * this struct, make sure any slack space is initialized, so that any random + * bytes in the middle or at the end are not included in the hash. + * + * TODO SSI: If we always use the same fields for the same type of value, we + * should rename these. Holding off until it's clear there are no exceptions. + * Since indexes are relations with blocks and tuples, it's looking likely that + * the rename will be possible. If not, we may need to divide the last field + * and use part of it for a target type, so that we know how to interpret the + * data.. 
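+ *
+ * Illustrative sketch (an editorial addition, not part of the upstream
+ * header): tags are built and decoded with the SET_PREDICATELOCKTARGETTAG_*
+ * and GET_PREDICATELOCKTARGETTAG_* macros defined later in this file, e.g.
+ *
+ *     PREDICATELOCKTARGETTAG tag;
+ *
+ *     SET_PREDICATELOCKTARGETTAG_PAGE(tag, dboid, reloid, blkno);
+ *     Assert(GET_PREDICATELOCKTARGETTAG_TYPE(tag) == PREDLOCKTAG_PAGE);
+ *
+ * ("dboid", "reloid" and "blkno" are stand-ins for a database OID,
+ * relation OID and block number).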
+ */ +typedef struct PREDICATELOCKTARGETTAG +{ + uint32 locktag_field1; /* a 32-bit ID field */ + uint32 locktag_field2; /* a 32-bit ID field */ + uint32 locktag_field3; /* a 32-bit ID field */ + uint32 locktag_field4; /* a 32-bit ID field */ +} PREDICATELOCKTARGETTAG; + +/* + * The PREDICATELOCKTARGET struct represents a database object on which there + * are predicate locks. + * + * A hash list of these objects is maintained in shared memory. An entry is + * added when a predicate lock is requested on an object which doesn't + * already have one. An entry is removed when the last lock is removed from + * its list. + */ +typedef struct PREDICATELOCKTARGET +{ + /* hash key */ + PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */ + + /* data */ + SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with + * predicate lock target */ +} PREDICATELOCKTARGET; + + +/* + * The PREDICATELOCKTAG struct identifies an individual predicate lock. + * + * It is the combination of predicate lock target (which is a lockable + * object) and a serializable transaction which has acquired a lock on that + * target. + */ +typedef struct PREDICATELOCKTAG +{ + PREDICATELOCKTARGET *myTarget; + SERIALIZABLEXACT *myXact; +} PREDICATELOCKTAG; + +/* + * The PREDICATELOCK struct represents an individual lock. + * + * An entry can be created here when the related database object is read, or + * by promotion of multiple finer-grained targets. All entries related to a + * serializable transaction are removed when that serializable transaction is + * cleaned up. Entries can also be removed when they are combined into a + * single coarser-grained lock entry. + */ +typedef struct PREDICATELOCK +{ + /* hash key */ + PREDICATELOCKTAG tag; /* unique identifier of lock */ + + /* data */ + SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of + * predicate locks */ + SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of + * predicate locks */ + SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */ +} PREDICATELOCK; + + +/* + * The LOCALPREDICATELOCK struct represents a local copy of data which is + * also present in the PREDICATELOCK table, organized for fast access without + * needing to acquire a LWLock. It is strictly for optimization. + * + * Each serializable transaction creates its own local hash table to hold a + * collection of these. This information is used to determine when a number + * of fine-grained locks should be promoted to a single coarser-grained lock. + * The information is maintained more-or-less in parallel to the + * PREDICATELOCK data, but because this data is not protected by locks and is + * only used in an optimization heuristic, it is allowed to drift in a few + * corner cases where maintaining exact data would be expensive. + * + * The hash table is created when the serializable transaction acquires its + * snapshot, and its memory is released upon completion of the transaction. + */ +typedef struct LOCALPREDICATELOCK +{ + /* hash key */ + PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */ + + /* data */ + bool held; /* is lock held, or just its children? */ + int childLocks; /* number of child locks currently held */ +} LOCALPREDICATELOCK; + + +/* + * The types of predicate locks which can be acquired. 
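+ *
+ * The target type is not stored in PREDICATELOCKTARGETTAG itself;
+ * GET_PREDICATELOCKTARGETTAG_TYPE (defined below) infers it from which tag
+ * fields are valid, roughly:
+ *
+ *		offset number valid                 -> PREDLOCKTAG_TUPLE
+ *		block number valid, offset invalid  -> PREDLOCKTAG_PAGE
+ *		neither valid                       -> PREDLOCKTAG_RELATION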
+ */ +typedef enum PredicateLockTargetType +{ + PREDLOCKTAG_RELATION, + PREDLOCKTAG_PAGE, + PREDLOCKTAG_TUPLE + /* TODO SSI: Other types may be needed for index locking */ +} PredicateLockTargetType; + + +/* + * This structure is used to quickly capture a copy of all predicate + * locks. This is currently used only by the pg_lock_status function, + * which in turn is used by the pg_locks view. + */ +typedef struct PredicateLockData +{ + int nelements; + PREDICATELOCKTARGETTAG *locktags; + SERIALIZABLEXACT *xacts; +} PredicateLockData; + + +/* + * These macros define how we map logical IDs of lockable objects into the + * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values, + * rather than accessing the fields directly. Note multiple eval of target! + */ +#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = InvalidBlockNumber, \ + (locktag).locktag_field4 = InvalidOffsetNumber) + +#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = (blocknum), \ + (locktag).locktag_field4 = InvalidOffsetNumber) + +#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = (reloid), \ + (locktag).locktag_field3 = (blocknum), \ + (locktag).locktag_field4 = (offnum)) + +#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \ + ((Oid) (locktag).locktag_field1) +#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \ + ((Oid) (locktag).locktag_field2) +#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \ + ((BlockNumber) (locktag).locktag_field3) +#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \ + ((OffsetNumber) (locktag).locktag_field4) +#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \ + (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \ + (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \ + PREDLOCKTAG_RELATION)) + +/* + * Two-phase commit statefile records. There are two types: for each + * transaction, we generate one per-transaction record and a variable + * number of per-predicate-lock records. + */ +typedef enum TwoPhasePredicateRecordType +{ + TWOPHASEPREDICATERECORD_XACT, + TWOPHASEPREDICATERECORD_LOCK +} TwoPhasePredicateRecordType; + +/* + * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not + * much is needed because most of it not meaningful for a recovered + * prepared transaction. + * + * In particular, we do not record the in and out conflict lists for a + * prepared transaction because the associated SERIALIZABLEXACTs will + * not be available after recovery. Instead, we simply record the + * existence of each type of conflict by setting the transaction's + * summary conflict in/out flag. 
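+ *
+ * For example (a sketch, not a code citation), a prepared transaction that
+ * had conflicts in both directions is reconstructed with roughly
+ *
+ *		sxact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN |
+ *						SXACT_FLAG_SUMMARY_CONFLICT_OUT;
+ *
+ * rather than with its original in/out conflict lists.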
+ */ +typedef struct TwoPhasePredicateXactRecord +{ + TransactionId xmin; + uint32 flags; +} TwoPhasePredicateXactRecord; + +/* Per-lock state */ +typedef struct TwoPhasePredicateLockRecord +{ + PREDICATELOCKTARGETTAG target; + uint32 filler; /* to avoid length change in back-patched fix */ +} TwoPhasePredicateLockRecord; + +typedef struct TwoPhasePredicateRecord +{ + TwoPhasePredicateRecordType type; + union + { + TwoPhasePredicateXactRecord xactRecord; + TwoPhasePredicateLockRecord lockRecord; + } data; +} TwoPhasePredicateRecord; + +/* + * Define a macro to use for an "empty" SERIALIZABLEXACT reference. + */ +#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL) + + +/* + * Function definitions for functions needing awareness of predicate + * locking internals. + */ +extern PredicateLockData *GetPredicateLockStatusData(void); +extern int GetSafeSnapshotBlockingPids(int blocked_pid, + int *output, int output_size); + +#endif /* PREDICATE_INTERNALS_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h new file mode 100644 index 0000000..1ee9000 --- /dev/null +++ b/src/include/storage/proc.h @@ -0,0 +1,333 @@ +/*------------------------------------------------------------------------- + * + * proc.h + * per-process shared memory data structures + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/proc.h + * + *------------------------------------------------------------------------- + */ +#ifndef _PROC_H_ +#define _PROC_H_ + +#include "access/clog.h" +#include "access/xlogdefs.h" +#include "lib/ilist.h" +#include "storage/latch.h" +#include "storage/lock.h" +#include "storage/pg_sema.h" +#include "storage/proclist_types.h" + +/* + * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds + * for non-aborted subtransactions of its current top transaction. These + * have to be treated as running XIDs by other backends. + * + * We also keep track of whether the cache overflowed (ie, the transaction has + * generated at least one subtransaction that didn't fit in the cache). + * If none of the caches have overflowed, we can assume that an XID that's not + * listed anywhere in the PGPROC array is not a running transaction. Else we + * have to look at pg_subtrans. + */ +#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */ + +struct XidCache +{ + TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; +}; + +/* + * Flags for PGXACT->vacuumFlags + * + * Note: If you modify these flags, you need to modify PROCARRAY_XXX flags + * in src/include/storage/procarray.h. + * + * PROC_RESERVED may later be assigned for use in vacuumFlags, but its value is + * used for PROCARRAY_SLOTS_XMIN in procarray.h, so GetOldestXmin won't be able + * to match and ignore processes with this flag set. + */ +#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? 
*/ +#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */ +#define PROC_IN_ANALYZE 0x04 /* currently running analyze */ +#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */ +#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical + * decoding outside xact */ +#define PROC_RESERVED 0x20 /* reserved for procarray */ + +/* flags reset at EOXact */ +#define PROC_VACUUM_STATE_MASK \ + (PROC_IN_VACUUM | PROC_IN_ANALYZE | PROC_VACUUM_FOR_WRAPAROUND) + +/* + * We allow a small number of "weak" relation locks (AccessShareLock, + * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure + * rather than the main lock table. This eases contention on the lock + * manager LWLocks. See storage/lmgr/README for additional details. + */ +#define FP_LOCK_SLOTS_PER_BACKEND 16 + +/* + * An invalid pgprocno. Must be larger than the maximum number of PGPROC + * structures we could possibly have. See comments for MAX_BACKENDS. + */ +#define INVALID_PGPROCNO PG_INT32_MAX + +/* + * Each backend has a PGPROC struct in shared memory. There is also a list of + * currently-unused PGPROC structs that will be reallocated to new backends. + * + * links: list link for any list the PGPROC is in. When waiting for a lock, + * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC + * is linked into ProcGlobal's freeProcs list. + * + * Note: twophase.c also sets up a dummy PGPROC struct for each currently + * prepared transaction. These PGPROCs appear in the ProcArray data structure + * so that the prepared transactions appear to be still running and are + * correctly shown as holding locks. A prepared transaction PGPROC can be + * distinguished from a real one at need by the fact that it has pid == 0. + * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused, + * but its myProcLocks[] lists are valid. + */ +struct PGPROC +{ + /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ + SHM_QUEUE links; /* list link if process is in a list */ + PGPROC **procgloballist; /* procglobal list that owns this PGPROC */ + + PGSemaphore sem; /* ONE semaphore to sleep on */ + int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */ + + Latch procLatch; /* generic latch for process */ + + LocalTransactionId lxid; /* local id of top-level transaction currently + * being executed by this proc, if running; + * else InvalidLocalTransactionId */ + int pid; /* Backend's process ID; 0 if prepared xact */ + int pgprocno; + + /* These fields are zero while a backend is still starting up: */ + BackendId backendId; /* This backend's backend ID (if assigned) */ + Oid databaseId; /* OID of database this backend is using */ + Oid roleId; /* OID of role using this backend */ + + Oid tempNamespaceId; /* OID of temp schema this backend is + * using */ + + bool isBackgroundWorker; /* true if background worker. */ + + /* + * While in hot standby mode, shows that a conflict signal has been sent + * for the current transaction. Set/cleared while holding ProcArrayLock, + * though not required. Accessed without lock, if needed. + */ + bool recoveryConflictPending; + + /* Info about LWLock the process is currently waiting for, if any. */ + bool lwWaiting; /* true if waiting for an LW lock */ + uint8 lwWaitMode; /* lwlock mode being waited for */ + proclist_node lwWaitLink; /* position in LW lock wait list */ + + /* Support for condition variables. 
*/ + proclist_node cvWaitLink; /* position in CV wait list */ + + /* Info about lock the process is currently waiting for, if any. */ + /* waitLock and waitProcLock are NULL if not currently waiting. */ + LOCK *waitLock; /* Lock object we're sleeping on ... */ + PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */ + LOCKMODE waitLockMode; /* type of lock we're waiting for */ + LOCKMASK heldLocks; /* bitmask for lock types already held on this + * lock object by this backend */ + + bool delayChkpt; /* true if this proc delays checkpoint start */ + + /* + * Info to allow us to wait for synchronous replication, if needed. + * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend. + * syncRepState must not be touched except by owning process or WALSender. + * syncRepLinks used only while holding SyncRepLock. + */ + XLogRecPtr waitLSN; /* waiting for this LSN or higher */ + int syncRepState; /* wait state for sync rep */ + SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */ + + /* + * All PROCLOCK objects for locks held or awaited by this backend are + * linked into one of these lists, according to the partition number of + * their lock. + */ + SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; + + struct XidCache subxids; /* cache for subtransaction XIDs */ + + /* Support for group XID clearing. */ + /* true, if member of ProcArray group waiting for XID clear */ + bool procArrayGroupMember; + /* next ProcArray group member waiting for XID clear */ + pg_atomic_uint32 procArrayGroupNext; + + /* + * latest transaction id among the transaction's main XID and + * subtransactions + */ + TransactionId procArrayGroupMemberXid; + + uint32 wait_event_info; /* proc's wait information */ + + /* Support for group transaction status update. */ + bool clogGroupMember; /* true, if member of clog group */ + pg_atomic_uint32 clogGroupNext; /* next clog group member */ + TransactionId clogGroupMemberXid; /* transaction id of clog group member */ + XidStatus clogGroupMemberXidStatus; /* transaction status of clog + * group member */ + int clogGroupMemberPage; /* clog page corresponding to + * transaction id of clog group member */ + XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog + * group member */ + + /* Lock manager data, recording fast-path locks taken by this backend. */ + LWLock fpInfoLock; /* protects per-backend fast-path state */ + uint64 fpLockBits; /* lock modes held for each fast-path slot */ + Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ + bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ + LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID + * lock */ + + /* + * Support for lock groups. Use LockHashPartitionLockByProc on the group + * leader to get the LWLock protecting these fields. + */ + PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */ + dlist_head lockGroupMembers; /* list of members, if I'm a leader */ + dlist_node lockGroupLink; /* my member link, if I'm a member */ +}; + +/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ + + +extern PGDLLIMPORT PGPROC *MyProc; +extern PGDLLIMPORT struct PGXACT *MyPgXact; + +/* + * Prior to PostgreSQL 9.2, the fields below were stored as part of the + * PGPROC. However, benchmarking revealed that packing these particular + * members into a separate array as tightly as possible sped up GetSnapshotData + * considerably on systems with many CPU cores, by reducing the number of + * cache lines needing to be fetched. 
Thus, think very carefully before adding + * anything else here. + */ +typedef struct PGXACT +{ + TransactionId xid; /* id of top-level transaction currently being + * executed by this proc, if running and XID + * is assigned; else InvalidTransactionId */ + + TransactionId xmin; /* minimal running XID as it was when we were + * starting our xact, excluding LAZY VACUUM: + * vacuum must not remove tuples deleted by + * xid >= xmin ! */ + + uint8 vacuumFlags; /* vacuum-related flags, see above */ + bool overflowed; + + uint8 nxids; +} PGXACT; + +/* + * There is one ProcGlobal struct for the whole database cluster. + */ +typedef struct PROC_HDR +{ + /* Array of PGPROC structures (not including dummies for prepared txns) */ + PGPROC *allProcs; + /* Array of PGXACT structures (not including dummies for prepared txns) */ + PGXACT *allPgXact; + /* Length of allProcs array */ + uint32 allProcCount; + /* Head of list of free PGPROC structures */ + PGPROC *freeProcs; + /* Head of list of autovacuum's free PGPROC structures */ + PGPROC *autovacFreeProcs; + /* Head of list of bgworker free PGPROC structures */ + PGPROC *bgworkerFreeProcs; + /* Head of list of walsender free PGPROC structures */ + PGPROC *walsenderFreeProcs; + /* First pgproc waiting for group XID clear */ + pg_atomic_uint32 procArrayGroupFirst; + /* First pgproc waiting for group transaction status update */ + pg_atomic_uint32 clogGroupFirst; + /* WALWriter process's latch */ + Latch *walwriterLatch; + /* Checkpointer process's latch */ + Latch *checkpointerLatch; + /* Current shared estimate of appropriate spins_per_delay value */ + int spins_per_delay; + /* The proc of the Startup process, since not in ProcArray */ + PGPROC *startupProc; + int startupProcPid; + /* Buffer id of the buffer that Startup process waits for pin on, or -1 */ + int startupBufferPinWaitBufId; +} PROC_HDR; + +extern PGDLLIMPORT PROC_HDR *ProcGlobal; + +extern PGPROC *PreparedXactProcs; + +/* Accessor for PGPROC given a pgprocno. */ +#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)]) + +/* + * We set aside some extra PGPROC structures for auxiliary processes, + * ie things that aren't full-fledged backends but need shmem access. + * + * Background writer, checkpointer and WAL writer run during normal operation. + * Startup process and WAL receiver also consume 2 slots, but WAL writer is + * launched only after startup has exited, so we only need 4 slots. 
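+ *
+ * A rough tally of that reasoning (illustration only):
+ *
+ *		during recovery:  startup + walreceiver + bgwriter + checkpointer = 4
+ *		normal operation: bgwriter + checkpointer + walwriter             = 3
+ *
+ * so four auxiliary slots cover the worst case.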
+ */ +#define NUM_AUXILIARY_PROCS 4 + +/* configurable options */ +extern PGDLLIMPORT int DeadlockTimeout; +extern PGDLLIMPORT int StatementTimeout; +extern PGDLLIMPORT int LockTimeout; +extern PGDLLIMPORT int IdleInTransactionSessionTimeout; +extern bool log_lock_waits; + + +/* + * Function Prototypes + */ +extern int ProcGlobalSemas(void); +extern Size ProcGlobalShmemSize(void); +extern void InitProcGlobal(void); +extern void InitProcess(void); +extern void InitProcessPhase2(void); +extern void InitAuxiliaryProcess(void); + +extern void PublishStartupProcessInformation(void); +extern void SetStartupBufferPinWaitBufId(int bufid); +extern int GetStartupBufferPinWaitBufId(void); + +extern bool HaveNFreeProcs(int n); +extern void ProcReleaseLocks(bool isCommit); + +extern void ProcQueueInit(PROC_QUEUE *queue); +extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); +extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus); +extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); +extern void CheckDeadLockAlert(void); +extern bool IsWaitingForLock(void); +extern void LockErrorCleanup(void); + +extern void ProcWaitForSignal(uint32 wait_event_info); +extern void ProcSendSignal(int pid); + +extern PGPROC *AuxiliaryPidGetProc(int pid); + +extern void BecomeLockGroupLeader(void); +extern bool BecomeLockGroupMember(PGPROC *leader, int pid); + +#endif /* _PROC_H_ */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h new file mode 100644 index 0000000..200ef8d --- /dev/null +++ b/src/include/storage/procarray.h @@ -0,0 +1,130 @@ +/*------------------------------------------------------------------------- + * + * procarray.h + * POSTGRES process array definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/procarray.h + * + *------------------------------------------------------------------------- + */ +#ifndef PROCARRAY_H +#define PROCARRAY_H + +#include "storage/lock.h" +#include "storage/standby.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" + + +/* + * These are to implement PROCARRAY_FLAGS_XXX + * + * Note: These flags are cloned from PROC_XXX flags in src/include/storage/proc.h + * to avoid forcing to include proc.h when including procarray.h. So if you modify + * PROC_XXX flags, you need to modify these flags. + */ +#define PROCARRAY_VACUUM_FLAG 0x02 /* currently running lazy + * vacuum */ +#define PROCARRAY_ANALYZE_FLAG 0x04 /* currently running + * analyze */ +#define PROCARRAY_LOGICAL_DECODING_FLAG 0x10 /* currently doing logical + * decoding outside xact */ + +#define PROCARRAY_SLOTS_XMIN 0x20 /* replication slot xmin, + * catalog_xmin */ +/* + * Only flags in PROCARRAY_PROC_FLAGS_MASK are considered when matching + * PGXACT->vacuumFlags. Other flags are used for different purposes and + * have no corresponding PROC flag equivalent. 
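+ *
+ * Typical use of the flag sets defined just below (a sketch): a lazy vacuum
+ * computing its removal horizon might call
+ *
+ *		GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM);
+ *
+ * so that xmins advertised by other backends running lazy vacuum are
+ * ignored.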
+ */ +#define PROCARRAY_PROC_FLAGS_MASK (PROCARRAY_VACUUM_FLAG | \ + PROCARRAY_ANALYZE_FLAG | \ + PROCARRAY_LOGICAL_DECODING_FLAG) + +/* Use the following flags as an input "flags" to GetOldestXmin function */ +/* Consider all backends except for logical decoding ones which manage xmin separately */ +#define PROCARRAY_FLAGS_DEFAULT PROCARRAY_LOGICAL_DECODING_FLAG +/* Ignore vacuum backends */ +#define PROCARRAY_FLAGS_VACUUM PROCARRAY_FLAGS_DEFAULT | PROCARRAY_VACUUM_FLAG +/* Ignore analyze backends */ +#define PROCARRAY_FLAGS_ANALYZE PROCARRAY_FLAGS_DEFAULT | PROCARRAY_ANALYZE_FLAG +/* Ignore both vacuum and analyze backends */ +#define PROCARRAY_FLAGS_VACUUM_ANALYZE PROCARRAY_FLAGS_DEFAULT | PROCARRAY_VACUUM_FLAG | PROCARRAY_ANALYZE_FLAG + +extern Size ProcArrayShmemSize(void); +extern void CreateSharedProcArray(void); +extern void ProcArrayAdd(PGPROC *proc); +extern void ProcArrayRemove(PGPROC *proc, TransactionId latestXid); + +extern void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid); +extern void ProcArrayClearTransaction(PGPROC *proc); + +extern void ProcArrayInitRecovery(TransactionId initializedUptoXID); +extern void ProcArrayApplyRecoveryInfo(RunningTransactions running); +extern void ProcArrayApplyXidAssignment(TransactionId topxid, + int nsubxids, TransactionId *subxids); + +extern void RecordKnownAssignedTransactionIds(TransactionId xid); +extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, + int nsubxids, TransactionId *subxids, + TransactionId max_xid); +extern void ExpireAllKnownAssignedTransactionIds(void); +extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid); + +extern int GetMaxSnapshotXidCount(void); +extern int GetMaxSnapshotSubxidCount(void); + +extern Snapshot GetSnapshotData(Snapshot snapshot); + +extern bool ProcArrayInstallImportedXmin(TransactionId xmin, + VirtualTransactionId *sourcevxid); +extern bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc); + +extern RunningTransactions GetRunningTransactionData(void); + +extern bool TransactionIdIsInProgress(TransactionId xid); +extern bool TransactionIdIsActive(TransactionId xid); +extern TransactionId GetOldestXmin(Relation rel, int flags); +extern TransactionId GetOldestActiveTransactionId(void); +extern TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly); + +extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids); +extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids); + +extern PGPROC *BackendPidGetProc(int pid); +extern PGPROC *BackendPidGetProcWithLock(int pid); +extern int BackendXidGetPid(TransactionId xid); +extern bool IsBackendPid(int pid); + +extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin, + bool excludeXmin0, bool allDbs, int excludeVacuum, + int *nvxids); +extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid); +extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode); +extern pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, + bool conflictPending); + +extern bool MinimumActiveBackends(int min); +extern int CountDBBackends(Oid databaseid); +extern int CountDBConnections(Oid databaseid); +extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending); +extern int CountUserBackends(Oid roleid); +extern bool CountOtherDBBackends(Oid databaseId, + int *nbackends, int *nprepared); +extern void TerminateOtherDBBackends(Oid 
databaseId); + +extern void XidCacheRemoveRunningXids(TransactionId xid, + int nxids, const TransactionId *xids, + TransactionId latestXid); + +extern void ProcArraySetReplicationSlotXmin(TransactionId xmin, + TransactionId catalog_xmin, bool already_locked); + +extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, + TransactionId *catalog_xmin); + +#endif /* PROCARRAY_H */ diff --git a/src/include/storage/proclist.h b/src/include/storage/proclist.h new file mode 100644 index 0000000..9420091 --- /dev/null +++ b/src/include/storage/proclist.h @@ -0,0 +1,219 @@ +/*------------------------------------------------------------------------- + * + * proclist.h + * operations on doubly-linked lists of pgprocnos + * + * The interface is similar to dlist from ilist.h, but uses pgprocno instead + * of pointers. This allows proclist_head to be mapped at different addresses + * in different backends. + * + * See proclist_types.h for the structs that these functions operate on. They + * are separated to break a header dependency cycle with proc.h. + * + * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/storage/proclist.h + *------------------------------------------------------------------------- + */ +#ifndef PROCLIST_H +#define PROCLIST_H + +#include "storage/proc.h" +#include "storage/proclist_types.h" + +/* + * Initialize a proclist. + */ +static inline void +proclist_init(proclist_head *list) +{ + list->head = list->tail = INVALID_PGPROCNO; +} + +/* + * Is the list empty? + */ +static inline bool +proclist_is_empty(proclist_head *list) +{ + return list->head == INVALID_PGPROCNO; +} + +/* + * Get a pointer to a proclist_node inside a given PGPROC, given a procno and + * the proclist_node field's offset within struct PGPROC. + */ +static inline proclist_node * +proclist_node_get(int procno, size_t node_offset) +{ + char *entry = (char *) GetPGProcByNumber(procno); + + return (proclist_node *) (entry + node_offset); +} + +/* + * Insert a process at the beginning of a list. + */ +static inline void +proclist_push_head_offset(proclist_head *list, int procno, size_t node_offset) +{ + proclist_node *node = proclist_node_get(procno, node_offset); + + Assert(node->next == 0 && node->prev == 0); + + if (list->head == INVALID_PGPROCNO) + { + Assert(list->tail == INVALID_PGPROCNO); + node->next = node->prev = INVALID_PGPROCNO; + list->head = list->tail = procno; + } + else + { + Assert(list->tail != INVALID_PGPROCNO); + Assert(list->head != procno); + Assert(list->tail != procno); + node->next = list->head; + proclist_node_get(node->next, node_offset)->prev = procno; + node->prev = INVALID_PGPROCNO; + list->head = procno; + } +} + +/* + * Insert a process at the end of a list. + */ +static inline void +proclist_push_tail_offset(proclist_head *list, int procno, size_t node_offset) +{ + proclist_node *node = proclist_node_get(procno, node_offset); + + Assert(node->next == 0 && node->prev == 0); + + if (list->tail == INVALID_PGPROCNO) + { + Assert(list->head == INVALID_PGPROCNO); + node->next = node->prev = INVALID_PGPROCNO; + list->head = list->tail = procno; + } + else + { + Assert(list->head != INVALID_PGPROCNO); + Assert(list->head != procno); + Assert(list->tail != procno); + node->prev = list->tail; + proclist_node_get(node->prev, node_offset)->next = procno; + node->next = INVALID_PGPROCNO; + list->tail = procno; + } +} + +/* + * Delete a process from a list --- it must be in the list! 
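+ *
+ * Callers normally reach this through the offsetof-based wrapper macros
+ * defined near the end of this file, e.g. (sketch, with "waiters" standing
+ * in for some proclist_head in shared memory):
+ *
+ *		proclist_push_tail(&waiters, MyProc->pgprocno, lwWaitLink);
+ *		...
+ *		proclist_delete(&waiters, MyProc->pgprocno, lwWaitLink);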
+ */ +static inline void +proclist_delete_offset(proclist_head *list, int procno, size_t node_offset) +{ + proclist_node *node = proclist_node_get(procno, node_offset); + + Assert(node->next != 0 || node->prev != 0); + + if (node->prev == INVALID_PGPROCNO) + { + Assert(list->head == procno); + list->head = node->next; + } + else + proclist_node_get(node->prev, node_offset)->next = node->next; + + if (node->next == INVALID_PGPROCNO) + { + Assert(list->tail == procno); + list->tail = node->prev; + } + else + proclist_node_get(node->next, node_offset)->prev = node->prev; + + node->next = node->prev = 0; +} + +/* + * Check if a process is currently in a list. It must be known that the + * process is not in any _other_ proclist that uses the same proclist_node, + * so that the only possibilities are that it is in this list or none. + */ +static inline bool +proclist_contains_offset(proclist_head *list, int procno, + size_t node_offset) +{ + proclist_node *node = proclist_node_get(procno, node_offset); + + /* If it's not in any list, it's definitely not in this one. */ + if (node->prev == 0 && node->next == 0) + return false; + + /* + * It must, in fact, be in this list. Ideally, in assert-enabled builds, + * we'd verify that. But since this function is typically used while + * holding a spinlock, crawling the whole list is unacceptable. However, + * we can verify matters in O(1) time when the node is a list head or + * tail, and that seems worth doing, since in practice that should often + * be enough to catch mistakes. + */ + Assert(node->prev != INVALID_PGPROCNO || list->head == procno); + Assert(node->next != INVALID_PGPROCNO || list->tail == procno); + + return true; +} + +/* + * Remove and return the first process from a list (there must be one). + */ +static inline PGPROC * +proclist_pop_head_node_offset(proclist_head *list, size_t node_offset) +{ + PGPROC *proc; + + Assert(!proclist_is_empty(list)); + proc = GetPGProcByNumber(list->head); + proclist_delete_offset(list, list->head, node_offset); + return proc; +} + +/* + * Helper macros to avoid repetition of offsetof(PGPROC, ). + * 'link_member' is the name of a proclist_node member in PGPROC. + */ +#define proclist_delete(list, procno, link_member) \ + proclist_delete_offset((list), (procno), offsetof(PGPROC, link_member)) +#define proclist_push_head(list, procno, link_member) \ + proclist_push_head_offset((list), (procno), offsetof(PGPROC, link_member)) +#define proclist_push_tail(list, procno, link_member) \ + proclist_push_tail_offset((list), (procno), offsetof(PGPROC, link_member)) +#define proclist_pop_head_node(list, link_member) \ + proclist_pop_head_node_offset((list), offsetof(PGPROC, link_member)) +#define proclist_contains(list, procno, link_member) \ + proclist_contains_offset((list), (procno), offsetof(PGPROC, link_member)) + +/* + * Iterate through the list pointed at by 'lhead', storing the current + * position in 'iter'. 'link_member' is the name of a proclist_node member in + * PGPROC. Access the current position with iter.cur. + * + * The only list modification allowed while iterating is deleting the current + * node with proclist_delete(list, iter.cur, node_offset). + */ +#define proclist_foreach_modify(iter, lhead, link_member) \ + for (AssertVariableIsOfTypeMacro(iter, proclist_mutable_iter), \ + AssertVariableIsOfTypeMacro(lhead, proclist_head *), \ + (iter).cur = (lhead)->head, \ + (iter).next = (iter).cur == INVALID_PGPROCNO ? 
INVALID_PGPROCNO : \ + proclist_node_get((iter).cur, \ + offsetof(PGPROC, link_member))->next; \ + (iter).cur != INVALID_PGPROCNO; \ + (iter).cur = (iter).next, \ + (iter).next = (iter).cur == INVALID_PGPROCNO ? INVALID_PGPROCNO : \ + proclist_node_get((iter).cur, \ + offsetof(PGPROC, link_member))->next) + +#endif /* PROCLIST_H */ diff --git a/src/include/storage/proclist_types.h b/src/include/storage/proclist_types.h new file mode 100644 index 0000000..aba5578 --- /dev/null +++ b/src/include/storage/proclist_types.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * proclist_types.h + * doubly-linked lists of pgprocnos + * + * See proclist.h for functions that operate on these types. + * + * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/storage/proclist_types.h + *------------------------------------------------------------------------- + */ + +#ifndef PROCLIST_TYPES_H +#define PROCLIST_TYPES_H + +/* + * A node in a doubly-linked list of processes. The link fields contain + * the 0-based PGPROC indexes of the next and previous process, or + * INVALID_PGPROCNO in the next-link of the last node and the prev-link + * of the first node. A node that is currently not in any list + * should have next == prev == 0; this is not a possible state for a node + * that is in a list, because we disallow circularity. + */ +typedef struct proclist_node +{ + int next; /* pgprocno of the next PGPROC */ + int prev; /* pgprocno of the prev PGPROC */ +} proclist_node; + +/* + * Header of a doubly-linked list of PGPROCs, identified by pgprocno. + * An empty list is represented by head == tail == INVALID_PGPROCNO. + */ +typedef struct proclist_head +{ + int head; /* pgprocno of the head PGPROC */ + int tail; /* pgprocno of the tail PGPROC */ +} proclist_head; + +/* + * List iterator allowing some modifications while iterating. + */ +typedef struct proclist_mutable_iter +{ + int cur; /* pgprocno of the current PGPROC */ + int next; /* pgprocno of the next PGPROC */ +} proclist_mutable_iter; + +#endif /* PROCLIST_TYPES_H */ diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h new file mode 100644 index 0000000..5cb3969 --- /dev/null +++ b/src/include/storage/procsignal.h @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------- + * + * procsignal.h + * Routines for interprocess signaling + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/procsignal.h + * + *------------------------------------------------------------------------- + */ +#ifndef PROCSIGNAL_H +#define PROCSIGNAL_H + +#include "storage/backendid.h" + + +/* + * Reasons for signaling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). We can cope with concurrent signals for different + * reasons. However, if the same reason is signaled multiple times in quick + * succession, the process is likely to observe only one notification of it. + * This is okay for the present uses. + * + * Also, because of race conditions, it's important that all the signals be + * defined so that no harm is done if a process mistakenly receives one. 
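+ *
+ * Typical use (a sketch): to ask a particular backend to process sinval
+ * catchup, a sender does roughly
+ *
+ *		SendProcSignal(pid, PROCSIG_CATCHUP_INTERRUPT, backendId);
+ *
+ * and the recipient's SIGUSR1 handler (procsignal_sigusr1_handler) notices
+ * the request and services it at the next safe opportunity.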
+ */ +typedef enum +{ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ + PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */ + PROCSIG_BARRIER, /* global barrier interrupt */ + + /* Recovery conflict reasons */ + PROCSIG_RECOVERY_CONFLICT_DATABASE, + PROCSIG_RECOVERY_CONFLICT_TABLESPACE, + PROCSIG_RECOVERY_CONFLICT_LOCK, + PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + + NUM_PROCSIGNALS /* Must be last! */ +} ProcSignalReason; + +typedef enum +{ + /* + * XXX. PROCSIGNAL_BARRIER_PLACEHOLDER should be replaced when the first + * real user of the ProcSignalBarrier mechanism is added. It's just here + * for now because we can't have an empty enum. + */ + PROCSIGNAL_BARRIER_PLACEHOLDER = 0 +} ProcSignalBarrierType; + +/* + * prototypes for functions in procsignal.c + */ +extern Size ProcSignalShmemSize(void); +extern void ProcSignalShmemInit(void); + +extern void ProcSignalInit(int pss_idx); +extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + BackendId backendId); + +extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); +extern void WaitForProcSignalBarrier(uint64 generation); +extern void ProcessProcSignalBarrier(void); + +extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + +#endif /* PROCSIGNAL_H */ diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h new file mode 100644 index 0000000..15d2e41 --- /dev/null +++ b/src/include/storage/reinit.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * reinit.h + * Reinitialization of unlogged relations + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/reinit.h + * + *------------------------------------------------------------------------- + */ + +#ifndef REINIT_H +#define REINIT_H + +#include "common/relpath.h" + + +extern void ResetUnloggedRelations(int op); +extern bool parse_filename_for_nontemp_relation(const char *name, + int *oidchars, ForkNumber *fork); + +#define UNLOGGED_RELATION_CLEANUP 0x0001 +#define UNLOGGED_RELATION_INIT 0x0002 + +#endif /* REINIT_H */ diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h new file mode 100644 index 0000000..4de9fc1 --- /dev/null +++ b/src/include/storage/relfilenode.h @@ -0,0 +1,99 @@ +/*------------------------------------------------------------------------- + * + * relfilenode.h + * Physical access information for relations. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/relfilenode.h + * + *------------------------------------------------------------------------- + */ +#ifndef RELFILENODE_H +#define RELFILENODE_H + +#include "common/relpath.h" +#include "storage/backendid.h" + +/* + * RelFileNode must provide all that we need to know to physically access + * a relation, with the exception of the backend ID, which can be provided + * separately. Note, however, that a "physical" relation is comprised of + * multiple files on the filesystem, as each fork is stored as a separate + * file, and each fork can be divided into multiple segments. See md.c. 
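+ *
+ * (As a concrete illustration, not a specification: an ordinary relation in
+ * its database's default tablespace is stored under "base/<dbNode>/<relNode>",
+ * a shared relation under "global/<relNode>", with additional segments named
+ * "<relNode>.1", "<relNode>.2", ... and other forks carrying suffixes such as
+ * "_fsm" and "_vm"; see common/relpath.h for the path construction.)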
+ * + * spcNode identifies the tablespace of the relation. It corresponds to + * pg_tablespace.oid. + * + * dbNode identifies the database of the relation. It is zero for + * "shared" relations (those common to all databases of a cluster). + * Nonzero dbNode values correspond to pg_database.oid. + * + * relNode identifies the specific relation. relNode corresponds to + * pg_class.relfilenode (NOT pg_class.oid, because we need to be able + * to assign new physical files to relations in some situations). + * Notice that relNode is only unique within a database in a particular + * tablespace. + * + * Note: spcNode must be GLOBALTABLESPACE_OID if and only if dbNode is + * zero. We support shared relations only in the "global" tablespace. + * + * Note: in pg_class we allow reltablespace == 0 to denote that the + * relation is stored in its database's "default" tablespace (as + * identified by pg_database.dattablespace). However this shorthand + * is NOT allowed in RelFileNode structs --- the real tablespace ID + * must be supplied when setting spcNode. + * + * Note: in pg_class, relfilenode can be zero to denote that the relation + * is a "mapped" relation, whose current true filenode number is available + * from relmapper.c. Again, this case is NOT allowed in RelFileNodes. + * + * Note: various places use RelFileNode in hashtable keys. Therefore, + * there *must not* be any unused padding bytes in this struct. That + * should be safe as long as all the fields are of type Oid. + */ +typedef struct RelFileNode +{ + Oid spcNode; /* tablespace */ + Oid dbNode; /* database */ + Oid relNode; /* relation */ +} RelFileNode; + +/* + * Augmenting a relfilenode with the backend ID provides all the information + * we need to locate the physical storage. The backend ID is InvalidBackendId + * for regular relations (those accessible to more than one backend), or the + * owning backend's ID for backend-local relations. Backend-local relations + * are always transient and removed in case of a database crash; they are + * never WAL-logged or fsync'd. + */ +typedef struct RelFileNodeBackend +{ + RelFileNode node; + BackendId backend; +} RelFileNodeBackend; + +#define RelFileNodeBackendIsTemp(rnode) \ + ((rnode).backend != InvalidBackendId) + +/* + * Note: RelFileNodeEquals and RelFileNodeBackendEquals compare relNode first + * since that is most likely to be different in two unequal RelFileNodes. It + * is probably redundant to compare spcNode if the other fields are found equal, + * but do it anyway to be sure. Likewise for checking the backend ID in + * RelFileNodeBackendEquals. + */ +#define RelFileNodeEquals(node1, node2) \ + ((node1).relNode == (node2).relNode && \ + (node1).dbNode == (node2).dbNode && \ + (node1).spcNode == (node2).spcNode) + +#define RelFileNodeBackendEquals(node1, node2) \ + ((node1).node.relNode == (node2).node.relNode && \ + (node1).node.dbNode == (node2).node.dbNode && \ + (node1).backend == (node2).backend && \ + (node1).node.spcNode == (node2).node.spcNode) + +#endif /* RELFILENODE_H */ diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h new file mode 100644 index 0000000..31a5ca6 --- /dev/null +++ b/src/include/storage/s_lock.h @@ -0,0 +1,1047 @@ +/*------------------------------------------------------------------------- + * + * s_lock.h + * Hardware-dependent implementation of spinlocks. + * + * NOTE: none of the macros in this file are intended to be called directly. + * Call them through the hardware-independent macros in spin.h. 
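+ *
+ * (Caller-level sketch of the intended usage, via spin.h rather than these
+ * macros:
+ *
+ *		SpinLockAcquire(&shared->mutex);
+ *		... read or update a few fields of shared state ...
+ *		SpinLockRelease(&shared->mutex);
+ *
+ * where "shared->mutex" stands in for any slock_t protecting a small piece
+ * of shared memory.)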
+ * + * The following hardware-dependent macros must be provided for each + * supported platform: + * + * void S_INIT_LOCK(slock_t *lock) + * Initialize a spinlock (to the unlocked state). + * + * int S_LOCK(slock_t *lock) + * Acquire a spinlock, waiting if necessary. + * Time out and abort() if unable to acquire the lock in a + * "reasonable" amount of time --- typically ~ 1 minute. + * Should return number of "delays"; see s_lock.c + * + * void S_UNLOCK(slock_t *lock) + * Unlock a previously acquired lock. + * + * bool S_LOCK_FREE(slock_t *lock) + * Tests if the lock is free. Returns true if free, false if locked. + * This does *not* change the state of the lock. + * + * void SPIN_DELAY(void) + * Delay operation to occur inside spinlock wait loop. + * + * Note to implementors: there are default implementations for all these + * macros at the bottom of the file. Check if your platform can use + * these or needs to override them. + * + * Usually, S_LOCK() is implemented in terms of even lower-level macros + * TAS() and TAS_SPIN(): + * + * int TAS(slock_t *lock) + * Atomic test-and-set instruction. Attempt to acquire the lock, + * but do *not* wait. Returns 0 if successful, nonzero if unable + * to acquire the lock. + * + * int TAS_SPIN(slock_t *lock) + * Like TAS(), but this version is used when waiting for a lock + * previously found to be contended. By default, this is the + * same as TAS(), but on some architectures it's better to poll a + * contended lock using an unlocked instruction and retry the + * atomic test-and-set only when it appears free. + * + * TAS() and TAS_SPIN() are NOT part of the API, and should never be called + * directly. + * + * CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report + * failure to acquire a lock even when the lock is not locked. For example, + * on Alpha TAS() will "fail" if interrupted. Therefore a retry loop must + * always be used, even if you are certain the lock is free. + * + * It is the responsibility of these macros to make sure that the compiler + * does not re-order accesses to shared memory to precede the actual lock + * acquisition, or follow the lock release. Prior to PostgreSQL 9.5, this + * was the caller's responsibility, which meant that callers had to use + * volatile-qualified pointers to refer to both the spinlock itself and the + * shared data being accessed within the spinlocked critical section. This + * was notationally awkward, easy to forget (and thus error-prone), and + * prevented some useful compiler optimizations. For these reasons, we + * now require that the macros themselves prevent compiler re-ordering, + * so that the caller doesn't need to take special precautions. + * + * On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and + * S_UNLOCK() macros must further include hardware-level memory fence + * instructions to prevent similar re-ordering at the hardware level. + * TAS() and TAS_SPIN() must guarantee that loads and stores issued after + * the macro are not executed until the lock has been obtained. Conversely, + * S_UNLOCK() must guarantee that loads and stores issued before the macro + * have been executed before the lock is released. + * + * On most supported platforms, TAS() uses a tas() function written + * in assembly language to execute a hardware atomic-test-and-set + * instruction. Equivalent OS-supplied mutex routines could be used too. 
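+ *
+ * (The retry loop demanded by the CAUTION above looks, in sketch form, like
+ *
+ *		while (TAS_SPIN(lock))
+ *			SPIN_DELAY();
+ *
+ * the real S_LOCK()/s_lock() path additionally counts the delays, backs off,
+ * and eventually times out; see s_lock.c.)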
+ * + * If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not + * defined), then we fall back on an emulation that uses SysV semaphores + * (see spin.c). This emulation will be MUCH MUCH slower than a proper TAS() + * implementation, because of the cost of a kernel call per lock or unlock. + * An old report is that Postgres spends around 40% of its time in semop(2) + * when using the SysV semaphore code. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/s_lock.h + * + *------------------------------------------------------------------------- + */ +#ifndef S_LOCK_H +#define S_LOCK_H + +#ifdef FRONTEND +#error "s_lock.h may not be included from frontend code" +#endif + +#ifdef HAVE_SPINLOCKS /* skip spinlocks if requested */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +/************************************************************************* + * All the gcc inlines + * Gcc consistently defines the CPU as __cpu__. + * Other compilers use __cpu or __cpu__ so we test for both in those cases. + */ + +/*---------- + * Standard gcc asm format (assuming "volatile slock_t *lock"): + + __asm__ __volatile__( + " instruction \n" + " instruction \n" + " instruction \n" +: "=r"(_res), "+m"(*lock) // return register, in/out lock value +: "r"(lock) // lock pointer, in input register +: "memory", "cc"); // show clobbered registers here + + * The output-operands list (after first colon) should always include + * "+m"(*lock), whether or not the asm code actually refers to this + * operand directly. This ensures that gcc believes the value in the + * lock variable is used and set by the asm code. Also, the clobbers + * list (after third colon) should always include "memory"; this prevents + * gcc from thinking it can cache the values of shared-memory fields + * across the asm code. Add "cc" if your asm code changes the condition + * code register, and also list any temp registers the code uses. + *---------- + */ + + +#ifdef __i386__ /* 32-bit i386 */ +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register slock_t _res = 1; + + /* + * Use a non-locking test before asserting the bus lock. Note that the + * extra test appears to be a small loss on some x86 platforms and a small + * win on others; it's by no means clear that we should keep it. + * + * When this was last tested, we didn't have separate TAS() and TAS_SPIN() + * macros. Nowadays it probably would be better to do a non-locking test + * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the + * testing to verify that. Without some empirical evidence, better to + * leave it alone. + */ + __asm__ __volatile__( + " cmpb $0,%1 \n" + " jne 1f \n" + " lock \n" + " xchgb %0,%1 \n" + "1: \n" +: "+q"(_res), "+m"(*lock) +: /* no inputs */ +: "memory", "cc"); + return (int) _res; +} + +#define SPIN_DELAY() spin_delay() + +static __inline__ void +spin_delay(void) +{ + /* + * This sequence is equivalent to the PAUSE instruction ("rep" is + * ignored by old IA32 processors if the following instruction is + * not a string operation); the IA-32 Architecture Software + * Developer's Manual, Vol. 
3, Section 7.7.2 describes why using + * PAUSE in the inner loop of a spin lock is necessary for good + * performance: + * + * The PAUSE instruction improves the performance of IA-32 + * processors supporting Hyper-Threading Technology when + * executing spin-wait loops and other routines where one + * thread is accessing a shared lock or semaphore in a tight + * polling loop. When executing a spin-wait loop, the + * processor can suffer a severe performance penalty when + * exiting the loop because it detects a possible memory order + * violation and flushes the core processor's pipeline. The + * PAUSE instruction provides a hint to the processor that the + * code sequence is a spin-wait loop. The processor uses this + * hint to avoid the memory order violation and prevent the + * pipeline flush. In addition, the PAUSE instruction + * de-pipelines the spin-wait loop to prevent it from + * consuming execution resources excessively. + */ + __asm__ __volatile__( + " rep; nop \n"); +} + +#endif /* __i386__ */ + + +#ifdef __x86_64__ /* AMD Opteron, Intel EM64T */ +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; + +#define TAS(lock) tas(lock) + +/* + * On Intel EM64T, it's a win to use a non-locking test before the xchg proper, + * but only when spinning. + * + * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T + * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is + * available at: + * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures + */ +#define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock)) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register slock_t _res = 1; + + __asm__ __volatile__( + " lock \n" + " xchgb %0,%1 \n" +: "+q"(_res), "+m"(*lock) +: /* no inputs */ +: "memory", "cc"); + return (int) _res; +} + +#define SPIN_DELAY() spin_delay() + +static __inline__ void +spin_delay(void) +{ + /* + * Adding a PAUSE in the spin delay loop is demonstrably a no-op on + * Opteron, but it may be of some use on EM64T, so we keep it. + */ + __asm__ __volatile__( + " rep; nop \n"); +} + +#endif /* __x86_64__ */ + + +#if defined(__ia64__) || defined(__ia64) +/* + * Intel Itanium, gcc or Intel's compiler. + * + * Itanium has weak memory ordering, but we rely on the compiler to enforce + * strict ordering of accesses to volatile data. In particular, while the + * xchg instruction implicitly acts as a memory barrier with 'acquire' + * semantics, we do not have an explicit memory fence instruction in the + * S_UNLOCK macro. We use a regular assignment to clear the spinlock, and + * trust that the compiler marks the generated store instruction with the + * ".rel" opcode. + * + * Testing shows that assumption to hold on gcc, although I could not find + * any explicit statement on that in the gcc manual. In Intel's compiler, + * the -m[no-]serialize-volatile option controls that, and testing shows that + * it is enabled by default. + * + * While icc accepts gcc asm blocks on x86[_64], this is not true on ia64 + * (at least not in icc versions before 12.x). So we have to carry a separate + * compiler-intrinsic-based implementation for it. + */ +#define HAS_TEST_AND_SET + +typedef unsigned int slock_t; + +#define TAS(lock) tas(lock) + +/* On IA64, it's a win to use a non-locking test before the xchg proper */ +#define TAS_SPIN(lock) (*(lock) ? 
1 : TAS(lock)) + +#ifndef __INTEL_COMPILER + +static __inline__ int +tas(volatile slock_t *lock) +{ + long int ret; + + __asm__ __volatile__( + " xchg4 %0=%1,%2 \n" +: "=r"(ret), "+m"(*lock) +: "r"(1) +: "memory"); + return (int) ret; +} + +#else /* __INTEL_COMPILER */ + +static __inline__ int +tas(volatile slock_t *lock) +{ + int ret; + + ret = _InterlockedExchange(lock,1); /* this is a xchg asm macro */ + + return ret; +} + +/* icc can't use the regular gcc S_UNLOCK() macro either in this case */ +#define S_UNLOCK(lock) \ + do { __memory_barrier(); *(lock) = 0; } while (0) + +#endif /* __INTEL_COMPILER */ +#endif /* __ia64__ || __ia64 */ + +/* + * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available. + * + * We use the int-width variant of the builtin because it works on more chips + * than other widths. + */ +#if defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(__aarch64) +#ifdef HAVE_GCC__SYNC_INT32_TAS +#define HAS_TEST_AND_SET + +#define TAS(lock) tas(lock) + +typedef int slock_t; + +static __inline__ int +tas(volatile slock_t *lock) +{ + return __sync_lock_test_and_set(lock, 1); +} + +#define S_UNLOCK(lock) __sync_lock_release(lock) + +#endif /* HAVE_GCC__SYNC_INT32_TAS */ +#endif /* __arm__ || __arm || __aarch64__ || __aarch64 */ + + +/* S/390 and S/390x Linux (32- and 64-bit zSeries) */ +#if defined(__s390__) || defined(__s390x__) +#define HAS_TEST_AND_SET + +typedef unsigned int slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + int _res = 0; + + __asm__ __volatile__( + " cs %0,%3,0(%2) \n" +: "+d"(_res), "+m"(*lock) +: "a"(lock), "d"(1) +: "memory", "cc"); + return _res; +} + +#endif /* __s390__ || __s390x__ */ + + +#if defined(__sparc__) /* Sparc */ +/* + * Solaris has always run sparc processors in TSO (total store) mode, but + * linux didn't use to and the *BSDs still don't. So, be careful about + * acquire/release semantics. The CPU will treat superfluous membars as + * NOPs, so it's just code space. + */ +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register slock_t _res; + + /* + * See comment in src/backend/port/tas/sunstudio_sparc.s for why this + * uses "ldstub", and that file uses "cas". gcc currently generates + * sparcv7-targeted binaries, so "cas" use isn't possible. + */ + __asm__ __volatile__( + " ldstub [%2], %0 \n" +: "=r"(_res), "+m"(*lock) +: "r"(lock) +: "memory"); +#if defined(__sparcv7) || defined(__sparc_v7__) + /* + * No stbar or membar available, luckily no actually produced hardware + * requires a barrier. + */ +#elif defined(__sparcv8) || defined(__sparc_v8__) + /* stbar is available (and required for both PSO, RMO), membar isn't */ + __asm__ __volatile__ ("stbar \n":::"memory"); +#else + /* + * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire + * barrier for sparcv8+ upwards. + */ + __asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory"); +#endif + return (int) _res; +} + +#if defined(__sparcv7) || defined(__sparc_v7__) +/* + * No stbar or membar available, luckily no actually produced hardware + * requires a barrier. We fall through to the default gcc definition of + * S_UNLOCK in this case. 
+ */ +#elif defined(__sparcv8) || defined(__sparc_v8__) +/* stbar is available (and required for both PSO, RMO), membar isn't */ +#define S_UNLOCK(lock) \ +do \ +{ \ + __asm__ __volatile__ ("stbar \n":::"memory"); \ + *((volatile slock_t *) (lock)) = 0; \ +} while (0) +#else +/* + * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate + * release barrier for sparcv8+ upwards. + */ +#define S_UNLOCK(lock) \ +do \ +{ \ + __asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \ + *((volatile slock_t *) (lock)) = 0; \ +} while (0) +#endif + +#endif /* __sparc__ */ + + +/* PowerPC */ +#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#define HAS_TEST_AND_SET + +typedef unsigned int slock_t; + +#define TAS(lock) tas(lock) + +/* On PPC, it's a win to use a non-locking test before the lwarx */ +#define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock)) + +/* + * The second operand of addi can hold a constant zero or a register number, + * hence constraint "=&b" to avoid allocating r0. "b" stands for "address + * base register"; most operands having this register-or-zero property are + * address bases, e.g. the second operand of lwax. + * + * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002, + * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop. + * On newer machines, we can use lwsync instead for better performance. + * + * Ordinarily, we'd code the branches here using GNU-style local symbols, that + * is "1f" referencing "1:" and so on. But some people run gcc on AIX with + * IBM's assembler as backend, and IBM's assembler doesn't do local symbols. + * So hand-code the branch offsets; fortunately, all PPC instructions are + * exactly 4 bytes each, so it's not too hard to count. + */ +static __inline__ int +tas(volatile slock_t *lock) +{ + slock_t _t; + int _res; + + __asm__ __volatile__( +#ifdef USE_PPC_LWARX_MUTEX_HINT +" lwarx %0,0,%3,1 \n" +#else +" lwarx %0,0,%3 \n" +#endif +" cmpwi %0,0 \n" +" bne $+16 \n" /* branch to li %1,1 */ +" addi %0,%0,1 \n" +" stwcx. %0,0,%3 \n" +" beq $+12 \n" /* branch to lwsync/isync */ +" li %1,1 \n" +" b $+12 \n" /* branch to end of asm sequence */ +#ifdef USE_PPC_LWSYNC +" lwsync \n" +#else +" isync \n" +#endif +" li %1,0 \n" + +: "=&b"(_t), "=r"(_res), "+m"(*lock) +: "r"(lock) +: "memory", "cc"); + return _res; +} + +/* + * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction. + * On newer machines, we can use lwsync instead for better performance. 
+ */ +#ifdef USE_PPC_LWSYNC +#define S_UNLOCK(lock) \ +do \ +{ \ + __asm__ __volatile__ (" lwsync \n" ::: "memory"); \ + *((volatile slock_t *) (lock)) = 0; \ +} while (0) +#else +#define S_UNLOCK(lock) \ +do \ +{ \ + __asm__ __volatile__ (" sync \n" ::: "memory"); \ + *((volatile slock_t *) (lock)) = 0; \ +} while (0) +#endif /* USE_PPC_LWSYNC */ + +#endif /* powerpc */ + + +/* Linux Motorola 68k */ +#if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__) +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register int rv; + + __asm__ __volatile__( + " clrl %0 \n" + " tas %1 \n" + " sne %0 \n" +: "=d"(rv), "+m"(*lock) +: /* no inputs */ +: "memory", "cc"); + return rv; +} + +#endif /* (__mc68000__ || __m68k__) && __linux__ */ + + +/* Motorola 88k */ +#if defined(__m88k__) +#define HAS_TEST_AND_SET + +typedef unsigned int slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register slock_t _res = 1; + + __asm__ __volatile__( + " xmem %0, %2, %%r0 \n" +: "+r"(_res), "+m"(*lock) +: "r"(lock) +: "memory"); + return (int) _res; +} + +#endif /* __m88k__ */ + + +/* + * VAXen -- even multiprocessor ones + * (thanks to Tom Ivar Helbekkmo) + */ +#if defined(__vax__) +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register int _res; + + __asm__ __volatile__( + " movl $1, %0 \n" + " bbssi $0, (%2), 1f \n" + " clrl %0 \n" + "1: \n" +: "=&r"(_res), "+m"(*lock) +: "r"(lock) +: "memory"); + return _res; +} + +#endif /* __vax__ */ + + +#if defined(__mips__) && !defined(__sgi) /* non-SGI MIPS */ +#define HAS_TEST_AND_SET + +typedef unsigned int slock_t; + +#define TAS(lock) tas(lock) + +/* + * Original MIPS-I processors lacked the LL/SC instructions, but if we are + * so unfortunate as to be running on one of those, we expect that the kernel + * will handle the illegal-instruction traps and emulate them for us. On + * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane + * choice because any other synchronization method must involve a kernel + * call. Unfortunately, many toolchains still default to MIPS-I as the + * codegen target; if the symbol __mips shows that that's the case, we + * have to force the assembler to accept LL/SC. + * + * R10000 and up processors require a separate SYNC, which has the same + * issues as LL/SC. 
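+ *
+ * Editor's note (not part of the upstream header): a toolchain still
+ * targeting MIPS-I, e.g. one invoked roughly as
+ *
+ *     mips-linux-gnu-gcc -march=mips1 -c s_lock.c
+ *
+ * predefines __mips as 1, and its assembler rejects the ll/sc/sync
+ * mnemonics, which is why the code below temporarily switches to
+ * ".set mips2".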
+ */ +#if __mips < 2 +#define MIPS_SET_MIPS2 " .set mips2 \n" +#else +#define MIPS_SET_MIPS2 +#endif + +static __inline__ int +tas(volatile slock_t *lock) +{ + register volatile slock_t *_l = lock; + register int _res; + register int _tmp; + + __asm__ __volatile__( + " .set push \n" + MIPS_SET_MIPS2 + " .set noreorder \n" + " .set nomacro \n" + " ll %0, %2 \n" + " or %1, %0, 1 \n" + " sc %1, %2 \n" + " xori %1, 1 \n" + " or %0, %0, %1 \n" + " sync \n" + " .set pop " +: "=&r" (_res), "=&r" (_tmp), "+R" (*_l) +: /* no inputs */ +: "memory"); + return _res; +} + +/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */ +#define S_UNLOCK(lock) \ +do \ +{ \ + __asm__ __volatile__( \ + " .set push \n" \ + MIPS_SET_MIPS2 \ + " .set noreorder \n" \ + " .set nomacro \n" \ + " sync \n" \ + " .set pop " \ +: /* no outputs */ \ +: /* no inputs */ \ +: "memory"); \ + *((volatile slock_t *) (lock)) = 0; \ +} while (0) + +#endif /* __mips__ && !__sgi */ + + +#if defined(__m32r__) && defined(HAVE_SYS_TAS_H) /* Renesas' M32R */ +#define HAS_TEST_AND_SET + +#include + +typedef int slock_t; + +#define TAS(lock) tas(lock) + +#endif /* __m32r__ */ + + +#if defined(__sh__) /* Renesas' SuperH */ +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; + +#define TAS(lock) tas(lock) + +static __inline__ int +tas(volatile slock_t *lock) +{ + register int _res; + + /* + * This asm is coded as if %0 could be any register, but actually SuperH + * restricts the target of xor-immediate to be R0. That's handled by + * the "z" constraint on _res. + */ + __asm__ __volatile__( + " tas.b @%2 \n" + " movt %0 \n" + " xor #1,%0 \n" +: "=z"(_res), "+m"(*lock) +: "r"(lock) +: "memory", "t"); + return _res; +} + +#endif /* __sh__ */ + + +/* These live in s_lock.c, but only for gcc */ + + +#if defined(__m68k__) && !defined(__linux__) /* non-Linux Motorola 68k */ +#define HAS_TEST_AND_SET + +typedef unsigned char slock_t; +#endif + +/* + * Default implementation of S_UNLOCK() for gcc/icc. + * + * Note that this implementation is unsafe for any platform that can reorder + * a memory access (either load or store) after a following store. That + * happens not to be possible on x86 and most legacy architectures (some are + * single-processor!), but many modern systems have weaker memory ordering. + * Those that do must define their own version of S_UNLOCK() rather than + * relying on this one. + */ +#if !defined(S_UNLOCK) +#define S_UNLOCK(lock) \ + do { __asm__ __volatile__("" : : : "memory"); *(lock) = 0; } while (0) +#endif + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + + + +/* + * --------------------------------------------------------------------- + * Platforms that use non-gcc inline assembly: + * --------------------------------------------------------------------- + */ + +#if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */ + + +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ +/* + * HP's PA-RISC + * + * See src/backend/port/hpux/tas.c.template for details about LDCWX. Because + * LDCWX requires a 16-byte-aligned address, we declare slock_t as a 16-byte + * struct. The active word in the struct is whichever has the aligned address; + * the other three words just sit at -1. + * + * When using gcc, we can inline the required assembly code. 
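+ *
+ * Editor's worked example (not part of the upstream header): if a 16-byte
+ * slock_t happens to start at address 0x1008, then
+ *
+ *     TAS_ACTIVE_WORD = (0x1008 + 15) & ~15 = 0x1010
+ *
+ * which is the one 16-byte-aligned word inside the struct's 0x1008..0x1017
+ * range; the other three words just stay at -1 (the "unlocked" value seen
+ * by ldcw) and are never used.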
+ */ +#define HAS_TEST_AND_SET + +typedef struct +{ + int sema[4]; +} slock_t; + +#define TAS_ACTIVE_WORD(lock) ((volatile int *) (((uintptr_t) (lock) + 15) & ~15)) + +#if defined(__GNUC__) + +static __inline__ int +tas(volatile slock_t *lock) +{ + volatile int *lockword = TAS_ACTIVE_WORD(lock); + register int lockval; + + __asm__ __volatile__( + " ldcwx 0(0,%2),%0 \n" +: "=r"(lockval), "+m"(*lockword) +: "r"(lockword) +: "memory"); + return (lockval == 0); +} + +/* + * The hppa implementation doesn't follow the rules of this files and provides + * a gcc specific implementation outside of the above defined(__GNUC__). It + * does so to avoid duplication between the HP compiler and gcc. So undefine + * the generic fallback S_UNLOCK from above. + */ +#ifdef S_UNLOCK +#undef S_UNLOCK +#endif +#define S_UNLOCK(lock) \ + do { \ + __asm__ __volatile__("" : : : "memory"); \ + *TAS_ACTIVE_WORD(lock) = -1; \ + } while (0) + +#endif /* __GNUC__ */ + +#define S_INIT_LOCK(lock) \ + do { \ + volatile slock_t *lock_ = (lock); \ + lock_->sema[0] = -1; \ + lock_->sema[1] = -1; \ + lock_->sema[2] = -1; \ + lock_->sema[3] = -1; \ + } while (0) + +#define S_LOCK_FREE(lock) (*TAS_ACTIVE_WORD(lock) != 0) + +#endif /* __hppa || __hppa__ */ + + +#if defined(__hpux) && defined(__ia64) && !defined(__GNUC__) +/* + * HP-UX on Itanium, non-gcc/icc compiler + * + * We assume that the compiler enforces strict ordering of loads/stores on + * volatile data (see comments on the gcc-version earlier in this file). + * Note that this assumption does *not* hold if you use the + * +Ovolatile=__unordered option on the HP-UX compiler, so don't do that. + * + * See also Implementing Spinlocks on the Intel Itanium Architecture and + * PA-RISC, by Tor Ekqvist and David Graves, for more information. As of + * this writing, version 1.0 of the manual is available at: + * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf + */ +#define HAS_TEST_AND_SET + +typedef unsigned int slock_t; + +#include +#define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE) +/* On IA64, it's a win to use a non-locking test before the xchg proper */ +#define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock)) +#define S_UNLOCK(lock) \ + do { _Asm_mf(); (*(lock)) = 0; } while (0) + +#endif /* HPUX on IA64, non gcc/icc */ + +#if defined(_AIX) /* AIX */ +/* + * AIX (POWER) + */ +#define HAS_TEST_AND_SET + +#include + +typedef int slock_t; + +#define TAS(lock) _check_lock((slock_t *) (lock), 0, 1) +#define S_UNLOCK(lock) _clear_lock((slock_t *) (lock), 0) +#endif /* _AIX */ + + +/* These are in sunstudio_(sparc|x86).s */ + +#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc)) +#define HAS_TEST_AND_SET + +#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus) +typedef unsigned int slock_t; +#else +typedef unsigned char slock_t; +#endif + +extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with, + slock_t cmp); + +#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0) +#endif + + +#ifdef _MSC_VER +typedef LONG slock_t; + +#define HAS_TEST_AND_SET +#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0)) + +#define SPIN_DELAY() spin_delay() + +/* If using Visual C++ on Win64, inline assembly is unavailable. + * Use a _mm_pause intrinsic instead of rep nop. + */ +#if defined(_WIN64) +static __forceinline void +spin_delay(void) +{ + _mm_pause(); +} +#else +static __forceinline void +spin_delay(void) +{ + /* See comment for gcc code. 
Same code, MASM syntax */ + __asm rep nop; +} +#endif + +#include +#pragma intrinsic(_ReadWriteBarrier) + +#define S_UNLOCK(lock) \ + do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0) + +#endif + + +#endif /* !defined(HAS_TEST_AND_SET) */ + + +/* Blow up if we didn't have any way to do spinlocks */ +#ifndef HAS_TEST_AND_SET +#error PostgreSQL does not have native spinlock support on this platform. To continue the compilation, rerun configure using --disable-spinlocks. However, performance will be poor. Please report this to pgsql-bugs@lists.postgresql.org. +#endif + + +#else /* !HAVE_SPINLOCKS */ + + +/* + * Fake spinlock implementation using semaphores --- slow and prone + * to fall foul of kernel limits on number of semaphores, so don't use this + * unless you must! The subroutines appear in spin.c. + */ +typedef int slock_t; + +extern bool s_lock_free_sema(volatile slock_t *lock); +extern void s_unlock_sema(volatile slock_t *lock); +extern void s_init_lock_sema(volatile slock_t *lock, bool nested); +extern int tas_sema(volatile slock_t *lock); + +#define S_LOCK_FREE(lock) s_lock_free_sema(lock) +#define S_UNLOCK(lock) s_unlock_sema(lock) +#define S_INIT_LOCK(lock) s_init_lock_sema(lock, false) +#define TAS(lock) tas_sema(lock) + + +#endif /* HAVE_SPINLOCKS */ + + +/* + * Default Definitions - override these above as needed. + */ + +#if !defined(S_LOCK) +#define S_LOCK(lock) \ + (TAS(lock) ? s_lock((lock), __FILE__, __LINE__, PG_FUNCNAME_MACRO) : 0) +#endif /* S_LOCK */ + +#if !defined(S_LOCK_FREE) +#define S_LOCK_FREE(lock) (*(lock) == 0) +#endif /* S_LOCK_FREE */ + +#if !defined(S_UNLOCK) +/* + * Our default implementation of S_UNLOCK is essentially *(lock) = 0. This + * is unsafe if the platform can reorder a memory access (either load or + * store) after a following store; platforms where this is possible must + * define their own S_UNLOCK. But CPU reordering is not the only concern: + * if we simply defined S_UNLOCK() as an inline macro, the compiler might + * reorder instructions from inside the critical section to occur after the + * lock release. Since the compiler probably can't know what the external + * function s_unlock is doing, putting the same logic there should be adequate. + * A sufficiently-smart globally optimizing compiler could break that + * assumption, though, and the cost of a function call for every spinlock + * release may hurt performance significantly, so we use this implementation + * only for platforms where we don't know of a suitable intrinsic. For the + * most part, those are relatively obscure platform/compiler combinations to + * which the PostgreSQL project does not have access. 
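+ *
+ * Editor's sketch (not part of this header; the real definition lives in
+ * s_lock.c and may differ in detail): the out-of-line routine amounts to
+ * little more than
+ *
+ *     void
+ *     s_unlock(volatile slock_t *lock)
+ *     {
+ *         *lock = 0;
+ *     }
+ *
+ * The point is the call boundary, not the body: the compiler must assume an
+ * external function can read or write anything, so it cannot sink
+ * critical-section code past the call.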
+ */ +#define USE_DEFAULT_S_UNLOCK +extern void s_unlock(volatile slock_t *lock); +#define S_UNLOCK(lock) s_unlock(lock) +#endif /* S_UNLOCK */ + +#if !defined(S_INIT_LOCK) +#define S_INIT_LOCK(lock) S_UNLOCK(lock) +#endif /* S_INIT_LOCK */ + +#if !defined(SPIN_DELAY) +#define SPIN_DELAY() ((void) 0) +#endif /* SPIN_DELAY */ + +#if !defined(TAS) +extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or + * s_lock.c */ + +#define TAS(lock) tas(lock) +#endif /* TAS */ + +#if !defined(TAS_SPIN) +#define TAS_SPIN(lock) TAS(lock) +#endif /* TAS_SPIN */ + +extern slock_t dummy_spinlock; + +/* + * Platform-independent out-of-line support routines + */ +extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func); + +/* Support for dynamic adjustment of spins_per_delay */ +#define DEFAULT_SPINS_PER_DELAY 100 + +extern void set_spins_per_delay(int shared_spins_per_delay); +extern int update_spins_per_delay(int shared_spins_per_delay); + +/* + * Support for spin delay which is useful in various places where + * spinlock-like procedures take place. + */ +typedef struct +{ + int spins; + int delays; + int cur_delay; + const char *file; + int line; + const char *func; +} SpinDelayStatus; + +static inline void +init_spin_delay(SpinDelayStatus *status, + const char *file, int line, const char *func) +{ + status->spins = 0; + status->delays = 0; + status->cur_delay = 0; + status->file = file; + status->line = line; + status->func = func; +} + +#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, PG_FUNCNAME_MACRO) +void perform_spin_delay(SpinDelayStatus *status); +void finish_spin_delay(SpinDelayStatus *status); + +#endif /* S_LOCK_H */ diff --git a/src/include/storage/sharedfileset.h b/src/include/storage/sharedfileset.h new file mode 100644 index 0000000..2d6cf07 --- /dev/null +++ b/src/include/storage/sharedfileset.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * sharedfileset.h + * Shared temporary file management. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/sharedfileset.h + * + *------------------------------------------------------------------------- + */ + +#ifndef SHAREDFILESET_H +#define SHAREDFILESET_H + +#include "storage/dsm.h" +#include "storage/fd.h" +#include "storage/spin.h" + +/* + * A set of temporary files that can be shared by multiple backends. + */ +typedef struct SharedFileSet +{ + pid_t creator_pid; /* PID of the creating process */ + uint32 number; /* per-PID identifier */ + slock_t mutex; /* mutex protecting the reference count */ + int refcnt; /* number of attached backends */ + int ntablespaces; /* number of tablespaces to use */ + Oid tablespaces[8]; /* OIDs of tablespaces to use. Assumes that + * it's rare that there more than temp + * tablespaces. 
*/ +} SharedFileSet; + +extern void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg); +extern void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg); +extern File SharedFileSetCreate(SharedFileSet *fileset, const char *name); +extern File SharedFileSetOpen(SharedFileSet *fileset, const char *name); +extern bool SharedFileSetDelete(SharedFileSet *fileset, const char *name, + bool error_on_failure); +extern void SharedFileSetDeleteAll(SharedFileSet *fileset); + +#endif diff --git a/src/include/storage/shm_mq.h b/src/include/storage/shm_mq.h new file mode 100644 index 0000000..5005f2c --- /dev/null +++ b/src/include/storage/shm_mq.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * shm_mq.h + * single-reader, single-writer shared memory message queue + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shm_mq.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHM_MQ_H +#define SHM_MQ_H + +#include "postmaster/bgworker.h" +#include "storage/dsm.h" +#include "storage/proc.h" + +/* The queue itself, in shared memory. */ +struct shm_mq; +typedef struct shm_mq shm_mq; + +/* Backend-private state. */ +struct shm_mq_handle; +typedef struct shm_mq_handle shm_mq_handle; + +/* Descriptors for a single write spanning multiple locations. */ +typedef struct +{ + const char *data; + Size len; +} shm_mq_iovec; + +/* Possible results of a send or receive operation. */ +typedef enum +{ + SHM_MQ_SUCCESS, /* Sent or received a message. */ + SHM_MQ_WOULD_BLOCK, /* Not completed; retry later. */ + SHM_MQ_DETACHED /* Other process has detached queue. */ +} shm_mq_result; + +/* + * Primitives to create a queue and set the sender and receiver. + * + * Both the sender and the receiver must be set before any messages are read + * or written, but they need not be set by the same process. Each must be + * set exactly once. + */ +extern shm_mq *shm_mq_create(void *address, Size size); +extern void shm_mq_set_receiver(shm_mq *mq, PGPROC *); +extern void shm_mq_set_sender(shm_mq *mq, PGPROC *); + +/* Accessor methods for sender and receiver. */ +extern PGPROC *shm_mq_get_receiver(shm_mq *); +extern PGPROC *shm_mq_get_sender(shm_mq *); + +/* Set up backend-local queue state. */ +extern shm_mq_handle *shm_mq_attach(shm_mq *mq, dsm_segment *seg, + BackgroundWorkerHandle *handle); + +/* Associate worker handle with shm_mq. */ +extern void shm_mq_set_handle(shm_mq_handle *, BackgroundWorkerHandle *); + +/* Break connection, release handle resources. */ +extern void shm_mq_detach(shm_mq_handle *mqh); + +/* Get the shm_mq from handle. */ +extern shm_mq *shm_mq_get_queue(shm_mq_handle *mqh); + +/* Send or receive messages. */ +extern shm_mq_result shm_mq_send(shm_mq_handle *mqh, + Size nbytes, const void *data, bool nowait); +extern shm_mq_result shm_mq_sendv(shm_mq_handle *mqh, + shm_mq_iovec *iov, int iovcnt, bool nowait); +extern shm_mq_result shm_mq_receive(shm_mq_handle *mqh, + Size *nbytesp, void **datap, bool nowait); + +/* Wait for our counterparty to attach to the queue. */ +extern shm_mq_result shm_mq_wait_for_attach(shm_mq_handle *mqh); + +/* Smallest possible queue. 
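+ *
+ * Editor's usage sketch (not part of the upstream header) of a typical
+ * sender-side sequence; "addr", "size", "len", "data" and the DSM segment
+ * "seg" are assumed to be set up by the caller:
+ *
+ *     shm_mq        *mq = shm_mq_create(addr, size);
+ *     shm_mq_handle *mqh;
+ *     shm_mq_result  res;
+ *
+ *     shm_mq_set_sender(mq, MyProc);      -- receiver is set by the other side
+ *     mqh = shm_mq_attach(mq, seg, NULL);
+ *     res = shm_mq_send(mqh, len, data, false);   -- nowait = false: block
+ *     if (res == SHM_MQ_DETACHED)
+ *         ... the receiver has gone away ...
+ *     shm_mq_detach(mqh);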
*/ +extern PGDLLIMPORT const Size shm_mq_minimum_size; + +#endif /* SHM_MQ_H */ diff --git a/src/include/storage/shm_toc.h b/src/include/storage/shm_toc.h new file mode 100644 index 0000000..c3886d5 --- /dev/null +++ b/src/include/storage/shm_toc.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * shm_toc.h + * shared memory segment table of contents + * + * This is intended to provide a simple way to divide a chunk of shared + * memory (probably dynamic shared memory allocated via dsm_create) into + * a number of regions and keep track of the addresses of those regions or + * key data structures within those regions. This is not intended to + * scale to a large number of keys and will perform poorly if used that + * way; if you need a large number of pointers, store them within some + * other data structure within the segment and only put the pointer to + * the data structure itself in the table of contents. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shm_toc.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHM_TOC_H +#define SHM_TOC_H + +#include "storage/shmem.h" /* for add_size() */ + +/* shm_toc is an opaque type known only within shm_toc.c */ +typedef struct shm_toc shm_toc; + +extern shm_toc *shm_toc_create(uint64 magic, void *address, Size nbytes); +extern shm_toc *shm_toc_attach(uint64 magic, void *address); +extern void *shm_toc_allocate(shm_toc *toc, Size nbytes); +extern Size shm_toc_freespace(shm_toc *toc); +extern void shm_toc_insert(shm_toc *toc, uint64 key, void *address); +extern void *shm_toc_lookup(shm_toc *toc, uint64 key, bool noError); + +/* + * Tools for estimating how large a chunk of shared memory will be needed + * to store a TOC and its dependent objects. Note: we don't really support + * large numbers of keys, but it's convenient to declare number_of_keys + * as a Size anyway. + */ +typedef struct +{ + Size space_for_chunks; + Size number_of_keys; +} shm_toc_estimator; + +#define shm_toc_initialize_estimator(e) \ + ((e)->space_for_chunks = 0, (e)->number_of_keys = 0) +#define shm_toc_estimate_chunk(e, sz) \ + ((e)->space_for_chunks = add_size((e)->space_for_chunks, BUFFERALIGN(sz))) +#define shm_toc_estimate_keys(e, cnt) \ + ((e)->number_of_keys = add_size((e)->number_of_keys, cnt)) + +extern Size shm_toc_estimate(shm_toc_estimator *e); + +#endif /* SHM_TOC_H */ diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h new file mode 100644 index 0000000..e9e32ab --- /dev/null +++ b/src/include/storage/shmem.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * shmem.h + * shared memory management structures + * + * Historical note: + * A long time ago, Postgres' shared memory region was allowed to be mapped + * at a different address in each process, and shared memory "pointers" were + * passed around as offsets relative to the start of the shared memory region. + * That is no longer the case: each process must map the shared memory region + * at the same address. This means shared memory pointers can be passed + * around directly between different processes. 
+ * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shmem.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHMEM_H +#define SHMEM_H + +#include "utils/hsearch.h" + + +/* shmqueue.c */ +typedef struct SHM_QUEUE +{ + struct SHM_QUEUE *prev; + struct SHM_QUEUE *next; +} SHM_QUEUE; + +/* shmem.c */ +extern void InitShmemAccess(void *seghdr); +extern void InitShmemAllocation(void); +extern void *ShmemAlloc(Size size); +extern void *ShmemAllocNoError(Size size); +extern void *ShmemAllocUnlocked(Size size); +extern bool ShmemAddrIsValid(const void *addr); +extern void InitShmemIndex(void); +extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size, + HASHCTL *infoP, int hash_flags); +extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr); +extern Size add_size(Size s1, Size s2); +extern Size mul_size(Size s1, Size s2); + +/* ipci.c */ +extern void RequestAddinShmemSpace(Size size); + +/* size constants for the shmem index table */ + /* max size of data structure string name */ +#define SHMEM_INDEX_KEYSIZE (48) + /* estimated size of the shmem index table (not a hard limit) */ +#define SHMEM_INDEX_SIZE (64) + +/* this is a hash bucket in the shmem index table */ +typedef struct +{ + char key[SHMEM_INDEX_KEYSIZE]; /* string name */ + void *location; /* location in shared mem */ + Size size; /* # bytes requested for the structure */ + Size allocated_size; /* # bytes actually allocated */ +} ShmemIndexEnt; + +/* + * prototypes for functions in shmqueue.c + */ +extern void SHMQueueInit(SHM_QUEUE *queue); +extern void SHMQueueElemInit(SHM_QUEUE *queue); +extern void SHMQueueDelete(SHM_QUEUE *queue); +extern void SHMQueueInsertBefore(SHM_QUEUE *queue, SHM_QUEUE *elem); +extern void SHMQueueInsertAfter(SHM_QUEUE *queue, SHM_QUEUE *elem); +extern Pointer SHMQueueNext(const SHM_QUEUE *queue, const SHM_QUEUE *curElem, + Size linkOffset); +extern Pointer SHMQueuePrev(const SHM_QUEUE *queue, const SHM_QUEUE *curElem, + Size linkOffset); +extern bool SHMQueueEmpty(const SHM_QUEUE *queue); +extern bool SHMQueueIsDetached(const SHM_QUEUE *queue); + +#endif /* SHMEM_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h new file mode 100644 index 0000000..903818b --- /dev/null +++ b/src/include/storage/sinval.h @@ -0,0 +1,153 @@ +/*------------------------------------------------------------------------- + * + * sinval.h + * POSTGRES shared cache invalidation communication definitions. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/sinval.h + * + *------------------------------------------------------------------------- + */ +#ifndef SINVAL_H +#define SINVAL_H + +#include + +#include "storage/relfilenode.h" + +/* + * We support several types of shared-invalidation messages: + * * invalidate a specific tuple in a specific catcache + * * invalidate all catcache entries from a given system catalog + * * invalidate a relcache entry for a specific logical relation + * * invalidate all relcache entries + * * invalidate an smgr cache entry for a specific physical relation + * * invalidate the mapped-relation mapping for a given database + * * invalidate any saved snapshot that might be used to scan a given relation + * More types could be added if needed. 
The message type is identified by + * the first "int8" field of the message struct. Zero or positive means a + * specific-catcache inval message (and also serves as the catcache ID field). + * Negative values identify the other message types, as per codes below. + * + * Catcache inval events are initially driven by detecting tuple inserts, + * updates and deletions in system catalogs (see CacheInvalidateHeapTuple). + * An update can generate two inval events, one for the old tuple and one for + * the new, but this is reduced to one event if the tuple's hash key doesn't + * change. Note that the inval events themselves don't actually say whether + * the tuple is being inserted or deleted. Also, since we transmit only a + * hash key, there is a small risk of unnecessary invalidations due to chance + * matches of hash keys. + * + * Note that some system catalogs have multiple caches on them (with different + * indexes). On detecting a tuple invalidation in such a catalog, separate + * catcache inval messages must be generated for each of its caches, since + * the hash keys will generally be different. + * + * Catcache, relcache, and snapshot invalidations are transactional, and so + * are sent to other backends upon commit. Internally to the generating + * backend, they are also processed at CommandCounterIncrement so that later + * commands in the same transaction see the new state. The generating backend + * also has to process them at abort, to flush out any cache state it's loaded + * from no-longer-valid entries. + * + * smgr and relation mapping invalidations are non-transactional: they are + * sent immediately when the underlying file change is made. + */ + +typedef struct +{ + int8 id; /* cache ID --- must be first */ + Oid dbId; /* database ID, or 0 if a shared relation */ + uint32 hashValue; /* hash value of key for this catcache */ +} SharedInvalCatcacheMsg; + +#define SHAREDINVALCATALOG_ID (-1) + +typedef struct +{ + int8 id; /* type field --- must be first */ + Oid dbId; /* database ID, or 0 if a shared catalog */ + Oid catId; /* ID of catalog whose contents are invalid */ +} SharedInvalCatalogMsg; + +#define SHAREDINVALRELCACHE_ID (-2) + +typedef struct +{ + int8 id; /* type field --- must be first */ + Oid dbId; /* database ID, or 0 if a shared relation */ + Oid relId; /* relation ID, or 0 if whole relcache */ +} SharedInvalRelcacheMsg; + +#define SHAREDINVALSMGR_ID (-3) + +typedef struct +{ + /* note: field layout chosen to pack into 16 bytes */ + int8 id; /* type field --- must be first */ + int8 backend_hi; /* high bits of backend ID, if temprel */ + uint16 backend_lo; /* low bits of backend ID, if temprel */ + RelFileNode rnode; /* spcNode, dbNode, relNode */ +} SharedInvalSmgrMsg; + +#define SHAREDINVALRELMAP_ID (-4) + +typedef struct +{ + int8 id; /* type field --- must be first */ + Oid dbId; /* database ID, or 0 for shared catalogs */ +} SharedInvalRelmapMsg; + +#define SHAREDINVALSNAPSHOT_ID (-5) + +typedef struct +{ + int8 id; /* type field --- must be first */ + Oid dbId; /* database ID, or 0 if a shared relation */ + Oid relId; /* relation ID */ +} SharedInvalSnapshotMsg; + +typedef union +{ + int8 id; /* type field --- must be first */ + SharedInvalCatcacheMsg cc; + SharedInvalCatalogMsg cat; + SharedInvalRelcacheMsg rc; + SharedInvalSmgrMsg sm; + SharedInvalRelmapMsg rm; + SharedInvalSnapshotMsg sn; +} SharedInvalidationMessage; + + +/* Counter of messages processed; don't worry about overflow. 
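+ *
+ * Editor's sketch (not part of the upstream header) of how a consumer
+ * typically dispatches on the type field of some SharedInvalidationMessage
+ * pointer "msg":
+ *
+ *     if (msg->id >= 0)
+ *         ... catcache inval: msg->cc.id names the catcache and
+ *             msg->cc.hashValue the affected hash key ...
+ *     else if (msg->id == SHAREDINVALCATALOG_ID)
+ *         ... reset all catcaches on catalog msg->cat.catId ...
+ *     else if (msg->id == SHAREDINVALRELCACHE_ID)
+ *         ... flush relcache entry msg->rc.relId (or everything, if 0) ...
+ *     else if (msg->id == SHAREDINVALSMGR_ID)
+ *         ... close smgr references to msg->sm.rnode ...
+ *     ... and similarly for relmap and snapshot messages ...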
*/ +extern uint64 SharedInvalidMessageCounter; + +extern volatile sig_atomic_t catchupInterruptPending; + +extern void SendSharedInvalidMessages(const SharedInvalidationMessage *msgs, + int n); +extern void ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *msg), + void (*resetFunction) (void)); + +/* signal handler for catchup events (PROCSIG_CATCHUP_INTERRUPT) */ +extern void HandleCatchupInterrupt(void); + +/* + * enable/disable processing of catchup events directly from signal handler. + * The enable routine first performs processing of any catchup events that + * have occurred since the last disable. + */ +extern void ProcessCatchupInterrupt(void); + +extern int xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, + bool *RelcacheInitFileInval); +extern void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, + int nmsgs, bool RelcacheInitFileInval, + Oid dbid, Oid tsid); + +extern void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg); + +#endif /* SINVAL_H */ diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h new file mode 100644 index 0000000..426f547 --- /dev/null +++ b/src/include/storage/sinvaladt.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * sinvaladt.h + * POSTGRES shared cache invalidation data manager. + * + * The shared cache invalidation manager is responsible for transmitting + * invalidation messages between backends. Any message sent by any backend + * must be delivered to all already-running backends before it can be + * forgotten. (If we run out of space, we instead deliver a "RESET" + * message to backends that have fallen too far behind.) + * + * The struct type SharedInvalidationMessage, defining the contents of + * a single message, is defined in sinval.h. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/sinvaladt.h + * + *------------------------------------------------------------------------- + */ +#ifndef SINVALADT_H +#define SINVALADT_H + +#include "storage/lock.h" +#include "storage/sinval.h" + +/* + * prototypes for functions in sinvaladt.c + */ +extern Size SInvalShmemSize(void); +extern void CreateSharedInvalidationState(void); +extern void SharedInvalBackendInit(bool sendOnly); +extern PGPROC *BackendIdGetProc(int backendID); +extern void BackendIdGetTransactionIds(int backendID, TransactionId *xid, TransactionId *xmin); + +extern void SIInsertDataEntries(const SharedInvalidationMessage *data, int n); +extern int SIGetDataEntries(SharedInvalidationMessage *data, int datasize); +extern void SICleanupQueue(bool callerHasWriteLock, int minFree); + +extern LocalTransactionId GetNextLocalTransactionId(void); + +#endif /* SINVALADT_H */ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h new file mode 100644 index 0000000..6566659 --- /dev/null +++ b/src/include/storage/smgr.h @@ -0,0 +1,109 @@ +/*------------------------------------------------------------------------- + * + * smgr.h + * storage manager switch public interface declarations. 
+ * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/smgr.h + * + *------------------------------------------------------------------------- + */ +#ifndef SMGR_H +#define SMGR_H + +#include "lib/ilist.h" +#include "storage/block.h" +#include "storage/relfilenode.h" + +/* + * smgr.c maintains a table of SMgrRelation objects, which are essentially + * cached file handles. An SMgrRelation is created (if not already present) + * by smgropen(), and destroyed by smgrclose(). Note that neither of these + * operations imply I/O, they just create or destroy a hashtable entry. + * (But smgrclose() may release associated resources, such as OS-level file + * descriptors.) + * + * An SMgrRelation may have an "owner", which is just a pointer to it from + * somewhere else; smgr.c will clear this pointer if the SMgrRelation is + * closed. We use this to avoid dangling pointers from relcache to smgr + * without having to make the smgr explicitly aware of relcache. There + * can't be more than one "owner" pointer per SMgrRelation, but that's + * all we need. + * + * SMgrRelations that do not have an "owner" are considered to be transient, + * and are deleted at end of transaction. + */ +typedef struct SMgrRelationData +{ + /* rnode is the hashtable lookup key, so it must be first! */ + RelFileNodeBackend smgr_rnode; /* relation physical identifier */ + + /* pointer to owning pointer, or NULL if none */ + struct SMgrRelationData **smgr_owner; + + /* + * These next three fields are not actually used or manipulated by smgr, + * except that they are reset to InvalidBlockNumber upon a cache flush + * event (in particular, upon truncation of the relation). Higher levels + * store cached state here so that it will be reset when truncation + * happens. In all three cases, InvalidBlockNumber means "unknown". + */ + BlockNumber smgr_targblock; /* current insertion target block */ + BlockNumber smgr_fsm_nblocks; /* last known size of fsm fork */ + BlockNumber smgr_vm_nblocks; /* last known size of vm fork */ + + /* additional public fields may someday exist here */ + + /* + * Fields below here are intended to be private to smgr.c and its + * submodules. Do not touch them from elsewhere. + */ + int smgr_which; /* storage manager selector */ + + /* + * for md.c; per-fork arrays of the number of open segments + * (md_num_open_segs) and the segments themselves (md_seg_fds). 
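+ *
+ * Editor's example (not part of the upstream header): with the default 1GB
+ * segment size, a 2.5GB main fork is stored as files "NNNN", "NNNN.1" and
+ * "NNNN.2" (NNNN being the relfilenode number); md_num_open_segs[MAIN_FORKNUM]
+ * then counts how many of those segment files are currently open, and
+ * md_seg_fds[MAIN_FORKNUM] holds their descriptors.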
+ */ + int md_num_open_segs[MAX_FORKNUM + 1]; + struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; + + /* if unowned, list link in list of all unowned SMgrRelations */ + dlist_node node; +} SMgrRelationData; + +typedef SMgrRelationData *SMgrRelation; + +#define SmgrIsTemp(smgr) \ + RelFileNodeBackendIsTemp((smgr)->smgr_rnode) + +extern void smgrinit(void); +extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend); +extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); +extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); +extern void smgrclearowner(SMgrRelation *owner, SMgrRelation reln); +extern void smgrclose(SMgrRelation reln); +extern void smgrcloseall(void); +extern void smgrclosenode(RelFileNodeBackend rnode); +extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern void smgrdosyncall(SMgrRelation *rels, int nrels); +extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); +extern void smgrextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); +extern void smgrread(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer); +extern void smgrwrite(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern void smgrwriteback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); +extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum); +extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, + int nforks, BlockNumber *nblocks); +extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum); +extern void AtEOXact_SMgr(void); + +#endif /* SMGR_H */ diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h new file mode 100644 index 0000000..5ad25d0 --- /dev/null +++ b/src/include/storage/spin.h @@ -0,0 +1,77 @@ +/*------------------------------------------------------------------------- + * + * spin.h + * Hardware-independent implementation of spinlocks. + * + * + * The hardware-independent interface to spinlocks is defined by the + * typedef "slock_t" and these macros: + * + * void SpinLockInit(volatile slock_t *lock) + * Initialize a spinlock (to the unlocked state). + * + * void SpinLockAcquire(volatile slock_t *lock) + * Acquire a spinlock, waiting if necessary. + * Time out and abort() if unable to acquire the lock in a + * "reasonable" amount of time --- typically ~ 1 minute. + * + * void SpinLockRelease(volatile slock_t *lock) + * Unlock a previously acquired lock. + * + * bool SpinLockFree(slock_t *lock) + * Tests if the lock is free. Returns true if free, false if locked. + * This does *not* change the state of the lock. + * + * Callers must beware that the macro argument may be evaluated multiple + * times! + * + * Load and store operations in calling code are guaranteed not to be + * reordered with respect to these operations, because they include a + * compiler barrier. (Before PostgreSQL 9.5, callers needed to use a + * volatile qualifier to access data protected by spinlocks.) + * + * Keep in mind the coding rule that spinlocks must not be held for more + * than a few instructions. In particular, we assume it is not possible + * for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so + * it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros. 
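+ *
+ * Editor's usage sketch (not part of the upstream header), for a
+ * hypothetical shared-memory struct with an slock_t "mutex" member:
+ *
+ *     SpinLockAcquire(&shared->mutex);
+ *     shared->counter++;               -- at most a few instructions here
+ *     SpinLockRelease(&shared->mutex);
+ *
+ * with SpinLockInit(&shared->mutex) having been called once when the shared
+ * structure was first created.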
+ * + * These macros are implemented in terms of hardware-dependent macros + * supplied by s_lock.h. There is not currently any extra functionality + * added by this header, but there has been in the past and may someday + * be again. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/spin.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPIN_H +#define SPIN_H + +#include "storage/s_lock.h" +#ifndef HAVE_SPINLOCKS +#include "storage/pg_sema.h" +#endif + + +#define SpinLockInit(lock) S_INIT_LOCK(lock) + +#define SpinLockAcquire(lock) S_LOCK(lock) + +#define SpinLockRelease(lock) S_UNLOCK(lock) + +#define SpinLockFree(lock) S_LOCK_FREE(lock) + + +extern int SpinlockSemas(void); +extern Size SpinlockSemaSize(void); + +#ifndef HAVE_SPINLOCKS +extern void SpinlockSemaInit(void); +extern PGSemaphore *SpinlockSemaArray; +#endif + +#endif /* SPIN_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h new file mode 100644 index 0000000..cfbe426 --- /dev/null +++ b/src/include/storage/standby.h @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------- + * + * standby.h + * Definitions for hot standby mode. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/standby.h + * + *------------------------------------------------------------------------- + */ +#ifndef STANDBY_H +#define STANDBY_H + +#include "storage/lock.h" +#include "storage/procsignal.h" +#include "storage/relfilenode.h" +#include "storage/standbydefs.h" + +/* User-settable GUC parameters */ +extern int vacuum_defer_cleanup_age; +extern int max_standby_archive_delay; +extern int max_standby_streaming_delay; + +extern void InitRecoveryTransactionEnvironment(void); +extern void ShutdownRecoveryTransactionEnvironment(void); + +extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, + RelFileNode node); +extern void ResolveRecoveryConflictWithTablespace(Oid tsid); +extern void ResolveRecoveryConflictWithDatabase(Oid dbid); + +extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag); +extern void ResolveRecoveryConflictWithBufferPin(void); +extern void CheckRecoveryConflictDeadlock(void); +extern void StandbyDeadLockHandler(void); +extern void StandbyTimeoutHandler(void); +extern void StandbyLockTimeoutHandler(void); + +/* + * Standby Rmgr (RM_STANDBY_ID) + * + * Standby recovery manager exists to perform actions that are required + * to make hot standby work. That includes logging AccessExclusiveLocks taken + * by transactions and running-xacts snapshots. + */ +extern void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid); +extern void StandbyReleaseLockTree(TransactionId xid, + int nsubxids, TransactionId *subxids); +extern void StandbyReleaseAllLocks(void); +extern void StandbyReleaseOldLocks(TransactionId oldxid); + +#define MinSizeOfXactRunningXacts offsetof(xl_running_xacts, xids) + + +/* + * Declarations for GetRunningTransactionData(). Similar to Snapshots, but + * not quite. This has nothing at all to do with visibility on this server, + * so this is completely separate from snapmgr.c and snapmgr.h. + * This data is important for creating the initial snapshot state on a + * standby server. 
We need lots more information than a normal snapshot, + * hence we use a specific data structure for our needs. This data + * is written to WAL as a separate record immediately after each + * checkpoint. That means that wherever we start a standby from we will + * almost immediately see the data we need to begin executing queries. + */ + +typedef struct RunningTransactionsData +{ + int xcnt; /* # of xact ids in xids[] */ + int subxcnt; /* # of subxact ids in xids[] */ + bool subxid_overflow; /* snapshot overflowed, subxids missing */ + TransactionId nextXid; /* xid from ShmemVariableCache->nextFullXid */ + TransactionId oldestRunningXid; /* *not* oldestXmin */ + TransactionId latestCompletedXid; /* so we can set xmax */ + + TransactionId *xids; /* array of (sub)xids still running */ +} RunningTransactionsData; + +typedef RunningTransactionsData *RunningTransactions; + +extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid); +extern void LogAccessExclusiveLockPrepare(void); + +extern XLogRecPtr LogStandbySnapshot(void); +extern void LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, + bool relcacheInitFileInval); + +#endif /* STANDBY_H */ diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h new file mode 100644 index 0000000..4876d2e --- /dev/null +++ b/src/include/storage/standbydefs.h @@ -0,0 +1,74 @@ +/*------------------------------------------------------------------------- + * + * standbydefs.h + * Frontend exposed definitions for hot standby mode. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/standbydefs.h + * + *------------------------------------------------------------------------- + */ +#ifndef STANDBYDEFS_H +#define STANDBYDEFS_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/lockdefs.h" +#include "storage/sinval.h" + +/* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */ +extern void standby_redo(XLogReaderState *record); +extern void standby_desc(StringInfo buf, XLogReaderState *record); +extern const char *standby_identify(uint8 info); +extern void standby_desc_invalidations(StringInfo buf, + int nmsgs, SharedInvalidationMessage *msgs, + Oid dbId, Oid tsId, + bool relcacheInitFileInval); + +/* + * XLOG message types + */ +#define XLOG_STANDBY_LOCK 0x00 +#define XLOG_RUNNING_XACTS 0x10 +#define XLOG_INVALIDATIONS 0x20 + +typedef struct xl_standby_locks +{ + int nlocks; /* number of entries in locks array */ + xl_standby_lock locks[FLEXIBLE_ARRAY_MEMBER]; +} xl_standby_locks; + +/* + * When we write running xact data to WAL, we use this structure. + */ +typedef struct xl_running_xacts +{ + int xcnt; /* # of xact ids in xids[] */ + int subxcnt; /* # of subxact ids in xids[] */ + bool subxid_overflow; /* snapshot overflowed, subxids missing */ + TransactionId nextXid; /* xid from ShmemVariableCache->nextFullXid */ + TransactionId oldestRunningXid; /* *not* oldestXmin */ + TransactionId latestCompletedXid; /* so we can set xmax */ + + TransactionId xids[FLEXIBLE_ARRAY_MEMBER]; +} xl_running_xacts; + +/* + * Invalidations for standby, currently only when transactions without an + * assigned xid commit. 
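+ *
+ * Editor's note (not part of the upstream header): because msgs[] below is a
+ * flexible array member, the size of one of these records is
+ *
+ *     MinSizeOfInvalidations + nmsgs * sizeof(SharedInvalidationMessage)
+ *
+ * where MinSizeOfInvalidations is defined below as
+ * offsetof(xl_invalidations, msgs).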
+ */ +typedef struct xl_invalidations +{ + Oid dbId; /* MyDatabaseId */ + Oid tsId; /* MyDatabaseTableSpace */ + bool relcacheInitFileInval; /* invalidate relcache init files */ + int nmsgs; /* number of shared inval msgs */ + SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]; +} xl_invalidations; + +#define MinSizeOfInvalidations offsetof(xl_invalidations, msgs) + +#endif /* STANDBYDEFS_H */ diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h new file mode 100644 index 0000000..e16ab8e --- /dev/null +++ b/src/include/storage/sync.h @@ -0,0 +1,62 @@ +/*------------------------------------------------------------------------- + * + * sync.h + * File synchronization management code. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/sync.h + * + *------------------------------------------------------------------------- + */ +#ifndef SYNC_H +#define SYNC_H + +#include "storage/relfilenode.h" + +/* + * Type of sync request. These are used to manage the set of pending + * requests to call a sync handler's sync or unlink functions at the next + * checkpoint. + */ +typedef enum SyncRequestType +{ + SYNC_REQUEST, /* schedule a call of sync function */ + SYNC_UNLINK_REQUEST, /* schedule a call of unlink function */ + SYNC_FORGET_REQUEST, /* forget all calls for a tag */ + SYNC_FILTER_REQUEST /* forget all calls satisfying match fn */ +} SyncRequestType; + +/* + * Which set of functions to use to handle a given request. The values of + * the enumerators must match the indexes of the function table in sync.c. + */ +typedef enum SyncRequestHandler +{ + SYNC_HANDLER_MD = 0 /* md smgr */ +} SyncRequestHandler; + +/* + * A tag identifying a file. Currently it has the members required for md.c's + * usage, but sync.c has no knowledge of the internal structure, and it is + * liable to change as required by future handlers. + */ +typedef struct FileTag +{ + int16 handler; /* SyncRequestHandler value, saving space */ + int16 forknum; /* ForkNumber, saving space */ + RelFileNode rnode; + uint32 segno; +} FileTag; + +extern void InitSync(void); +extern void SyncPreCheckpoint(void); +extern void SyncPostCheckpoint(void); +extern void ProcessSyncRequests(void); +extern void RememberSyncRequest(const FileTag *ftag, SyncRequestType type); +extern void EnableSyncRequestForwarding(void); +extern bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, + bool retryOnError); + +#endif /* SYNC_H */ -- cgit v1.2.3