Diffstat (limited to 'src/include/access')
83 files changed, 16161 insertions, 0 deletions
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h new file mode 100644 index 0000000..d357ebb --- /dev/null +++ b/src/include/access/amapi.h @@ -0,0 +1,290 @@ +/*------------------------------------------------------------------------- + * + * amapi.h + * API for Postgres index access methods. + * + * Copyright (c) 2015-2021, PostgreSQL Global Development Group + * + * src/include/access/amapi.h + * + *------------------------------------------------------------------------- + */ +#ifndef AMAPI_H +#define AMAPI_H + +#include "access/genam.h" + +/* + * We don't wish to include planner header files here, since most of an index + * AM's implementation isn't concerned with those data structures. To allow + * declaring amcostestimate_function here, use forward struct references. + */ +struct PlannerInfo; +struct IndexPath; + +/* Likewise, this file shouldn't depend on execnodes.h. */ +struct IndexInfo; + + +/* + * Properties for amproperty API. This list covers properties known to the + * core code, but an index AM can define its own properties, by matching the + * string property name. + */ +typedef enum IndexAMProperty +{ + AMPROP_UNKNOWN = 0, /* anything not known to core code */ + AMPROP_ASC, /* column properties */ + AMPROP_DESC, + AMPROP_NULLS_FIRST, + AMPROP_NULLS_LAST, + AMPROP_ORDERABLE, + AMPROP_DISTANCE_ORDERABLE, + AMPROP_RETURNABLE, + AMPROP_SEARCH_ARRAY, + AMPROP_SEARCH_NULLS, + AMPROP_CLUSTERABLE, /* index properties */ + AMPROP_INDEX_SCAN, + AMPROP_BITMAP_SCAN, + AMPROP_BACKWARD_SCAN, + AMPROP_CAN_ORDER, /* AM properties */ + AMPROP_CAN_UNIQUE, + AMPROP_CAN_MULTI_COL, + AMPROP_CAN_EXCLUDE, + AMPROP_CAN_INCLUDE +} IndexAMProperty; + +/* + * We use lists of this struct type to keep track of both operators and + * support functions while building or adding to an opclass or opfamily. + * amadjustmembers functions receive lists of these structs, and are allowed + * to alter their "ref" fields. + * + * The "ref" fields define how the pg_amop or pg_amproc entry should depend + * on the associated objects (that is, which dependency type to use, and + * which opclass or opfamily it should depend on). + * + * If ref_is_hard is true, the entry will have a NORMAL dependency on the + * operator or support func, and an INTERNAL dependency on the opclass or + * opfamily. This forces the opclass or opfamily to be dropped if the + * operator or support func is dropped, and requires the CASCADE option + * to do so. Nor will ALTER OPERATOR FAMILY DROP be allowed. This is + * the right behavior for objects that are essential to an opclass. + * + * If ref_is_hard is false, the entry will have an AUTO dependency on the + * operator or support func, and also an AUTO dependency on the opclass or + * opfamily. This allows ALTER OPERATOR FAMILY DROP, and causes that to + * happen automatically if the operator or support func is dropped. This + * is the right behavior for inessential ("loose") objects. + */ +typedef struct OpFamilyMember +{ + bool is_func; /* is this an operator, or support func? */ + Oid object; /* operator or support func's OID */ + int number; /* strategy or support func number */ + Oid lefttype; /* lefttype */ + Oid righttype; /* righttype */ + Oid sortfamily; /* ordering operator's sort opfamily, or 0 */ + bool ref_is_hard; /* hard or soft dependency? */ + bool ref_is_family; /* is dependency on opclass or opfamily? */ + Oid refobjid; /* OID of opclass or opfamily */ +} OpFamilyMember; + + +/* + * Callback function signatures --- see indexam.sgml for more info. 
+ */ + +/* build new index */ +typedef IndexBuildResult *(*ambuild_function) (Relation heapRelation, + Relation indexRelation, + struct IndexInfo *indexInfo); + +/* build empty index */ +typedef void (*ambuildempty_function) (Relation indexRelation); + +/* insert this tuple */ +typedef bool (*aminsert_function) (Relation indexRelation, + Datum *values, + bool *isnull, + ItemPointer heap_tid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); + +/* bulk delete */ +typedef IndexBulkDeleteResult *(*ambulkdelete_function) (IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); + +/* post-VACUUM cleanup */ +typedef IndexBulkDeleteResult *(*amvacuumcleanup_function) (IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); + +/* can indexscan return IndexTuples? */ +typedef bool (*amcanreturn_function) (Relation indexRelation, int attno); + +/* estimate cost of an indexscan */ +typedef void (*amcostestimate_function) (struct PlannerInfo *root, + struct IndexPath *path, + double loop_count, + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity, + double *indexCorrelation, + double *indexPages); + +/* parse index reloptions */ +typedef bytea *(*amoptions_function) (Datum reloptions, + bool validate); + +/* report AM, index, or index column property */ +typedef bool (*amproperty_function) (Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull); + +/* name of phase as used in progress reporting */ +typedef char *(*ambuildphasename_function) (int64 phasenum); + +/* validate definition of an opclass for this AM */ +typedef bool (*amvalidate_function) (Oid opclassoid); + +/* validate operators and support functions to be added to an opclass/family */ +typedef void (*amadjustmembers_function) (Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions); + +/* prepare for index scan */ +typedef IndexScanDesc (*ambeginscan_function) (Relation indexRelation, + int nkeys, + int norderbys); + +/* (re)start index scan */ +typedef void (*amrescan_function) (IndexScanDesc scan, + ScanKey keys, + int nkeys, + ScanKey orderbys, + int norderbys); + +/* next valid tuple */ +typedef bool (*amgettuple_function) (IndexScanDesc scan, + ScanDirection direction); + +/* fetch all valid tuples */ +typedef int64 (*amgetbitmap_function) (IndexScanDesc scan, + TIDBitmap *tbm); + +/* end index scan */ +typedef void (*amendscan_function) (IndexScanDesc scan); + +/* mark current scan position */ +typedef void (*ammarkpos_function) (IndexScanDesc scan); + +/* restore marked scan position */ +typedef void (*amrestrpos_function) (IndexScanDesc scan); + +/* + * Callback function signatures - for parallel index scans. + */ + +/* estimate size of parallel scan descriptor */ +typedef Size (*amestimateparallelscan_function) (void); + +/* prepare for parallel index scan */ +typedef void (*aminitparallelscan_function) (void *target); + +/* (re)start parallel index scan */ +typedef void (*amparallelrescan_function) (IndexScanDesc scan); + +/* + * API struct for an index AM. Note this must be stored in a single palloc'd + * chunk of memory. + */ +typedef struct IndexAmRoutine +{ + NodeTag type; + + /* + * Total number of strategies (operators) by which we can traverse/search + * this AM. Zero if AM does not have a fixed set of strategy assignments. 
+ */ + uint16 amstrategies; + /* total number of support functions that this AM uses */ + uint16 amsupport; + /* opclass options support function number or 0 */ + uint16 amoptsprocnum; + /* does AM support ORDER BY indexed column's value? */ + bool amcanorder; + /* does AM support ORDER BY result of an operator on indexed column? */ + bool amcanorderbyop; + /* does AM support backward scanning? */ + bool amcanbackward; + /* does AM support UNIQUE indexes? */ + bool amcanunique; + /* does AM support multi-column indexes? */ + bool amcanmulticol; + /* does AM require scans to have a constraint on the first index column? */ + bool amoptionalkey; + /* does AM handle ScalarArrayOpExpr quals? */ + bool amsearcharray; + /* does AM handle IS NULL/IS NOT NULL quals? */ + bool amsearchnulls; + /* can index storage data type differ from column data type? */ + bool amstorage; + /* can an index of this type be clustered on? */ + bool amclusterable; + /* does AM handle predicate locks? */ + bool ampredlocks; + /* does AM support parallel scan? */ + bool amcanparallel; + /* does AM support columns included with clause INCLUDE? */ + bool amcaninclude; + /* does AM use maintenance_work_mem? */ + bool amusemaintenanceworkmem; + /* OR of parallel vacuum flags. See vacuum.h for flags. */ + uint8 amparallelvacuumoptions; + /* type of data stored in index, or InvalidOid if variable */ + Oid amkeytype; + + /* + * If you add new properties to either the above or the below lists, then + * they should also (usually) be exposed via the property API (see + * IndexAMProperty at the top of the file, and utils/adt/amutils.c). + */ + + /* interface functions */ + ambuild_function ambuild; + ambuildempty_function ambuildempty; + aminsert_function aminsert; + ambulkdelete_function ambulkdelete; + amvacuumcleanup_function amvacuumcleanup; + amcanreturn_function amcanreturn; /* can be NULL */ + amcostestimate_function amcostestimate; + amoptions_function amoptions; + amproperty_function amproperty; /* can be NULL */ + ambuildphasename_function ambuildphasename; /* can be NULL */ + amvalidate_function amvalidate; + amadjustmembers_function amadjustmembers; /* can be NULL */ + ambeginscan_function ambeginscan; + amrescan_function amrescan; + amgettuple_function amgettuple; /* can be NULL */ + amgetbitmap_function amgetbitmap; /* can be NULL */ + amendscan_function amendscan; + ammarkpos_function ammarkpos; /* can be NULL */ + amrestrpos_function amrestrpos; /* can be NULL */ + + /* interface functions to support parallel index scans */ + amestimateparallelscan_function amestimateparallelscan; /* can be NULL */ + aminitparallelscan_function aminitparallelscan; /* can be NULL */ + amparallelrescan_function amparallelrescan; /* can be NULL */ +} IndexAmRoutine; + + +/* Functions in access/index/amapi.c */ +extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler); +extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid amoid, bool noerror); + +#endif /* AMAPI_H */ diff --git a/src/include/access/amvalidate.h b/src/include/access/amvalidate.h new file mode 100644 index 0000000..df02fba --- /dev/null +++ b/src/include/access/amvalidate.h @@ -0,0 +1,40 @@ +/*------------------------------------------------------------------------- + * + * amvalidate.h + * Support routines for index access methods' amvalidate and + * amadjustmembers functions. 
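To make the IndexAmRoutine contract above concrete, here is a minimal handler sketch in the style of the in-core access methods. It is illustrative only and not part of this patch: the "di*" names are invented, their implementations are assumed to exist elsewhere, and fields not assigned stay zeroed/NULL courtesy of makeNode().

#include "postgres.h"
#include "access/amapi.h"
#include "fmgr.h"

PG_FUNCTION_INFO_V1(dihandler);

/* Hypothetical handler: returns a palloc'd IndexAmRoutine describing the AM */
Datum
dihandler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    amroutine->amstrategies = 1;        /* fixed strategy set of one operator */
    amroutine->amsupport = 1;
    amroutine->amoptsprocnum = 0;
    amroutine->amcanmulticol = true;
    amroutine->amoptionalkey = true;
    amroutine->amkeytype = InvalidOid;  /* stored type varies per column */

    /* required callbacks; the di* functions are assumed AM implementations */
    amroutine->ambuild = dibuild;
    amroutine->ambuildempty = dibuildempty;
    amroutine->aminsert = diinsert;
    amroutine->ambulkdelete = dibulkdelete;
    amroutine->amvacuumcleanup = divacuumcleanup;
    amroutine->amcostestimate = dicostestimate;
    amroutine->amoptions = dioptions;
    amroutine->amvalidate = divalidate;
    amroutine->ambeginscan = dibeginscan;
    amroutine->amrescan = direscan;
    amroutine->amgetbitmap = digetbitmap;   /* bitmap scans only; amgettuple left NULL */
    amroutine->amendscan = diendscan;

    PG_RETURN_POINTER(amroutine);
}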
+ * + * Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * src/include/access/amvalidate.h + * + *------------------------------------------------------------------------- + */ +#ifndef AMVALIDATE_H +#define AMVALIDATE_H + +#include "utils/catcache.h" + + +/* Struct returned (in a list) by identify_opfamily_groups() */ +typedef struct OpFamilyOpFuncGroup +{ + Oid lefttype; /* amoplefttype/amproclefttype */ + Oid righttype; /* amoprighttype/amprocrighttype */ + uint64 operatorset; /* bitmask of operators with these types */ + uint64 functionset; /* bitmask of support funcs with these types */ +} OpFamilyOpFuncGroup; + + +/* Functions in access/index/amvalidate.c */ +extern List *identify_opfamily_groups(CatCList *oprlist, CatCList *proclist); +extern bool check_amproc_signature(Oid funcid, Oid restype, bool exact, + int minargs, int maxargs,...); +extern bool check_amoptsproc_signature(Oid funcid); +extern bool check_amop_signature(Oid opno, Oid restype, + Oid lefttype, Oid righttype); +extern Oid opclass_for_family_datatype(Oid amoid, Oid opfamilyoid, + Oid datatypeoid); +extern bool opfamily_can_sort_type(Oid opfamilyoid, Oid datatypeoid); + +#endif /* AMVALIDATE_H */ diff --git a/src/include/access/attmap.h b/src/include/access/attmap.h new file mode 100644 index 0000000..778fa27 --- /dev/null +++ b/src/include/access/attmap.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * attmap.h + * Definitions for PostgreSQL attribute mappings + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/attmap.h + * + *------------------------------------------------------------------------- + */ +#ifndef ATTMAP_H +#define ATTMAP_H + +#include "access/attnum.h" +#include "access/tupdesc.h" + +/* + * Attribute mapping structure + * + * This maps attribute numbers between a pair of relations, designated + * 'input' and 'output' (most typically inheritance parent and child + * relations), whose common columns may have different attribute numbers. + * Such difference may arise due to the columns being ordered differently + * in the two relations or the two relations having dropped columns at + * different positions. + * + * 'maplen' is set to the number of attributes of the 'output' relation, + * taking into account any of its dropped attributes, with the corresponding + * elements of the 'attnums' array set to 0. + */ +typedef struct AttrMap +{ + AttrNumber *attnums; + int maplen; +} AttrMap; + +extern AttrMap *make_attrmap(int maplen); +extern void free_attrmap(AttrMap *map); + +/* Conversion routines to build mappings */ +extern AttrMap *build_attrmap_by_name(TupleDesc indesc, + TupleDesc outdesc); +extern AttrMap *build_attrmap_by_name_if_req(TupleDesc indesc, + TupleDesc outdesc); +extern AttrMap *build_attrmap_by_position(TupleDesc indesc, + TupleDesc outdesc, + const char *msg); + +#endif /* ATTMAP_H */ diff --git a/src/include/access/attnum.h b/src/include/access/attnum.h new file mode 100644 index 0000000..0c43e26 --- /dev/null +++ b/src/include/access/attnum.h @@ -0,0 +1,64 @@ +/*------------------------------------------------------------------------- + * + * attnum.h + * POSTGRES attribute number definitions. 
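A hedged usage sketch for the attmap.h API above (not part of this patch): translating a parent relation's column numbers into a child's, assuming both relations are already open.

#include "postgres.h"
#include "access/attmap.h"
#include "utils/rel.h"

static void
show_column_mapping(Relation parentrel, Relation childrel)
{
    /* 'input' = parent, 'output' = child, per the conventions described above */
    AttrMap    *map = build_attrmap_by_name(RelationGetDescr(parentrel),
                                            RelationGetDescr(childrel));

    for (int i = 0; i < map->maplen; i++)
    {
        /* attnums[i] is the parent attnum feeding child attnum i+1, or 0 */
        if (map->attnums[i] != InvalidAttrNumber)
            elog(DEBUG1, "child column %d comes from parent column %d",
                 i + 1, map->attnums[i]);
    }

    free_attrmap(map);
}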
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/attnum.h + * + *------------------------------------------------------------------------- + */ +#ifndef ATTNUM_H +#define ATTNUM_H + + +/* + * user defined attribute numbers start at 1. -ay 2/95 + */ +typedef int16 AttrNumber; + +#define InvalidAttrNumber 0 +#define MaxAttrNumber 32767 + +/* ---------------- + * support macros + * ---------------- + */ +/* + * AttributeNumberIsValid + * True iff the attribute number is valid. + */ +#define AttributeNumberIsValid(attributeNumber) \ + ((bool) ((attributeNumber) != InvalidAttrNumber)) + +/* + * AttrNumberIsForUserDefinedAttr + * True iff the attribute number corresponds to an user defined attribute. + */ +#define AttrNumberIsForUserDefinedAttr(attributeNumber) \ + ((bool) ((attributeNumber) > 0)) + +/* + * AttrNumberGetAttrOffset + * Returns the attribute offset for an attribute number. + * + * Note: + * Assumes the attribute number is for a user defined attribute. + */ +#define AttrNumberGetAttrOffset(attNum) \ +( \ + AssertMacro(AttrNumberIsForUserDefinedAttr(attNum)), \ + ((attNum) - 1) \ +) + +/* + * AttrOffsetGetAttrNumber + * Returns the attribute number for an attribute offset. + */ +#define AttrOffsetGetAttrNumber(attributeOffset) \ + ((AttrNumber) (1 + (attributeOffset))) + +#endif /* ATTNUM_H */ diff --git a/src/include/access/brin.h b/src/include/access/brin.h new file mode 100644 index 0000000..4e2be13 --- /dev/null +++ b/src/include/access/brin.h @@ -0,0 +1,55 @@ +/* + * AM-callable functions for BRIN indexes + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/brin.h + */ +#ifndef BRIN_H +#define BRIN_H + +#include "nodes/execnodes.h" +#include "utils/relcache.h" + + +/* + * Storage type for BRIN's reloptions + */ +typedef struct BrinOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + BlockNumber pagesPerRange; + bool autosummarize; +} BrinOptions; + + +/* + * BrinStatsData represents stats data for planner use + */ +typedef struct BrinStatsData +{ + BlockNumber pagesPerRange; + BlockNumber revmapNumPages; +} BrinStatsData; + + +#define BRIN_DEFAULT_PAGES_PER_RANGE 128 +#define BrinGetPagesPerRange(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == BRIN_AM_OID), \ + (relation)->rd_options ? \ + ((BrinOptions *) (relation)->rd_options)->pagesPerRange : \ + BRIN_DEFAULT_PAGES_PER_RANGE) +#define BrinGetAutoSummarize(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == BRIN_AM_OID), \ + (relation)->rd_options ? 
\ + ((BrinOptions *) (relation)->rd_options)->autosummarize : \ + false) + + +extern void brinGetStats(Relation index, BrinStatsData *stats); + +#endif /* BRIN_H */ diff --git a/src/include/access/brin_internal.h b/src/include/access/brin_internal.h new file mode 100644 index 0000000..79440eb --- /dev/null +++ b/src/include/access/brin_internal.h @@ -0,0 +1,115 @@ +/* + * brin_internal.h + * internal declarations for BRIN indexes + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/brin_internal.h + */ +#ifndef BRIN_INTERNAL_H +#define BRIN_INTERNAL_H + +#include "access/amapi.h" +#include "storage/bufpage.h" +#include "utils/typcache.h" + + +/* + * A BrinDesc is a struct designed to enable decoding a BRIN tuple from the + * on-disk format to an in-memory tuple and vice-versa. + */ + +/* struct returned by "OpcInfo" amproc */ +typedef struct BrinOpcInfo +{ + /* Number of columns stored in an index column of this opclass */ + uint16 oi_nstored; + + /* Regular processing of NULLs in BrinValues? */ + bool oi_regular_nulls; + + /* Opaque pointer for the opclass' private use */ + void *oi_opaque; + + /* Type cache entries of the stored columns */ + TypeCacheEntry *oi_typcache[FLEXIBLE_ARRAY_MEMBER]; +} BrinOpcInfo; + +/* the size of a BrinOpcInfo for the given number of columns */ +#define SizeofBrinOpcInfo(ncols) \ + (offsetof(BrinOpcInfo, oi_typcache) + sizeof(TypeCacheEntry *) * ncols) + +typedef struct BrinDesc +{ + /* Containing memory context */ + MemoryContext bd_context; + + /* the index relation itself */ + Relation bd_index; + + /* tuple descriptor of the index relation */ + TupleDesc bd_tupdesc; + + /* cached copy for on-disk tuples; generated at first use */ + TupleDesc bd_disktdesc; + + /* total number of Datum entries that are stored on-disk for all columns */ + int bd_totalstored; + + /* per-column info; bd_tupdesc->natts entries long */ + BrinOpcInfo *bd_info[FLEXIBLE_ARRAY_MEMBER]; +} BrinDesc; + +/* + * Globally-known function support numbers for BRIN indexes. Individual + * opclasses can define more function support numbers, which must fall into + * BRIN_FIRST_OPTIONAL_PROCNUM .. BRIN_LAST_OPTIONAL_PROCNUM. 
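An illustrative sketch (not part of this patch) of the planner-facing entry point declared in brin.h above; brinIndexOid is assumed to identify an existing BRIN index.

#include "postgres.h"
#include "access/brin.h"
#include "access/genam.h"
#include "storage/lockdefs.h"

static void
log_brin_stats(Oid brinIndexOid)
{
    Relation    idxRel = index_open(brinIndexOid, AccessShareLock);
    BrinStatsData stats;

    brinGetStats(idxRel, &stats);
    elog(DEBUG1, "pages per range: %u, revmap pages: %u",
         stats.pagesPerRange, stats.revmapNumPages);

    index_close(idxRel, AccessShareLock);
}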
+ */ +#define BRIN_PROCNUM_OPCINFO 1 +#define BRIN_PROCNUM_ADDVALUE 2 +#define BRIN_PROCNUM_CONSISTENT 3 +#define BRIN_PROCNUM_UNION 4 +#define BRIN_MANDATORY_NPROCS 4 +#define BRIN_PROCNUM_OPTIONS 5 /* optional */ +/* procedure numbers up to 10 are reserved for BRIN future expansion */ +#define BRIN_FIRST_OPTIONAL_PROCNUM 11 +#define BRIN_LAST_OPTIONAL_PROCNUM 15 + +#undef BRIN_DEBUG + +#ifdef BRIN_DEBUG +#define BRIN_elog(args) elog args +#else +#define BRIN_elog(args) ((void) 0) +#endif + +/* brin.c */ +extern BrinDesc *brin_build_desc(Relation rel); +extern void brin_free_desc(BrinDesc *bdesc); +extern IndexBuildResult *brinbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void brinbuildempty(Relation index); +extern bool brininsert(Relation idxRel, Datum *values, bool *nulls, + ItemPointer heaptid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +extern IndexScanDesc brinbeginscan(Relation r, int nkeys, int norderbys); +extern int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm); +extern void brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +extern void brinendscan(IndexScanDesc scan); +extern IndexBulkDeleteResult *brinbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *brinvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); +extern bytea *brinoptions(Datum reloptions, bool validate); + +/* brin_validate.c */ +extern bool brinvalidate(Oid opclassoid); + +#endif /* BRIN_INTERNAL_H */ diff --git a/src/include/access/brin_page.h b/src/include/access/brin_page.h new file mode 100644 index 0000000..75de538 --- /dev/null +++ b/src/include/access/brin_page.h @@ -0,0 +1,96 @@ +/* + * brin_page.h + * Prototypes and definitions for BRIN page layouts + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/brin_page.h + * + * NOTES + * + * These structs should really be private to specific BRIN files, but it's + * useful to have them here so that they can be used by pageinspect and similar + * tools. + */ +#ifndef BRIN_PAGE_H +#define BRIN_PAGE_H + +#include "storage/block.h" +#include "storage/itemptr.h" + +/* + * Special area of BRIN pages. + * + * We define it in this odd way so that it always occupies the last + * MAXALIGN-sized element of each page. + */ +typedef struct BrinSpecialSpace +{ + uint16 vector[MAXALIGN(1) / sizeof(uint16)]; +} BrinSpecialSpace; + +/* + * Make the page type be the last half-word in the page, for consumption by + * pg_filedump and similar utilities. We don't really care much about the + * position of the "flags" half-word, but it's simpler to apply a consistent + * rule to both. + * + * See comments above GinPageOpaqueData. 
+ */ +#define BrinPageType(page) \ + (((BrinSpecialSpace *) \ + PageGetSpecialPointer(page))->vector[MAXALIGN(1) / sizeof(uint16) - 1]) + +#define BrinPageFlags(page) \ + (((BrinSpecialSpace *) \ + PageGetSpecialPointer(page))->vector[MAXALIGN(1) / sizeof(uint16) - 2]) + +/* special space on all BRIN pages stores a "type" identifier */ +#define BRIN_PAGETYPE_META 0xF091 +#define BRIN_PAGETYPE_REVMAP 0xF092 +#define BRIN_PAGETYPE_REGULAR 0xF093 + +#define BRIN_IS_META_PAGE(page) (BrinPageType(page) == BRIN_PAGETYPE_META) +#define BRIN_IS_REVMAP_PAGE(page) (BrinPageType(page) == BRIN_PAGETYPE_REVMAP) +#define BRIN_IS_REGULAR_PAGE(page) (BrinPageType(page) == BRIN_PAGETYPE_REGULAR) + +/* flags for BrinSpecialSpace */ +#define BRIN_EVACUATE_PAGE (1 << 0) + + +/* Metapage definitions */ +typedef struct BrinMetaPageData +{ + uint32 brinMagic; + uint32 brinVersion; + BlockNumber pagesPerRange; + BlockNumber lastRevmapPage; +} BrinMetaPageData; + +#define BRIN_CURRENT_VERSION 1 +#define BRIN_META_MAGIC 0xA8109CFA + +#define BRIN_METAPAGE_BLKNO 0 + +/* Definitions for revmap pages */ +typedef struct RevmapContents +{ + /* + * This array will fill all available space on the page. It should be + * declared [FLEXIBLE_ARRAY_MEMBER], but for some reason you can't do that + * in an otherwise-empty struct. + */ + ItemPointerData rm_tids[1]; +} RevmapContents; + +#define REVMAP_CONTENT_SIZE \ + (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \ + offsetof(RevmapContents, rm_tids) - \ + MAXALIGN(sizeof(BrinSpecialSpace))) +/* max num of items in the array */ +#define REVMAP_PAGE_MAXITEMS \ + (REVMAP_CONTENT_SIZE / sizeof(ItemPointerData)) + +#endif /* BRIN_PAGE_H */ diff --git a/src/include/access/brin_pageops.h b/src/include/access/brin_pageops.h new file mode 100644 index 0000000..c2e8a2a --- /dev/null +++ b/src/include/access/brin_pageops.h @@ -0,0 +1,38 @@ +/* + * brin_pageops.h + * Prototypes for operating on BRIN pages. 
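A small sketch (illustrative only) of the page-classification macros above, in the spirit of what pageinspect-style tooling does; the page is assumed to come from a pinned, locked buffer of a BRIN index.

#include "postgres.h"
#include "access/brin_page.h"
#include "storage/bufpage.h"

static const char *
brin_page_type_name(Page page)
{
    if (BRIN_IS_META_PAGE(page))
        return "meta";
    if (BRIN_IS_REVMAP_PAGE(page))
        return "revmap";
    if (BRIN_IS_REGULAR_PAGE(page))
        return "regular";
    return "unknown";
}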
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/brin_pageops.h + */ +#ifndef BRIN_PAGEOPS_H +#define BRIN_PAGEOPS_H + +#include "access/brin_revmap.h" + +extern bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, + BrinRevmap *revmap, BlockNumber heapBlk, + Buffer oldbuf, OffsetNumber oldoff, + const BrinTuple *origtup, Size origsz, + const BrinTuple *newtup, Size newsz, + bool samepage); +extern bool brin_can_do_samepage_update(Buffer buffer, Size origsz, + Size newsz); +extern OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, + BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, + BrinTuple *tup, Size itemsz); + +extern void brin_page_init(Page page, uint16 type); +extern void brin_metapage_init(Page page, BlockNumber pagesPerRange, + uint16 version); + +extern bool brin_start_evacuating_page(Relation idxRel, Buffer buf); +extern void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, + BrinRevmap *revmap, Buffer buf); + +extern void brin_page_cleanup(Relation idxrel, Buffer buf); + +#endif /* BRIN_PAGEOPS_H */ diff --git a/src/include/access/brin_revmap.h b/src/include/access/brin_revmap.h new file mode 100644 index 0000000..4259fe8 --- /dev/null +++ b/src/include/access/brin_revmap.h @@ -0,0 +1,41 @@ +/* + * brin_revmap.h + * Prototypes for BRIN reverse range maps + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/brin_revmap.h + */ + +#ifndef BRIN_REVMAP_H +#define BRIN_REVMAP_H + +#include "access/brin_tuple.h" +#include "storage/block.h" +#include "storage/buf.h" +#include "storage/itemptr.h" +#include "storage/off.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" + +/* struct definition lives in brin_revmap.c */ +typedef struct BrinRevmap BrinRevmap; + +extern BrinRevmap *brinRevmapInitialize(Relation idxrel, + BlockNumber *pagesPerRange, Snapshot snapshot); +extern void brinRevmapTerminate(BrinRevmap *revmap); + +extern void brinRevmapExtend(BrinRevmap *revmap, + BlockNumber heapBlk); +extern Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, + BlockNumber heapBlk); +extern void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, + BlockNumber heapBlk, ItemPointerData tid); +extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap, + BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, + Size *size, int mode, Snapshot snapshot); +extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk); + +#endif /* BRIN_REVMAP_H */ diff --git a/src/include/access/brin_tuple.h b/src/include/access/brin_tuple.h new file mode 100644 index 0000000..c80341f --- /dev/null +++ b/src/include/access/brin_tuple.h @@ -0,0 +1,110 @@ +/* + * brin_tuple.h + * Declarations for dealing with BRIN-specific tuples. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/brin_tuple.h + */ +#ifndef BRIN_TUPLE_H +#define BRIN_TUPLE_H + +#include "access/brin_internal.h" +#include "access/tupdesc.h" + +/* + * The BRIN opclasses may register serialization callback, in case the on-disk + * and in-memory representations differ (e.g. for performance reasons). 
+ */ +typedef void (*brin_serialize_callback_type) (BrinDesc *bdesc, Datum src, Datum *dst); + +/* + * A BRIN index stores one index tuple per page range. Each index tuple + * has one BrinValues struct for each indexed column; in turn, each BrinValues + * has (besides the null flags) an array of Datum whose size is determined by + * the opclass. + */ +typedef struct BrinValues +{ + AttrNumber bv_attno; /* index attribute number */ + bool bv_hasnulls; /* are there any nulls in the page range? */ + bool bv_allnulls; /* are all values nulls in the page range? */ + Datum *bv_values; /* current accumulated values */ + Datum bv_mem_value; /* expanded accumulated values */ + MemoryContext bv_context; + brin_serialize_callback_type bv_serialize; +} BrinValues; + +/* + * This struct is used to represent an in-memory index tuple. The values can + * only be meaningfully decoded with an appropriate BrinDesc. + */ +typedef struct BrinMemTuple +{ + bool bt_placeholder; /* this is a placeholder tuple */ + BlockNumber bt_blkno; /* heap blkno that the tuple is for */ + MemoryContext bt_context; /* memcxt holding the bt_columns values */ + /* output arrays for brin_deform_tuple: */ + Datum *bt_values; /* values array */ + bool *bt_allnulls; /* allnulls array */ + bool *bt_hasnulls; /* hasnulls array */ + /* not an output array, but must be last */ + BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]; +} BrinMemTuple; + +/* + * An on-disk BRIN tuple. This is possibly followed by a nulls bitmask, with + * room for 2 null bits (two bits for each indexed column); an opclass-defined + * number of Datum values for each column follow. + */ +typedef struct BrinTuple +{ + /* heap block number that the tuple is for */ + BlockNumber bt_blkno; + + /* --------------- + * bt_info is laid out in the following fashion: + * + * 7th (high) bit: has nulls + * 6th bit: is placeholder tuple + * 5th bit: unused + * 4-0 bit: offset of data + * --------------- + */ + uint8 bt_info; +} BrinTuple; + +#define SizeOfBrinTuple (offsetof(BrinTuple, bt_info) + sizeof(uint8)) + +/* + * bt_info manipulation macros + */ +#define BRIN_OFFSET_MASK 0x1F +/* bit 0x20 is not used at present */ +#define BRIN_PLACEHOLDER_MASK 0x40 +#define BRIN_NULLS_MASK 0x80 + +#define BrinTupleDataOffset(tup) ((Size) (((BrinTuple *) (tup))->bt_info & BRIN_OFFSET_MASK)) +#define BrinTupleHasNulls(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_NULLS_MASK)) != 0) +#define BrinTupleIsPlaceholder(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_PLACEHOLDER_MASK)) != 0) + + +extern BrinTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, + BrinMemTuple *tuple, Size *size); +extern BrinTuple *brin_form_placeholder_tuple(BrinDesc *brdesc, + BlockNumber blkno, Size *size); +extern void brin_free_tuple(BrinTuple *tuple); +extern BrinTuple *brin_copy_tuple(BrinTuple *tuple, Size len, + BrinTuple *dest, Size *destsz); +extern bool brin_tuples_equal(const BrinTuple *a, Size alen, + const BrinTuple *b, Size blen); + +extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc); +extern BrinMemTuple *brin_memtuple_initialize(BrinMemTuple *dtuple, + BrinDesc *brdesc); +extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc, + BrinTuple *tuple, BrinMemTuple *dMemtuple); + +#endif /* BRIN_TUPLE_H */ diff --git a/src/include/access/brin_xlog.h b/src/include/access/brin_xlog.h new file mode 100644 index 0000000..ace8aa0 --- /dev/null +++ b/src/include/access/brin_xlog.h @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * brin_xlog.h + * 
POSTGRES BRIN access XLOG definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/brin_xlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef BRIN_XLOG_H +#define BRIN_XLOG_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/bufpage.h" +#include "storage/itemptr.h" +#include "storage/relfilenode.h" +#include "utils/relcache.h" + + +/* + * WAL record definitions for BRIN's WAL operations + * + * XLOG allows to store some information in high 4 bits of log + * record xl_info field. + */ +#define XLOG_BRIN_CREATE_INDEX 0x00 +#define XLOG_BRIN_INSERT 0x10 +#define XLOG_BRIN_UPDATE 0x20 +#define XLOG_BRIN_SAMEPAGE_UPDATE 0x30 +#define XLOG_BRIN_REVMAP_EXTEND 0x40 +#define XLOG_BRIN_DESUMMARIZE 0x50 + +#define XLOG_BRIN_OPMASK 0x70 +/* + * When we insert the first item on a new page, we restore the entire page in + * redo. + */ +#define XLOG_BRIN_INIT_PAGE 0x80 + +/* + * This is what we need to know about a BRIN index create. + * + * Backup block 0: metapage + */ +typedef struct xl_brin_createidx +{ + BlockNumber pagesPerRange; + uint16 version; +} xl_brin_createidx; +#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16)) + +/* + * This is what we need to know about a BRIN tuple insert + * + * Backup block 0: main page, block data is the new BrinTuple. + * Backup block 1: revmap page + */ +typedef struct xl_brin_insert +{ + BlockNumber heapBlk; + + /* extra information needed to update the revmap */ + BlockNumber pagesPerRange; + + /* offset number in the main page to insert the tuple to. */ + OffsetNumber offnum; +} xl_brin_insert; + +#define SizeOfBrinInsert (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber)) + +/* + * A cross-page update is the same as an insert, but also stores information + * about the old tuple. + * + * Like in xl_brin_insert: + * Backup block 0: new page, block data includes the new BrinTuple. + * Backup block 1: revmap page + * + * And in addition: + * Backup block 2: old page + */ +typedef struct xl_brin_update +{ + /* offset number of old tuple on old page */ + OffsetNumber oldOffnum; + + xl_brin_insert insert; +} xl_brin_update; + +#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert) + +/* + * This is what we need to know about a BRIN tuple samepage update + * + * Backup block 0: updated page, with new BrinTuple as block data + */ +typedef struct xl_brin_samepage_update +{ + OffsetNumber offnum; +} xl_brin_samepage_update; + +#define SizeOfBrinSamepageUpdate (sizeof(OffsetNumber)) + +/* + * This is what we need to know about a revmap extension + * + * Backup block 0: metapage + * Backup block 1: new revmap page + */ +typedef struct xl_brin_revmap_extend +{ + /* + * XXX: This is actually redundant - the block number is stored as part of + * backup block 1. 
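A hedged sketch (not part of this patch) of how a redo routine decodes the opcode kept in the high bits of xl_info, following the usual rmgr pattern that the XLOG_BRIN_* definitions above assume.

#include "postgres.h"
#include "access/brin_xlog.h"

static void
brin_redo_sketch(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    switch (info & XLOG_BRIN_OPMASK)
    {
        case XLOG_BRIN_CREATE_INDEX:
            /* reinitialize the metapage from backup block 0 */
            break;
        case XLOG_BRIN_INSERT:
        case XLOG_BRIN_UPDATE:
        case XLOG_BRIN_SAMEPAGE_UPDATE:
        case XLOG_BRIN_REVMAP_EXTEND:
        case XLOG_BRIN_DESUMMARIZE:
            /* replay against the registered backup blocks */
            break;
        default:
            elog(PANIC, "unknown BRIN op code %u", info);
    }

    /* XLOG_BRIN_INIT_PAGE, when set, additionally requests page (re)initialization */
}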
+ */ + BlockNumber targetBlk; +} xl_brin_revmap_extend; + +#define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetBlk) + \ + sizeof(BlockNumber)) + +/* + * This is what we need to know about a range de-summarization + * + * Backup block 0: revmap page + * Backup block 1: regular page + */ +typedef struct xl_brin_desummarize +{ + BlockNumber pagesPerRange; + /* page number location to set to invalid */ + BlockNumber heapBlk; + /* offset of item to delete in regular index page */ + OffsetNumber regOffset; +} xl_brin_desummarize; + +#define SizeOfBrinDesummarize (offsetof(xl_brin_desummarize, regOffset) + \ + sizeof(OffsetNumber)) + + +extern void brin_redo(XLogReaderState *record); +extern void brin_desc(StringInfo buf, XLogReaderState *record); +extern const char *brin_identify(uint8 info); +extern void brin_mask(char *pagedata, BlockNumber blkno); + +#endif /* BRIN_XLOG_H */ diff --git a/src/include/access/bufmask.h b/src/include/access/bufmask.h new file mode 100644 index 0000000..add6c9a --- /dev/null +++ b/src/include/access/bufmask.h @@ -0,0 +1,32 @@ +/*------------------------------------------------------------------------- + * + * bufmask.h + * Definitions for buffer masking routines, used to mask certain bits + * in a page which can be different when the WAL is generated + * and when the WAL is applied. This is really the job of each + * individual rmgr, but we make things easier by providing some + * common routines to handle cases which occur in multiple rmgrs. + * + * Portions Copyright (c) 2016-2021, PostgreSQL Global Development Group + * + * src/include/access/bufmask.h + * + *------------------------------------------------------------------------- + */ + +#ifndef BUFMASK_H +#define BUFMASK_H + +#include "storage/block.h" +#include "storage/bufmgr.h" + +/* Marker used to mask pages consistently */ +#define MASK_MARKER 0 + +extern void mask_page_lsn_and_checksum(Page page); +extern void mask_page_hint_bits(Page page); +extern void mask_unused_space(Page page); +extern void mask_lp_flags(Page page); +extern void mask_page_content(Page page); + +#endif diff --git a/src/include/access/clog.h b/src/include/access/clog.h new file mode 100644 index 0000000..39b8e4a --- /dev/null +++ b/src/include/access/clog.h @@ -0,0 +1,63 @@ +/* + * clog.h + * + * PostgreSQL transaction-commit-log manager + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/clog.h + */ +#ifndef CLOG_H +#define CLOG_H + +#include "access/xlogreader.h" +#include "storage/sync.h" +#include "lib/stringinfo.h" + +/* + * Possible transaction statuses --- note that all-zeroes is the initial + * state. + * + * A "subcommitted" transaction is a committed subtransaction whose parent + * hasn't committed or aborted yet. 
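An illustrative sketch of reading a transaction's CLOG state through the interface declared below; most backend code reaches this layer via transam.c helpers such as TransactionIdDidCommit() rather than calling it directly.

#include "postgres.h"
#include "access/clog.h"

static bool
xact_known_committed(TransactionId xid)
{
    XLogRecPtr  commit_lsn;         /* LSN returned alongside the status */
    XidStatus   status = TransactionIdGetStatus(xid, &commit_lsn);

    return status == TRANSACTION_STATUS_COMMITTED;
}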
+ */ +typedef int XidStatus; + +#define TRANSACTION_STATUS_IN_PROGRESS 0x00 +#define TRANSACTION_STATUS_COMMITTED 0x01 +#define TRANSACTION_STATUS_ABORTED 0x02 +#define TRANSACTION_STATUS_SUB_COMMITTED 0x03 + +typedef struct xl_clog_truncate +{ + int pageno; + TransactionId oldestXact; + Oid oldestXactDb; +} xl_clog_truncate; + +extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, + TransactionId *subxids, XidStatus status, XLogRecPtr lsn); +extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn); + +extern Size CLOGShmemBuffers(void); +extern Size CLOGShmemSize(void); +extern void CLOGShmemInit(void); +extern void BootStrapCLOG(void); +extern void StartupCLOG(void); +extern void TrimCLOG(void); +extern void CheckPointCLOG(void); +extern void ExtendCLOG(TransactionId newestXact); +extern void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid); + +extern int clogsyncfiletag(const FileTag *ftag, char *path); + +/* XLOG stuff */ +#define CLOG_ZEROPAGE 0x00 +#define CLOG_TRUNCATE 0x10 + +extern void clog_redo(XLogReaderState *record); +extern void clog_desc(StringInfo buf, XLogReaderState *record); +extern const char *clog_identify(uint8 info); + +#endif /* CLOG_H */ diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h new file mode 100644 index 0000000..e045dd4 --- /dev/null +++ b/src/include/access/commit_ts.h @@ -0,0 +1,78 @@ +/* + * commit_ts.h + * + * PostgreSQL commit timestamp manager + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/commit_ts.h + */ +#ifndef COMMIT_TS_H +#define COMMIT_TS_H + +#include "access/xlog.h" +#include "datatype/timestamp.h" +#include "replication/origin.h" +#include "storage/sync.h" +#include "utils/guc.h" + + +extern PGDLLIMPORT bool track_commit_timestamp; + +extern bool check_track_commit_timestamp(bool *newval, void **extra, + GucSource source); + +extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, + TransactionId *subxids, TimestampTz timestamp, + RepOriginId nodeid); +extern bool TransactionIdGetCommitTsData(TransactionId xid, + TimestampTz *ts, RepOriginId *nodeid); +extern TransactionId GetLatestCommitTsData(TimestampTz *ts, + RepOriginId *nodeid); + +extern Size CommitTsShmemBuffers(void); +extern Size CommitTsShmemSize(void); +extern void CommitTsShmemInit(void); +extern void BootStrapCommitTs(void); +extern void StartupCommitTs(void); +extern void CommitTsParameterChange(bool newvalue, bool oldvalue); +extern void CompleteCommitTsInitialization(void); +extern void CheckPointCommitTs(void); +extern void ExtendCommitTs(TransactionId newestXact); +extern void TruncateCommitTs(TransactionId oldestXact); +extern void SetCommitTsLimit(TransactionId oldestXact, + TransactionId newestXact); +extern void AdvanceOldestCommitTsXid(TransactionId oldestXact); + +extern int committssyncfiletag(const FileTag *ftag, char *path); + +/* XLOG stuff */ +#define COMMIT_TS_ZEROPAGE 0x00 +#define COMMIT_TS_TRUNCATE 0x10 + +typedef struct xl_commit_ts_set +{ + TimestampTz timestamp; + RepOriginId nodeid; + TransactionId mainxid; + /* subxact Xids follow */ +} xl_commit_ts_set; + +#define SizeOfCommitTsSet (offsetof(xl_commit_ts_set, mainxid) + \ + sizeof(TransactionId)) + +typedef struct xl_commit_ts_truncate +{ + int pageno; + TransactionId oldestXid; +} xl_commit_ts_truncate; + +#define SizeOfCommitTsTruncate (offsetof(xl_commit_ts_truncate, 
oldestXid) + \ + sizeof(TransactionId)) + +extern void commit_ts_redo(XLogReaderState *record); +extern void commit_ts_desc(StringInfo buf, XLogReaderState *record); +extern const char *commit_ts_identify(uint8 info); + +#endif /* COMMIT_TS_H */ diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h new file mode 100644 index 0000000..773a02f --- /dev/null +++ b/src/include/access/detoast.h @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * detoast.h + * Access to compressed and external varlena values. + * + * Copyright (c) 2000-2021, PostgreSQL Global Development Group + * + * src/include/access/detoast.h + * + *------------------------------------------------------------------------- + */ +#ifndef DETOAST_H +#define DETOAST_H + +/* + * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum + * into a local "struct varatt_external" toast pointer. This should be + * just a memcpy, but some versions of gcc seem to produce broken code + * that assumes the datum contents are aligned. Introducing an explicit + * intermediate "varattrib_1b_e *" variable seems to fix it. + */ +#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \ +do { \ + varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \ + Assert(VARATT_IS_EXTERNAL(attre)); \ + Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \ + memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \ +} while (0) + +/* Size of an EXTERNAL datum that contains a standard TOAST pointer */ +#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_external)) + +/* Size of an EXTERNAL datum that contains an indirection pointer */ +#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect)) + +/* ---------- + * detoast_external_attr() - + * + * Fetches an external stored attribute from the toast + * relation. Does NOT decompress it, if stored external + * in compressed format. + * ---------- + */ +extern struct varlena *detoast_external_attr(struct varlena *attr); + +/* ---------- + * detoast_attr() - + * + * Fully detoasts one attribute, fetching and/or decompressing + * it as needed. + * ---------- + */ +extern struct varlena *detoast_attr(struct varlena *attr); + +/* ---------- + * detoast_attr_slice() - + * + * Fetches only the specified portion of an attribute. + * (Handles all cases for attribute storage) + * ---------- + */ +extern struct varlena *detoast_attr_slice(struct varlena *attr, + int32 sliceoffset, + int32 slicelength); + +/* ---------- + * toast_raw_datum_size - + * + * Return the raw (detoasted) size of a varlena datum + * ---------- + */ +extern Size toast_raw_datum_size(Datum value); + +/* ---------- + * toast_datum_size - + * + * Return the storage size of a varlena datum + * ---------- + */ +extern Size toast_datum_size(Datum value); + +#endif /* DETOAST_H */ diff --git a/src/include/access/genam.h b/src/include/access/genam.h new file mode 100644 index 0000000..480a476 --- /dev/null +++ b/src/include/access/genam.h @@ -0,0 +1,231 @@ +/*------------------------------------------------------------------------- + * + * genam.h + * POSTGRES generalized index access method definitions. 
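A usage sketch (illustrative, not part of this patch) of the detoast.h entry points above: flattening a varlena Datum before examining its contents.

#include "postgres.h"
#include "access/detoast.h"

static struct varlena *
flatten_varlena(Datum value)
{
    struct varlena *attr = (struct varlena *) DatumGetPointer(value);

    if (VARATT_IS_EXTENDED(attr))
        attr = detoast_attr(attr);  /* fetch external data and/or decompress */

    /* attr now has a plain 4-byte header, so VARSIZE()/VARDATA() apply */
    return attr;
}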
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/genam.h + * + *------------------------------------------------------------------------- + */ +#ifndef GENAM_H +#define GENAM_H + +#include "access/sdir.h" +#include "access/skey.h" +#include "nodes/tidbitmap.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" + +/* We don't want this file to depend on execnodes.h. */ +struct IndexInfo; + +/* + * Struct for statistics returned by ambuild + */ +typedef struct IndexBuildResult +{ + double heap_tuples; /* # of tuples seen in parent table */ + double index_tuples; /* # of tuples inserted into index */ +} IndexBuildResult; + +/* + * Struct for input arguments passed to ambulkdelete and amvacuumcleanup + * + * num_heap_tuples is accurate only when estimated_count is false; + * otherwise it's just an estimate (currently, the estimate is the + * prior value of the relation's pg_class.reltuples field, so it could + * even be -1). It will always just be an estimate during ambulkdelete. + */ +typedef struct IndexVacuumInfo +{ + Relation index; /* the index being vacuumed */ + bool analyze_only; /* ANALYZE (without any actual vacuum) */ + bool report_progress; /* emit progress.h status reports */ + bool estimated_count; /* num_heap_tuples is an estimate */ + int message_level; /* ereport level for progress messages */ + double num_heap_tuples; /* tuples remaining in heap */ + BufferAccessStrategy strategy; /* access strategy for reads */ +} IndexVacuumInfo; + +/* + * Struct for statistics returned by ambulkdelete and amvacuumcleanup + * + * This struct is normally allocated by the first ambulkdelete call and then + * passed along through subsequent ones until amvacuumcleanup; however, + * amvacuumcleanup must be prepared to allocate it in the case where no + * ambulkdelete calls were made (because no tuples needed deletion). + * Note that an index AM could choose to return a larger struct + * of which this is just the first field; this provides a way for ambulkdelete + * to communicate additional private data to amvacuumcleanup. + * + * Note: pages_newly_deleted is the number of pages in the index that were + * deleted by the current vacuum operation. pages_deleted and pages_free + * refer to free space within the index file. + * + * Note: Some index AMs may compute num_index_tuples by reference to + * num_heap_tuples, in which case they should copy the estimated_count field + * from IndexVacuumInfo. 
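The allocation convention described above, sketched as an AM's ambulkdelete callback would typically follow it (illustrative only; the index scan itself is elided).

#include "postgres.h"
#include "access/genam.h"

static IndexBulkDeleteResult *
sketch_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                  IndexBulkDeleteCallback callback, void *callback_state)
{
    /* first ambulkdelete call of this VACUUM? then the struct is ours to create */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /*
     * ... scan the index; for each indexed TID, consult
     * callback(&itemptr, callback_state), remove matching entries, and
     * update stats->tuples_removed / stats->num_index_tuples as we go ...
     */

    return stats;
}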
+ */ +typedef struct IndexBulkDeleteResult +{ + BlockNumber num_pages; /* pages remaining in index */ + bool estimated_count; /* num_index_tuples is an estimate */ + double num_index_tuples; /* tuples remaining */ + double tuples_removed; /* # removed during vacuum operation */ + BlockNumber pages_newly_deleted; /* # pages marked deleted by us */ + BlockNumber pages_deleted; /* # pages marked deleted (could be by us) */ + BlockNumber pages_free; /* # pages available for reuse */ +} IndexBulkDeleteResult; + +/* Typedef for callback function to determine if a tuple is bulk-deletable */ +typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state); + +/* struct definitions appear in relscan.h */ +typedef struct IndexScanDescData *IndexScanDesc; +typedef struct SysScanDescData *SysScanDesc; + +typedef struct ParallelIndexScanDescData *ParallelIndexScanDesc; + +/* + * Enumeration specifying the type of uniqueness check to perform in + * index_insert(). + * + * UNIQUE_CHECK_YES is the traditional Postgres immediate check, possibly + * blocking to see if a conflicting transaction commits. + * + * For deferrable unique constraints, UNIQUE_CHECK_PARTIAL is specified at + * insertion time. The index AM should test if the tuple is unique, but + * should not throw error, block, or prevent the insertion if the tuple + * appears not to be unique. We'll recheck later when it is time for the + * constraint to be enforced. The AM must return true if the tuple is + * known unique, false if it is possibly non-unique. In the "true" case + * it is safe to omit the later recheck. + * + * When it is time to recheck the deferred constraint, a pseudo-insertion + * call is made with UNIQUE_CHECK_EXISTING. The tuple is already in the + * index in this case, so it should not be inserted again. Rather, just + * check for conflicting live tuples (possibly blocking). + */ +typedef enum IndexUniqueCheck +{ + UNIQUE_CHECK_NO, /* Don't do any uniqueness checking */ + UNIQUE_CHECK_YES, /* Enforce uniqueness at insertion time */ + UNIQUE_CHECK_PARTIAL, /* Test uniqueness, but no error */ + UNIQUE_CHECK_EXISTING /* Check if existing tuple is unique */ +} IndexUniqueCheck; + + +/* Nullable "ORDER BY col op const" distance */ +typedef struct IndexOrderByDistance +{ + double value; + bool isnull; +} IndexOrderByDistance; + +/* + * generalized index_ interface routines (in indexam.c) + */ + +/* + * IndexScanIsValid + * True iff the index scan is valid. 
+ */ +#define IndexScanIsValid(scan) PointerIsValid(scan) + +extern Relation index_open(Oid relationId, LOCKMODE lockmode); +extern void index_close(Relation relation, LOCKMODE lockmode); + +extern bool index_insert(Relation indexRelation, + Datum *values, bool *isnull, + ItemPointer heap_t_ctid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); + +extern IndexScanDesc index_beginscan(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, int norderbys); +extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation, + Snapshot snapshot, + int nkeys); +extern void index_rescan(IndexScanDesc scan, + ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys); +extern void index_endscan(IndexScanDesc scan); +extern void index_markpos(IndexScanDesc scan); +extern void index_restrpos(IndexScanDesc scan); +extern Size index_parallelscan_estimate(Relation indexrel, Snapshot snapshot); +extern void index_parallelscan_initialize(Relation heaprel, Relation indexrel, + Snapshot snapshot, ParallelIndexScanDesc target); +extern void index_parallelrescan(IndexScanDesc scan); +extern IndexScanDesc index_beginscan_parallel(Relation heaprel, + Relation indexrel, int nkeys, int norderbys, + ParallelIndexScanDesc pscan); +extern ItemPointer index_getnext_tid(IndexScanDesc scan, + ScanDirection direction); +struct TupleTableSlot; +extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot); +extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, + struct TupleTableSlot *slot); +extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap); + +extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, + IndexBulkDeleteResult *istat, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *index_vacuum_cleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *istat); +extern bool index_can_return(Relation indexRelation, int attno); +extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum, + uint16 procnum); +extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum, + uint16 procnum); +extern void index_store_float8_orderby_distances(IndexScanDesc scan, + Oid *orderByTypes, + IndexOrderByDistance *distances, + bool recheckOrderBy); +extern bytea *index_opclass_options(Relation relation, AttrNumber attnum, + Datum attoptions, bool validate); + + +/* + * index access method support routines (in genam.c) + */ +extern IndexScanDesc RelationGetIndexScan(Relation indexRelation, + int nkeys, int norderbys); +extern void IndexScanEnd(IndexScanDesc scan); +extern char *BuildIndexValueDescription(Relation indexRelation, + Datum *values, bool *isnull); +extern TransactionId index_compute_xid_horizon_for_tuples(Relation irel, + Relation hrel, + Buffer ibuf, + OffsetNumber *itemnos, + int nitems); + +/* + * heap-or-index access to system catalogs (in genam.c) + */ +extern SysScanDesc systable_beginscan(Relation heapRelation, + Oid indexId, + bool indexOK, + Snapshot snapshot, + int nkeys, ScanKey key); +extern HeapTuple systable_getnext(SysScanDesc sysscan); +extern bool systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup); +extern void systable_endscan(SysScanDesc sysscan); +extern SysScanDesc systable_beginscan_ordered(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, ScanKey key); +extern HeapTuple systable_getnext_ordered(SysScanDesc sysscan, + ScanDirection direction); 
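A sketch (not part of this patch) of driving a plain index scan with the indexam routines above; both relations are assumed open and locked, and the index's first column is assumed to be int4.

#include "postgres.h"
#include "access/genam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/fmgroids.h"
#include "utils/snapmgr.h"

static void
scan_for_value(Relation heapRel, Relation indexRel, int32 key)
{
    ScanKeyData skey;
    IndexScanDesc scan;
    TupleTableSlot *slot = table_slot_create(heapRel, NULL);

    /* qual: first index column = key */
    ScanKeyInit(&skey, 1, BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum(key));

    scan = index_beginscan(heapRel, indexRel, GetActiveSnapshot(), 1, 0);
    index_rescan(scan, &skey, 1, NULL, 0);

    while (index_getnext_slot(scan, ForwardScanDirection, slot))
    {
        /* the slot now holds a visible heap tuple matching the key */
    }

    index_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}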
+extern void systable_endscan_ordered(SysScanDesc sysscan); + +#endif /* GENAM_H */ diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h new file mode 100644 index 0000000..6e0a275 --- /dev/null +++ b/src/include/access/generic_xlog.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * generic_xlog.h + * Generic xlog API definition. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/generic_xlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef GENERIC_XLOG_H +#define GENERIC_XLOG_H + +#include "access/xlog.h" +#include "access/xlog_internal.h" +#include "access/xloginsert.h" +#include "storage/bufpage.h" +#include "utils/rel.h" + +#define MAX_GENERIC_XLOG_PAGES XLR_NORMAL_MAX_BLOCK_ID + +/* Flag bits for GenericXLogRegisterBuffer */ +#define GENERIC_XLOG_FULL_IMAGE 0x0001 /* write full-page image */ + +/* state of generic xlog record construction */ +struct GenericXLogState; +typedef struct GenericXLogState GenericXLogState; + +/* API for construction of generic xlog records */ +extern GenericXLogState *GenericXLogStart(Relation relation); +extern Page GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, + int flags); +extern XLogRecPtr GenericXLogFinish(GenericXLogState *state); +extern void GenericXLogAbort(GenericXLogState *state); + +/* functions defined for rmgr */ +extern void generic_redo(XLogReaderState *record); +extern const char *generic_identify(uint8 info); +extern void generic_desc(StringInfo buf, XLogReaderState *record); +extern void generic_mask(char *pagedata, BlockNumber blkno); + +#endif /* GENERIC_XLOG_H */ diff --git a/src/include/access/gin.h b/src/include/access/gin.h new file mode 100644 index 0000000..266cb07 --- /dev/null +++ b/src/include/access/gin.h @@ -0,0 +1,78 @@ +/*-------------------------------------------------------------------------- + * gin.h + * Public header file for Generalized Inverted Index access method. + * + * Copyright (c) 2006-2021, PostgreSQL Global Development Group + * + * src/include/access/gin.h + *-------------------------------------------------------------------------- + */ +#ifndef GIN_H +#define GIN_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/block.h" +#include "utils/relcache.h" + + +/* + * amproc indexes for inverted indexes. + */ +#define GIN_COMPARE_PROC 1 +#define GIN_EXTRACTVALUE_PROC 2 +#define GIN_EXTRACTQUERY_PROC 3 +#define GIN_CONSISTENT_PROC 4 +#define GIN_COMPARE_PARTIAL_PROC 5 +#define GIN_TRICONSISTENT_PROC 6 +#define GIN_OPTIONS_PROC 7 +#define GINNProcs 7 + +/* + * searchMode settings for extractQueryFn. + */ +#define GIN_SEARCH_MODE_DEFAULT 0 +#define GIN_SEARCH_MODE_INCLUDE_EMPTY 1 +#define GIN_SEARCH_MODE_ALL 2 +#define GIN_SEARCH_MODE_EVERYTHING 3 /* for internal use only */ + +/* + * GinStatsData represents stats data for planner use + */ +typedef struct GinStatsData +{ + BlockNumber nPendingPages; + BlockNumber nTotalPages; + BlockNumber nEntryPages; + BlockNumber nDataPages; + int64 nEntries; + int32 ginVersion; +} GinStatsData; + +/* + * A ternary value used by tri-consistent functions. + * + * This must be of the same size as a bool because some code will cast a + * pointer to a bool to a pointer to a GinTernaryValue. 
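A usage sketch of the generic WAL API above (illustrative only): the caller is assumed to hold an exclusive content lock on the buffer, and the page change shown is just a trivial example.

#include "postgres.h"
#include "access/generic_xlog.h"

static void
mark_page_full_logged(Relation rel, Buffer buffer)
{
    GenericXLogState *state = GenericXLogStart(rel);
    Page        page = GenericXLogRegisterBuffer(state, buffer, 0);

    /* modify the image returned above, never the buffer's page directly */
    ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;

    GenericXLogFinish(state);       /* applies the change, marks dirty, emits WAL */
}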
+ */ +typedef char GinTernaryValue; + +#define GIN_FALSE 0 /* item is not present / does not match */ +#define GIN_TRUE 1 /* item is present / matches */ +#define GIN_MAYBE 2 /* don't know if item is present / don't know + * if matches */ + +#define DatumGetGinTernaryValue(X) ((GinTernaryValue)(X)) +#define GinTernaryValueGetDatum(X) ((Datum)(X)) +#define PG_RETURN_GIN_TERNARY_VALUE(x) return GinTernaryValueGetDatum(x) + +/* GUC parameters */ +extern PGDLLIMPORT int GinFuzzySearchLimit; +extern int gin_pending_list_limit; + +/* ginutil.c */ +extern void ginGetStats(Relation index, GinStatsData *stats); +extern void ginUpdateStats(Relation index, const GinStatsData *stats, + bool is_build); + +#endif /* GIN_H */ diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h new file mode 100644 index 0000000..670a40b --- /dev/null +++ b/src/include/access/gin_private.h @@ -0,0 +1,500 @@ +/*-------------------------------------------------------------------------- + * gin_private.h + * header file for postgres inverted index access method implementation. + * + * Copyright (c) 2006-2021, PostgreSQL Global Development Group + * + * src/include/access/gin_private.h + *-------------------------------------------------------------------------- + */ +#ifndef GIN_PRIVATE_H +#define GIN_PRIVATE_H + +#include "access/amapi.h" +#include "access/gin.h" +#include "access/ginblock.h" +#include "access/itup.h" +#include "catalog/pg_am_d.h" +#include "fmgr.h" +#include "lib/rbtree.h" +#include "storage/bufmgr.h" + +/* + * Storage type for GIN's reloptions + */ +typedef struct GinOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + bool useFastUpdate; /* use fast updates? */ + int pendingListCleanupSize; /* maximum size of pending list */ +} GinOptions; + +#define GIN_DEFAULT_USE_FASTUPDATE true +#define GinGetUseFastUpdate(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == GIN_AM_OID), \ + (relation)->rd_options ? \ + ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE) +#define GinGetPendingListCleanupSize(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == GIN_AM_OID), \ + (relation)->rd_options && \ + ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize != -1 ? \ + ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize : \ + gin_pending_list_limit) + + +/* Macros for buffer lock/unlock operations */ +#define GIN_UNLOCK BUFFER_LOCK_UNLOCK +#define GIN_SHARE BUFFER_LOCK_SHARE +#define GIN_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE + + +/* + * GinState: working data structure describing the index being worked on + */ +typedef struct GinState +{ + Relation index; + bool oneCol; /* true if single-column index */ + + /* + * origTupdesc is the nominal tuple descriptor of the index, ie, the i'th + * attribute shows the key type (not the input data type!) of the i'th + * index column. In a single-column index this describes the actual leaf + * index tuples. In a multi-column index, the actual leaf tuples contain + * a smallint column number followed by a key datum of the appropriate + * type for that column. We set up tupdesc[i] to describe the actual + * rowtype of the index tuples for the i'th column, ie, (int2, keytype). + * Note that in any case, leaf tuples contain more data than is known to + * the TupleDesc; see access/gin/README for details. 
+ */ + TupleDesc origTupdesc; + TupleDesc tupdesc[INDEX_MAX_KEYS]; + + /* + * Per-index-column opclass support functions + */ + FmgrInfo compareFn[INDEX_MAX_KEYS]; + FmgrInfo extractValueFn[INDEX_MAX_KEYS]; + FmgrInfo extractQueryFn[INDEX_MAX_KEYS]; + FmgrInfo consistentFn[INDEX_MAX_KEYS]; + FmgrInfo triConsistentFn[INDEX_MAX_KEYS]; + FmgrInfo comparePartialFn[INDEX_MAX_KEYS]; /* optional method */ + /* canPartialMatch[i] is true if comparePartialFn[i] is valid */ + bool canPartialMatch[INDEX_MAX_KEYS]; + /* Collations to pass to the support functions */ + Oid supportCollation[INDEX_MAX_KEYS]; +} GinState; + + +/* ginutil.c */ +extern bytea *ginoptions(Datum reloptions, bool validate); +extern void initGinState(GinState *state, Relation index); +extern Buffer GinNewBuffer(Relation index); +extern void GinInitBuffer(Buffer b, uint32 f); +extern void GinInitPage(Page page, uint32 f, Size pageSize); +extern void GinInitMetabuffer(Buffer b); +extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, + Datum a, GinNullCategory categorya, + Datum b, GinNullCategory categoryb); +extern int ginCompareAttEntries(GinState *ginstate, + OffsetNumber attnuma, Datum a, GinNullCategory categorya, + OffsetNumber attnumb, Datum b, GinNullCategory categoryb); +extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum, + Datum value, bool isNull, + int32 *nentries, GinNullCategory **categories); + +extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple); +extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, + GinNullCategory *category); + +/* gininsert.c */ +extern IndexBuildResult *ginbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void ginbuildempty(Relation index); +extern bool gininsert(Relation index, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +extern void ginEntryInsert(GinState *ginstate, + OffsetNumber attnum, Datum key, GinNullCategory category, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats); + +/* ginbtree.c */ + +typedef struct GinBtreeStack +{ + BlockNumber blkno; + Buffer buffer; + OffsetNumber off; + ItemPointerData iptr; + /* predictNumber contains predicted number of pages on current level */ + uint32 predictNumber; + struct GinBtreeStack *parent; +} GinBtreeStack; + +typedef struct GinBtreeData *GinBtree; + +/* Return codes for GinBtreeData.beginPlaceToPage method */ +typedef enum +{ + GPTP_NO_WORK, + GPTP_INSERT, + GPTP_SPLIT +} GinPlaceToPageRC; + +typedef struct GinBtreeData +{ + /* search methods */ + BlockNumber (*findChildPage) (GinBtree, GinBtreeStack *); + BlockNumber (*getLeftMostChild) (GinBtree, Page); + bool (*isMoveRight) (GinBtree, Page); + bool (*findItem) (GinBtree, GinBtreeStack *); + + /* insert methods */ + OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber); + GinPlaceToPageRC (*beginPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *); + void (*execPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *); + void *(*prepareDownlink) (GinBtree, Buffer); + void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page); + + bool isData; + + Relation index; + BlockNumber rootBlkno; + GinState *ginstate; /* not valid in a data scan */ + bool fullScan; + bool isBuild; + + /* Search key for Entry tree */ + OffsetNumber entryAttnum; + Datum entryKey; + 
GinNullCategory entryCategory; + + /* Search key for data tree (posting tree) */ + ItemPointerData itemptr; +} GinBtreeData; + +/* This represents a tuple to be inserted to entry tree. */ +typedef struct +{ + IndexTuple entry; /* tuple to insert */ + bool isDelete; /* delete old tuple at same offset? */ +} GinBtreeEntryInsertData; + +/* + * This represents an itempointer, or many itempointers, to be inserted to + * a data (posting tree) leaf page + */ +typedef struct +{ + ItemPointerData *items; + uint32 nitem; + uint32 curitem; +} GinBtreeDataLeafInsertData; + +/* + * For internal data (posting tree) pages, the insertion payload is a + * PostingItem + */ + +extern GinBtreeStack *ginFindLeafPage(GinBtree btree, bool searchMode, + bool rootConflictCheck, Snapshot snapshot); +extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode); +extern void freeGinBtreeStack(GinBtreeStack *stack); +extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack, + void *insertdata, GinStatsData *buildStats); + +/* ginentrypage.c */ +extern IndexTuple GinFormTuple(GinState *ginstate, + OffsetNumber attnum, Datum key, GinNullCategory category, + Pointer data, Size dataSize, int nipd, bool errorTooBig); +extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, + Datum key, GinNullCategory category, + GinState *ginstate); +extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage); +extern ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, + IndexTuple itup, int *nitems); + +/* gindatapage.c */ +extern ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast); +extern int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm); +extern BlockNumber createPostingTree(Relation index, + ItemPointerData *items, uint32 nitems, + GinStatsData *buildStats, Buffer entrybuffer); +extern void GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset); +extern void GinPageDeletePostingItem(Page page, OffsetNumber offset); +extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats); +extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot); +extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage); + +/* + * This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers + * and ginVacuumPostingTreeLeaf and as an opaque struct, so we need a forward + * declaration for it. + */ +typedef struct GinVacuumState GinVacuumState; + +extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *gvs); + +/* ginscan.c */ + +/* + * GinScanKeyData describes a single GIN index qualifier expression. + * + * From each qual expression, we extract one or more specific index search + * conditions, which are represented by GinScanEntryData. It's quite + * possible for identical search conditions to be requested by more than + * one qual expression, in which case we merge such conditions to have just + * one unique GinScanEntry --- this is particularly important for efficiency + * when dealing with full-index-scan entries. So there can be multiple + * GinScanKeyData.scanEntry pointers to the same GinScanEntryData. 
+ * + * In each GinScanKeyData, nentries is the true number of entries, while + * nuserentries is the number that extractQueryFn returned (which is what + * we report to consistentFn). The "user" entries must come first. + */ +typedef struct GinScanKeyData *GinScanKey; + +typedef struct GinScanEntryData *GinScanEntry; + +typedef struct GinScanKeyData +{ + /* Real number of entries in scanEntry[] (always > 0) */ + uint32 nentries; + /* Number of entries that extractQueryFn and consistentFn know about */ + uint32 nuserentries; + + /* array of GinScanEntry pointers, one per extracted search condition */ + GinScanEntry *scanEntry; + + /* + * At least one of the entries in requiredEntries must be present for a + * tuple to match the overall qual. + * + * additionalEntries contains entries that are needed by the consistent + * function to decide if an item matches, but are not sufficient to + * satisfy the qual without entries from requiredEntries. + */ + GinScanEntry *requiredEntries; + int nrequired; + GinScanEntry *additionalEntries; + int nadditional; + + /* array of check flags, reported to consistentFn */ + GinTernaryValue *entryRes; + bool (*boolConsistentFn) (GinScanKey key); + GinTernaryValue (*triConsistentFn) (GinScanKey key); + FmgrInfo *consistentFmgrInfo; + FmgrInfo *triConsistentFmgrInfo; + Oid collation; + + /* other data needed for calling consistentFn */ + Datum query; + /* NB: these three arrays have only nuserentries elements! */ + Datum *queryValues; + GinNullCategory *queryCategories; + Pointer *extra_data; + StrategyNumber strategy; + int32 searchMode; + OffsetNumber attnum; + + /* + * An excludeOnly scan key is not able to enumerate all matching tuples. + * That is, to be semantically correct on its own, it would need to have a + * GIN_CAT_EMPTY_QUERY scanEntry, but it doesn't. Such a key can still be + * used to filter tuples returned by other scan keys, so we will get the + * right answers as long as there's at least one non-excludeOnly scan key + * for each index attribute considered by the search. For efficiency + * reasons we don't want to have unnecessary GIN_CAT_EMPTY_QUERY entries, + * so we will convert an excludeOnly scan key to non-excludeOnly (by + * adding a GIN_CAT_EMPTY_QUERY scanEntry) only if there are no other + * non-excludeOnly scan keys. + */ + bool excludeOnly; + + /* + * Match status data. curItem is the TID most recently tested (could be a + * lossy-page pointer). curItemMatches is true if it passes the + * consistentFn test; if so, recheckCurItem is the recheck flag. + * isFinished means that all the input entry streams are finished, so this + * key cannot succeed for any later TIDs. 
+ */ + ItemPointerData curItem; + bool curItemMatches; + bool recheckCurItem; + bool isFinished; +} GinScanKeyData; + +typedef struct GinScanEntryData +{ + /* query key and other information from extractQueryFn */ + Datum queryKey; + GinNullCategory queryCategory; + bool isPartialMatch; + Pointer extra_data; + StrategyNumber strategy; + int32 searchMode; + OffsetNumber attnum; + + /* Current page in posting tree */ + Buffer buffer; + + /* current ItemPointer to heap */ + ItemPointerData curItem; + + /* for a partial-match or full-scan query, we accumulate all TIDs here */ + TIDBitmap *matchBitmap; + TBMIterator *matchIterator; + TBMIterateResult *matchResult; + + /* used for Posting list and one page in Posting tree */ + ItemPointerData *list; + int nlist; + OffsetNumber offset; + + bool isFinished; + bool reduceResult; + uint32 predictNumberResult; + GinBtreeData btree; +} GinScanEntryData; + +typedef struct GinScanOpaqueData +{ + MemoryContext tempCtx; + GinState ginstate; + + GinScanKey keys; /* one per scan qualifier expr */ + uint32 nkeys; + + GinScanEntry *entries; /* one per index search condition */ + uint32 totalentries; + uint32 allocentries; /* allocated length of entries[] */ + + MemoryContext keyCtx; /* used to hold key and entry data */ + + bool isVoidRes; /* true if query is unsatisfiable */ +} GinScanOpaqueData; + +typedef GinScanOpaqueData *GinScanOpaque; + +extern IndexScanDesc ginbeginscan(Relation rel, int nkeys, int norderbys); +extern void ginendscan(IndexScanDesc scan); +extern void ginrescan(IndexScanDesc scan, ScanKey key, int nscankeys, + ScanKey orderbys, int norderbys); +extern void ginNewScanKey(IndexScanDesc scan); +extern void ginFreeScanKeys(GinScanOpaque so); + +/* ginget.c */ +extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm); + +/* ginlogic.c */ +extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key); + +/* ginvacuum.c */ +extern IndexBulkDeleteResult *ginbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *ginvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); +extern ItemPointer ginVacuumItemPointers(GinVacuumState *gvs, + ItemPointerData *items, int nitem, int *nremaining); + +/* ginvalidate.c */ +extern bool ginvalidate(Oid opclassoid); +extern void ginadjustmembers(Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions); + +/* ginbulk.c */ +typedef struct GinEntryAccumulator +{ + RBTNode rbtnode; + Datum key; + GinNullCategory category; + OffsetNumber attnum; + bool shouldSort; + ItemPointerData *list; + uint32 maxcount; /* allocated size of list[] */ + uint32 count; /* current number of list[] entries */ +} GinEntryAccumulator; + +typedef struct +{ + GinState *ginstate; + Size allocatedMemory; + GinEntryAccumulator *entryallocator; + uint32 eas_used; + RBTree *tree; + RBTreeIterator tree_walk; +} BuildAccumulator; + +extern void ginInitBA(BuildAccumulator *accum); +extern void ginInsertBAEntries(BuildAccumulator *accum, + ItemPointer heapptr, OffsetNumber attnum, + Datum *entries, GinNullCategory *categories, + int32 nentries); +extern void ginBeginBAScan(BuildAccumulator *accum); +extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum, + OffsetNumber *attnum, Datum *key, GinNullCategory *category, + uint32 *n); + +/* ginfast.c */ + +typedef struct GinTupleCollector +{ + IndexTuple *tuples; + uint32 ntuples; + uint32 lentuples; + uint32 sumsize; +} GinTupleCollector; + 
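[Editor's aside -- illustrative only, not part of the patch.] The triConsistentFn callback declared in GinScanKeyData above takes the whole scan key and reads the per-entry GinTernaryValue flags in entryRes[]. A minimal sketch of such a callback, assuming a hypothetical qualifier with AND semantics (every extracted entry must be present for the key to match), could look like the following; the function name is invented for illustration, and the real callbacks are the ones installed by ginInitConsistentFunction() declared above for ginlogic.c:

static GinTernaryValue
example_tri_consistent_and(GinScanKey key)
{
	GinTernaryValue res = GIN_TRUE;
	uint32		i;

	/* only the first nuserentries flags are reported to consistent functions */
	for (i = 0; i < key->nuserentries; i++)
	{
		if (key->entryRes[i] == GIN_FALSE)
			return GIN_FALSE;	/* a required entry is definitely absent */
		if (key->entryRes[i] == GIN_MAYBE)
			res = GIN_MAYBE;	/* presence unknown, e.g. a lossy page */
	}
	return res;
}

A GIN_MAYBE result is how lossy information propagates upward: the item can only be returned as a candidate that must be re-verified against the heap tuple (compare the recheckCurItem flag documented in GinScanKeyData above), which is why entryRes[] holds GinTernaryValue flags rather than plain booleans.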
+extern void ginHeapTupleFastInsert(GinState *ginstate, + GinTupleCollector *collector); +extern void ginHeapTupleFastCollect(GinState *ginstate, + GinTupleCollector *collector, + OffsetNumber attnum, Datum value, bool isNull, + ItemPointer ht_ctid); +extern void ginInsertCleanup(GinState *ginstate, bool full_clean, + bool fill_fsm, bool forceCleanup, IndexBulkDeleteResult *stats); + +/* ginpostinglist.c */ + +extern GinPostingList *ginCompressPostingList(const ItemPointer ipd, int nipd, + int maxsize, int *nwritten); +extern int ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int totalsize, TIDBitmap *tbm); + +extern ItemPointer ginPostingListDecodeAllSegments(GinPostingList *ptr, int len, int *ndecoded); +extern ItemPointer ginPostingListDecode(GinPostingList *ptr, int *ndecoded); +extern ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na, + ItemPointerData *b, uint32 nb, + int *nmerged); + +/* + * Merging the results of several gin scans compares item pointers a lot, + * so we want this to be inlined. + */ +static inline int +ginCompareItemPointers(ItemPointer a, ItemPointer b) +{ + uint64 ia = (uint64) GinItemPointerGetBlockNumber(a) << 32 | GinItemPointerGetOffsetNumber(a); + uint64 ib = (uint64) GinItemPointerGetBlockNumber(b) << 32 | GinItemPointerGetOffsetNumber(b); + + if (ia == ib) + return 0; + else if (ia > ib) + return 1; + else + return -1; +} + +extern int ginTraverseLock(Buffer buffer, bool searchMode); + +#endif /* GIN_PRIVATE_H */ diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h new file mode 100644 index 0000000..37d650a --- /dev/null +++ b/src/include/access/ginblock.h @@ -0,0 +1,346 @@ +/*-------------------------------------------------------------------------- + * ginblock.h + * details of structures stored in GIN index blocks + * + * Copyright (c) 2006-2021, PostgreSQL Global Development Group + * + * src/include/access/ginblock.h + *-------------------------------------------------------------------------- + */ +#ifndef GINBLOCK_H +#define GINBLOCK_H + +#include "access/transam.h" +#include "storage/block.h" +#include "storage/bufpage.h" +#include "storage/itemptr.h" +#include "storage/off.h" + +/* + * Page opaque data in an inverted index page. + * + * Note: GIN does not include a page ID word as do the other index types. + * This is OK because the opaque data is only 8 bytes and so can be reliably + * distinguished by size. Revisit this if the size ever increases. + * Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does + * BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the + * high-order bits in its flags word, because that way the flags word cannot + * match the page IDs used by SP-GiST and BRIN. + */ +typedef struct GinPageOpaqueData +{ + BlockNumber rightlink; /* next page if any */ + OffsetNumber maxoff; /* number of PostingItems on GIN_DATA & + * ~GIN_LEAF page. On GIN_LIST page, number of + * heap tuples. 
*/ + uint16 flags; /* see bit definitions below */ +} GinPageOpaqueData; + +typedef GinPageOpaqueData *GinPageOpaque; + +#define GIN_DATA (1 << 0) +#define GIN_LEAF (1 << 1) +#define GIN_DELETED (1 << 2) +#define GIN_META (1 << 3) +#define GIN_LIST (1 << 4) +#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */ +#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not + * updated */ +#define GIN_COMPRESSED (1 << 7) + +/* Page numbers of fixed-location pages */ +#define GIN_METAPAGE_BLKNO (0) +#define GIN_ROOT_BLKNO (1) + +typedef struct GinMetaPageData +{ + /* + * Pointers to head and tail of pending list, which consists of GIN_LIST + * pages. These store fast-inserted entries that haven't yet been moved + * into the regular GIN structure. + */ + BlockNumber head; + BlockNumber tail; + + /* + * Free space in bytes in the pending list's tail page. + */ + uint32 tailFreeSize; + + /* + * We store both number of pages and number of heap tuples that are in the + * pending list. + */ + BlockNumber nPendingPages; + int64 nPendingHeapTuples; + + /* + * Statistics for planner use (accurate as of last VACUUM) + */ + BlockNumber nTotalPages; + BlockNumber nEntryPages; + BlockNumber nDataPages; + int64 nEntries; + + /* + * GIN version number (ideally this should have been at the front, but too + * late now. Don't move it!) + * + * Currently 2 (for indexes initialized in 9.4 or later) + * + * Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is + * compatible, but may contain uncompressed posting tree (leaf) pages and + * posting lists. They will be converted to compressed format when + * modified. + * + * Version 0 (indexes initialized in 9.0 or before) is compatible but may + * be missing null entries, including both null keys and placeholders. + * Reject full-index-scan attempts on such indexes. 
+ */ + int32 ginVersion; +} GinMetaPageData; + +#define GIN_CURRENT_VERSION 2 + +#define GinPageGetMeta(p) \ + ((GinMetaPageData *) PageGetContents(p)) + +/* + * Macros for accessing a GIN index page's opaque data + */ +#define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) ) + +#define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 ) +#define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF ) +#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF ) +#define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 ) +#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA ) +#define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 ) +#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST ) +#define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 ) +#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW ) +#define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 ) +#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED ) + +#define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 ) +#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED) +#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED) +#define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 ) + +#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber) + +/* + * We should reclaim deleted page only once every transaction started before + * its deletion is over. + */ +#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) +#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +extern bool GinPageIsRecyclable(Page page); + +/* + * We use our own ItemPointerGet(BlockNumber|OffsetNumber) + * to avoid Asserts, since sometimes the ip_posid isn't "valid" + */ +#define GinItemPointerGetBlockNumber(pointer) \ + (ItemPointerGetBlockNumberNoCheck(pointer)) + +#define GinItemPointerGetOffsetNumber(pointer) \ + (ItemPointerGetOffsetNumberNoCheck(pointer)) + +#define GinItemPointerSetBlockNumber(pointer, blkno) \ + (ItemPointerSetBlockNumber((pointer), (blkno))) + +#define GinItemPointerSetOffsetNumber(pointer, offnum) \ + (ItemPointerSetOffsetNumber((pointer), (offnum))) + + +/* + * Special-case item pointer values needed by the GIN search logic. + * MIN: sorts less than any valid item pointer + * MAX: sorts greater than any valid item pointer + * LOSSY PAGE: indicates a whole heap page, sorts after normal item + * pointers for that page + * Note that these are all distinguishable from an "invalid" item pointer + * (which is InvalidBlockNumber/0) as well as from all normal item + * pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage). 
+ */ +#define ItemPointerSetMin(p) \ + ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0) +#define ItemPointerIsMin(p) \ + (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \ + GinItemPointerGetBlockNumber(p) == (BlockNumber)0) +#define ItemPointerSetMax(p) \ + ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff) +#define ItemPointerSetLossyPage(p, b) \ + ItemPointerSet((p), (b), (OffsetNumber)0xffff) +#define ItemPointerIsLossyPage(p) \ + (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \ + GinItemPointerGetBlockNumber(p) != InvalidBlockNumber) + +/* + * Posting item in a non-leaf posting-tree page + */ +typedef struct +{ + /* We use BlockIdData not BlockNumber to avoid padding space wastage */ + BlockIdData child_blkno; + ItemPointerData key; +} PostingItem; + +#define PostingItemGetBlockNumber(pointer) \ + BlockIdGetBlockNumber(&(pointer)->child_blkno) + +#define PostingItemSetBlockNumber(pointer, blockNumber) \ + BlockIdSet(&((pointer)->child_blkno), (blockNumber)) + +/* + * Category codes to distinguish placeholder nulls from ordinary NULL keys. + * + * The first two code values were chosen to be compatible with the usual usage + * of bool isNull flags. However, casting between bool and GinNullCategory is + * risky because of the possibility of different bit patterns and type sizes, + * so it is no longer done. + * + * GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is + * chosen to sort before not after regular key values. + */ +typedef signed char GinNullCategory; + +#define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */ +#define GIN_CAT_NULL_KEY 1 /* null key value */ +#define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */ +#define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */ +#define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */ + +/* + * Access macros for null category byte in entry tuples + */ +#define GinCategoryOffset(itup,ginstate) \ + (IndexInfoFindDataOffset((itup)->t_info) + \ + ((ginstate)->oneCol ? 0 : sizeof(int16))) +#define GinGetNullCategory(itup,ginstate) \ + (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate)))) +#define GinSetNullCategory(itup,ginstate,c) \ + (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c)) + +/* + * Access macros for leaf-page entry tuples (see discussion in README) + */ +#define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid) +#define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n) +#define GIN_TREE_POSTING ((OffsetNumber)0xffff) +#define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING) +#define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) ) +#define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) + +#define GIN_ITUP_COMPRESSED (1U << 31) +#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED)) +#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED) +#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup))) +#define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0) + +/* + * Maximum size of an item on entry tree page. Make sure that we fit at least + * three items on each page. (On regular B-tree indexes, we must fit at least + * three items: two data items and the "high key". 
In GIN entry tree, we don't + * currently store the high key explicitly, we just use the rightmost item on + * the page, so it would actually be enough to fit two items.) + */ +#define GinMaxItemSize \ + Min(INDEX_SIZE_MASK, \ + MAXALIGN_DOWN(((BLCKSZ - \ + MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \ + MAXALIGN(sizeof(GinPageOpaqueData))) / 3))) + +/* + * Access macros for non-leaf entry tuples + */ +#define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) +#define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber) + + +/* + * Data (posting tree) pages + * + * Posting tree pages don't store regular tuples. Non-leaf pages contain + * PostingItems, which are pairs of ItemPointers and child block numbers. + * Leaf pages contain GinPostingLists and an uncompressed array of item + * pointers. + * + * In a leaf page, the compressed posting lists are stored after the regular + * page header, one after each other. Although we don't store regular tuples, + * pd_lower is used to indicate the end of the posting lists. After that, free + * space follows. This layout is compatible with the "standard" heap and + * index page layout described in bufpage.h, so that we can e.g set buffer_std + * when writing WAL records. + * + * In the special space is the GinPageOpaque struct. + */ +#define GinDataLeafPageGetPostingList(page) \ + (GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))) +#define GinDataLeafPageGetPostingListSize(page) \ + (((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData))) + +#define GinDataLeafPageIsEmpty(page) \ + (GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)) + +#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page) + +#define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page)) +/* + * Pointer to the data portion of a posting tree page. For internal pages, + * that's the beginning of the array of PostingItems. For compressed leaf + * pages, the first compressed posting list. For uncompressed (pre-9.4) leaf + * pages, it's the beginning of the ItemPointer array. + */ +#define GinDataPageGetData(page) \ + (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))) +/* non-leaf pages contain PostingItems */ +#define GinDataPageGetPostingItem(page, i) \ + ((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem))) + +/* + * Note: there is no GinDataPageGetDataSize macro, because before version + * 9.4, we didn't set pd_lower on data pages. There can be pages in the index + * that were binary-upgraded from earlier versions and still have an invalid + * pd_lower, so we cannot trust it in general. Compressed posting tree leaf + * pages are new in 9.4, however, so we can trust them; see + * GinDataLeafPageGetPostingListSize. 
+ */ +#define GinDataPageSetDataSize(page, size) \ + { \ + Assert(size <= GinDataPageMaxDataSize); \ + ((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \ + } + +#define GinNonLeafDataPageGetFreeSpace(page) \ + (GinDataPageMaxDataSize - \ + GinPageGetOpaque(page)->maxoff * sizeof(PostingItem)) + +#define GinDataPageMaxDataSize \ + (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ + - MAXALIGN(sizeof(ItemPointerData)) \ + - MAXALIGN(sizeof(GinPageOpaqueData))) + +/* + * List pages + */ +#define GinListPageSize \ + ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) ) + +/* + * A compressed posting list. + * + * Note: This requires 2-byte alignment. + */ +typedef struct +{ + ItemPointerData first; /* first item in this posting list (unpacked) */ + uint16 nbytes; /* number of bytes that follow */ + unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */ +} GinPostingList; + +#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) ) +#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur)))) + +#endif /* GINBLOCK_H */ diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h new file mode 100644 index 0000000..8a2507b --- /dev/null +++ b/src/include/access/ginxlog.h @@ -0,0 +1,216 @@ +/*-------------------------------------------------------------------------- + * ginxlog.h + * header file for postgres inverted index xlog implementation. + * + * Copyright (c) 2006-2021, PostgreSQL Global Development Group + * + * src/include/access/ginxlog.h + *-------------------------------------------------------------------------- + */ +#ifndef GINXLOG_H +#define GINXLOG_H + +#include "access/ginblock.h" +#include "access/itup.h" +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/off.h" + +#define XLOG_GIN_CREATE_PTREE 0x10 + +typedef struct ginxlogCreatePostingTree +{ + uint32 size; + /* A compressed posting list follows */ +} ginxlogCreatePostingTree; + +/* + * The format of the insertion record varies depending on the page type. + * ginxlogInsert is the common part between all variants. + * + * Backup Blk 0: target page + * Backup Blk 1: left child, if this insertion finishes an incomplete split + */ + +#define XLOG_GIN_INSERT 0x20 + +typedef struct +{ + uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */ + + /* + * FOLLOWS: + * + * 1. if not leaf page, block numbers of the left and right child pages + * whose split this insertion finishes, as BlockIdData[2] (beware of + * adding fields in this struct that would make them not 16-bit aligned) + * + * 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending + * on tree type. + * + * NB: the below structs are only 16-bit aligned when appended to a + * ginxlogInsert struct! Beware of adding fields to them that require + * stricter alignment. + */ +} ginxlogInsert; + +typedef struct +{ + OffsetNumber offset; + bool isDelete; + IndexTupleData tuple; /* variable length */ +} ginxlogInsertEntry; + + +typedef struct +{ + uint16 nactions; + + /* Variable number of 'actions' follow */ +} ginxlogRecompressDataLeaf; + +/* + * Note: this struct is currently not used in code, and only acts as + * documentation. The WAL record format is as specified here, but the code + * uses straight access through a Pointer and memcpy to read/write these. 
+ */ +typedef struct +{ + uint8 segno; /* segment this action applies to */ + char type; /* action type (see below) */ + + /* + * Action-specific data follows. For INSERT and REPLACE actions that is a + * GinPostingList struct. For ADDITEMS, a uint16 for the number of items + * added, followed by the items themselves as ItemPointers. DELETE actions + * have no further data. + */ +} ginxlogSegmentAction; + +/* Action types */ +#define GIN_SEGMENT_UNMODIFIED 0 /* no action (not used in WAL records) */ +#define GIN_SEGMENT_DELETE 1 /* a whole segment is removed */ +#define GIN_SEGMENT_INSERT 2 /* a whole segment is added */ +#define GIN_SEGMENT_REPLACE 3 /* a segment is replaced */ +#define GIN_SEGMENT_ADDITEMS 4 /* items are added to existing segment */ + +typedef struct +{ + OffsetNumber offset; + PostingItem newitem; +} ginxlogInsertDataInternal; + +/* + * Backup Blk 0: new left page (= original page, if not root split) + * Backup Blk 1: new right page + * Backup Blk 2: original page / new root page, if root split + * Backup Blk 3: left child, if this insertion completes an earlier split + */ +#define XLOG_GIN_SPLIT 0x30 + +typedef struct ginxlogSplit +{ + RelFileNode node; + BlockNumber rrlink; /* right link, or root's blocknumber if root + * split */ + BlockNumber leftChildBlkno; /* valid on a non-leaf split */ + BlockNumber rightChildBlkno; + uint16 flags; /* see below */ +} ginxlogSplit; + +/* + * Flags used in ginxlogInsert and ginxlogSplit records + */ +#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */ +#define GIN_INSERT_ISLEAF 0x02 /* ditto */ +#define GIN_SPLIT_ROOT 0x04 /* only for split records */ + +/* + * Vacuum simply WAL-logs the whole page, when anything is modified. This + * is functionally identical to XLOG_FPI records, but is kept separate for + * debugging purposes. (When inspecting the WAL stream, it's easier to see + * what's going on when GIN vacuum records are marked as such, not as heap + * records.) This is currently only used for entry tree leaf pages. + */ +#define XLOG_GIN_VACUUM_PAGE 0x40 + +/* + * Vacuuming posting tree leaf page is WAL-logged like recompression caused + * by insertion. 
+ */ +#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90 + +typedef struct ginxlogVacuumDataLeafPage +{ + ginxlogRecompressDataLeaf data; +} ginxlogVacuumDataLeafPage; + +/* + * Backup Blk 0: deleted page + * Backup Blk 1: parent + * Backup Blk 2: left sibling + */ +#define XLOG_GIN_DELETE_PAGE 0x50 + +typedef struct ginxlogDeletePage +{ + OffsetNumber parentOffset; + BlockNumber rightLink; + TransactionId deleteXid; /* last Xid which could see this page in scan */ +} ginxlogDeletePage; + +#define XLOG_GIN_UPDATE_META_PAGE 0x60 + +/* + * Backup Blk 0: metapage + * Backup Blk 1: tail page + */ +typedef struct ginxlogUpdateMeta +{ + RelFileNode node; + GinMetaPageData metadata; + BlockNumber prevTail; + BlockNumber newRightlink; + int32 ntuples; /* if ntuples > 0 then metadata.tail was + * updated with that many tuples; else new sub + * list was inserted */ + /* array of inserted tuples follows */ +} ginxlogUpdateMeta; + +#define XLOG_GIN_INSERT_LISTPAGE 0x70 + +typedef struct ginxlogInsertListPage +{ + BlockNumber rightlink; + int32 ntuples; + /* array of inserted tuples follows */ +} ginxlogInsertListPage; + +/* + * Backup Blk 0: metapage + * Backup Blk 1 to (ndeleted + 1): deleted pages + */ + +#define XLOG_GIN_DELETE_LISTPAGE 0x80 + +/* + * The WAL record for deleting list pages must contain a block reference to + * all the deleted pages, so the number of pages that can be deleted in one + * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the + * metapage.) + */ +#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1) +typedef struct ginxlogDeleteListPages +{ + GinMetaPageData metadata; + int32 ndeleted; +} ginxlogDeleteListPages; + +extern void gin_redo(XLogReaderState *record); +extern void gin_desc(StringInfo buf, XLogReaderState *record); +extern const char *gin_identify(uint8 info); +extern void gin_xlog_startup(void); +extern void gin_xlog_cleanup(void); +extern void gin_mask(char *pagedata, BlockNumber blkno); + +#endif /* GINXLOG_H */ diff --git a/src/include/access/gist.h b/src/include/access/gist.h new file mode 100644 index 0000000..4b06575 --- /dev/null +++ b/src/include/access/gist.h @@ -0,0 +1,248 @@ +/*------------------------------------------------------------------------- + * + * gist.h + * The public API for GiST indexes. This API is exposed to + * individuals implementing GiST indexes, so backward-incompatible + * changes should be made with care. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/gist.h + * + *------------------------------------------------------------------------- + */ +#ifndef GIST_H +#define GIST_H + +#include "access/itup.h" +#include "access/transam.h" +#include "access/xlog.h" +#include "access/xlogdefs.h" +#include "storage/block.h" +#include "storage/bufpage.h" +#include "utils/relcache.h" + +/* + * amproc indexes for GiST indexes. + */ +#define GIST_CONSISTENT_PROC 1 +#define GIST_UNION_PROC 2 +#define GIST_COMPRESS_PROC 3 +#define GIST_DECOMPRESS_PROC 4 +#define GIST_PENALTY_PROC 5 +#define GIST_PICKSPLIT_PROC 6 +#define GIST_EQUAL_PROC 7 +#define GIST_DISTANCE_PROC 8 +#define GIST_FETCH_PROC 9 +#define GIST_OPTIONS_PROC 10 +#define GIST_SORTSUPPORT_PROC 11 +#define GISTNProcs 11 + +/* + * Page opaque data in a GiST index page. 
+ */ +#define F_LEAF (1 << 0) /* leaf page */ +#define F_DELETED (1 << 1) /* the page has been deleted */ +#define F_TUPLES_DELETED (1 << 2) /* some tuples on the page were + * deleted */ +#define F_FOLLOW_RIGHT (1 << 3) /* page to the right has no downlink */ +#define F_HAS_GARBAGE (1 << 4) /* some tuples on the page are dead, + * but not deleted yet */ + +/* + * NSN (node sequence number) is a special-purpose LSN which is stored on each + * index page in GISTPageOpaqueData and updated only during page splits. By + * recording the parent's LSN in GISTSearchItem.parentlsn, it is possible to + * detect concurrent child page splits by checking if parentlsn < child's NSN, + * and handle them properly. The child page's LSN is insufficient for this + * purpose since it is updated for every page change. + */ +typedef XLogRecPtr GistNSN; + +/* + * A fake LSN / NSN value used during index builds. Must be smaller than any + * real or fake (unlogged) LSN generated after the index build completes so + * that all splits are considered complete. + */ +#define GistBuildLSN ((XLogRecPtr) 1) + +/* + * For on-disk compatibility with pre-9.3 servers, NSN is stored as two + * 32-bit fields on disk, same as LSNs. + */ +typedef PageXLogRecPtr PageGistNSN; + +typedef struct GISTPageOpaqueData +{ + PageGistNSN nsn; /* this value must change on page split */ + BlockNumber rightlink; /* next page if any */ + uint16 flags; /* see bit definitions above */ + uint16 gist_page_id; /* for identification of GiST indexes */ +} GISTPageOpaqueData; + +typedef GISTPageOpaqueData *GISTPageOpaque; + +/* + * Maximum possible sizes for GiST index tuple and index key. Calculation is + * based on assumption that GiST page should fit at least 4 tuples. In theory, + * GiST index can be functional when page can fit 3 tuples. But that seems + * rather inefficient, so we use a bit conservative estimate. + * + * The maximum size of index key is true for unicolumn index. Therefore, this + * estimation should be used to figure out which maximum size of GiST index key + * makes sense at all. For multicolumn indexes, user might be able to tune + * key size using opclass parameters. + */ +#define GISTMaxIndexTupleSize \ + MAXALIGN_DOWN((BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)) / \ + 4 - sizeof(ItemIdData)) + +#define GISTMaxIndexKeySize \ + (GISTMaxIndexTupleSize - MAXALIGN(sizeof(IndexTupleData))) + +/* + * The page ID is for the convenience of pg_filedump and similar utilities, + * which otherwise would have a hard time telling pages of different index + * types apart. It should be the last 2 bytes on the page. This is more or + * less "free" due to alignment considerations. + */ +#define GIST_PAGE_ID 0xFF81 + +/* + * This is the Split Vector to be returned by the PickSplit method. + * PickSplit should fill the indexes of tuples to go to the left side into + * spl_left[], and those to go to the right into spl_right[] (note the method + * is responsible for palloc'ing both of these arrays!). The tuple counts + * go into spl_nleft/spl_nright, and spl_ldatum/spl_rdatum must be set to + * the union keys for each side. + * + * If spl_ldatum_exists and spl_rdatum_exists are true, then we are performing + * a "secondary split" using a non-first index column. In this case some + * decisions have already been made about a page split, and the set of tuples + * being passed to PickSplit is just the tuples about which we are undecided. 
+ * spl_ldatum/spl_rdatum then contain the union keys for the tuples already + * chosen to go left or right. Ideally the PickSplit method should take those + * keys into account while deciding what to do with the remaining tuples, ie + * it should try to "build out" from those unions so as to minimally expand + * them. If it does so, it should union the given tuples' keys into the + * existing spl_ldatum/spl_rdatum values rather than just setting those values + * from scratch, and then set spl_ldatum_exists/spl_rdatum_exists to false to + * show it has done this. + * + * If the PickSplit method fails to clear spl_ldatum_exists/spl_rdatum_exists, + * the core GiST code will make its own decision about how to merge the + * secondary-split results with the previously-chosen tuples, and will then + * recompute the union keys from scratch. This is a workable though often not + * optimal approach. + */ +typedef struct GIST_SPLITVEC +{ + OffsetNumber *spl_left; /* array of entries that go left */ + int spl_nleft; /* size of this array */ + Datum spl_ldatum; /* Union of keys in spl_left */ + bool spl_ldatum_exists; /* true, if spl_ldatum already exists. */ + + OffsetNumber *spl_right; /* array of entries that go right */ + int spl_nright; /* size of the array */ + Datum spl_rdatum; /* Union of keys in spl_right */ + bool spl_rdatum_exists; /* true, if spl_rdatum already exists. */ +} GIST_SPLITVEC; + +/* + * An entry on a GiST node. Contains the key, as well as its own + * location (rel,page,offset) which can supply the matching pointer. + * leafkey is a flag to tell us if the entry is in a leaf node. + */ +typedef struct GISTENTRY +{ + Datum key; + Relation rel; + Page page; + OffsetNumber offset; + bool leafkey; +} GISTENTRY; + +#define GistPageGetOpaque(page) ( (GISTPageOpaque) PageGetSpecialPointer(page) ) + +#define GistPageIsLeaf(page) ( GistPageGetOpaque(page)->flags & F_LEAF) +#define GIST_LEAF(entry) (GistPageIsLeaf((entry)->page)) + +#define GistPageIsDeleted(page) ( GistPageGetOpaque(page)->flags & F_DELETED) + +#define GistTuplesDeleted(page) ( GistPageGetOpaque(page)->flags & F_TUPLES_DELETED) +#define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= F_TUPLES_DELETED) +#define GistClearTuplesDeleted(page) ( GistPageGetOpaque(page)->flags &= ~F_TUPLES_DELETED) + +#define GistPageHasGarbage(page) ( GistPageGetOpaque(page)->flags & F_HAS_GARBAGE) +#define GistMarkPageHasGarbage(page) ( GistPageGetOpaque(page)->flags |= F_HAS_GARBAGE) +#define GistClearPageHasGarbage(page) ( GistPageGetOpaque(page)->flags &= ~F_HAS_GARBAGE) + +#define GistFollowRight(page) ( GistPageGetOpaque(page)->flags & F_FOLLOW_RIGHT) +#define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT) +#define GistClearFollowRight(page) ( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT) + +#define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn)) +#define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val)) + + +/* + * On a deleted page, we store this struct. A deleted page doesn't contain any + * tuples, so we don't use the normal page layout with line pointers. Instead, + * this struct is stored right after the standard page header. pd_lower points + * to the end of this struct. If we add fields to this struct in the future, we + * can distinguish the old and new formats by pd_lower. 
+ */ +typedef struct GISTDeletedPageContents +{ + /* last xid which could see the page in a scan */ + FullTransactionId deleteXid; +} GISTDeletedPageContents; + +static inline void +GistPageSetDeleted(Page page, FullTransactionId deletexid) +{ + Assert(PageIsEmpty(page)); + + GistPageGetOpaque(page)->flags |= F_DELETED; + ((PageHeader) page)->pd_lower = MAXALIGN(SizeOfPageHeaderData) + sizeof(GISTDeletedPageContents); + + ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid = deletexid; +} + +static inline FullTransactionId +GistPageGetDeleteXid(Page page) +{ + Assert(GistPageIsDeleted(page)); + + /* Is the deleteXid field present? */ + if (((PageHeader) page)->pd_lower >= MAXALIGN(SizeOfPageHeaderData) + + offsetof(GISTDeletedPageContents, deleteXid) + sizeof(FullTransactionId)) + { + return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid; + } + else + return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); +} + +/* + * Vector of GISTENTRY structs; user-defined methods union and picksplit + * take it as one of their arguments + */ +typedef struct +{ + int32 n; /* number of elements */ + GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]; +} GistEntryVector; + +#define GEVHDRSZ (offsetof(GistEntryVector, vector)) + +/* + * macro to initialize a GISTENTRY + */ +#define gistentryinit(e, k, r, pg, o, l) \ + do { (e).key = (k); (e).rel = (r); (e).page = (pg); \ + (e).offset = (o); (e).leafkey = (l); } while (0) + +#endif /* GIST_H */ diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h new file mode 100644 index 0000000..553d364 --- /dev/null +++ b/src/include/access/gist_private.h @@ -0,0 +1,571 @@ +/*------------------------------------------------------------------------- + * + * gist_private.h + * private declarations for GiST -- declarations related to the + * internal implementation of GiST, not the public API + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/gist_private.h + * + *------------------------------------------------------------------------- + */ +#ifndef GIST_PRIVATE_H +#define GIST_PRIVATE_H + +#include "access/amapi.h" +#include "access/gist.h" +#include "access/itup.h" +#include "lib/pairingheap.h" +#include "storage/bufmgr.h" +#include "storage/buffile.h" +#include "utils/hsearch.h" +#include "access/genam.h" + +/* + * Maximum number of "halves" a page can be split into in one operation. + * Typically a split produces 2 halves, but can be more if keys have very + * different lengths, or when inserting multiple keys in one operation (as + * when inserting downlinks to an internal node). There is no theoretical + * limit on this, but in practice if you get more than a handful page halves + * in one split, there's something wrong with the opclass implementation. + * GIST_MAX_SPLIT_PAGES is an arbitrary limit on that, used to size some + * local arrays used during split. Note that there is also a limit on the + * number of buffers that can be held locked at a time, MAX_SIMUL_LWLOCKS, + * so if you raise this higher than that limit, you'll just get a different + * error. 
+ */ +#define GIST_MAX_SPLIT_PAGES 75 + +/* Buffer lock modes */ +#define GIST_SHARE BUFFER_LOCK_SHARE +#define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE +#define GIST_UNLOCK BUFFER_LOCK_UNLOCK + +typedef struct +{ + BlockNumber prev; + uint32 freespace; + char tupledata[FLEXIBLE_ARRAY_MEMBER]; +} GISTNodeBufferPage; + +#define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata)) +/* Returns free space in node buffer page */ +#define PAGE_FREE_SPACE(nbp) (nbp->freespace) +/* Checks if node buffer page is empty */ +#define PAGE_IS_EMPTY(nbp) (nbp->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET) +/* Checks if node buffers page don't contain sufficient space for index tuple */ +#define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \ + MAXALIGN(IndexTupleSize(itup))) + +/* + * GISTSTATE: information needed for any GiST index operation + * + * This struct retains call info for the index's opclass-specific support + * functions (per index column), plus the index's tuple descriptor. + * + * scanCxt holds the GISTSTATE itself as well as any data that lives for the + * lifetime of the index operation. We pass this to the support functions + * via fn_mcxt, so that they can store scan-lifespan data in it. The + * functions are invoked in tempCxt, which is typically short-lifespan + * (that is, it's reset after each tuple). However, tempCxt can be the same + * as scanCxt if we're not bothering with per-tuple context resets. + */ +typedef struct GISTSTATE +{ + MemoryContext scanCxt; /* context for scan-lifespan data */ + MemoryContext tempCxt; /* short-term context for calling functions */ + + TupleDesc leafTupdesc; /* index's tuple descriptor */ + TupleDesc nonLeafTupdesc; /* truncated tuple descriptor for non-leaf + * pages */ + TupleDesc fetchTupdesc; /* tuple descriptor for tuples returned in an + * index-only scan */ + + FmgrInfo consistentFn[INDEX_MAX_KEYS]; + FmgrInfo unionFn[INDEX_MAX_KEYS]; + FmgrInfo compressFn[INDEX_MAX_KEYS]; + FmgrInfo decompressFn[INDEX_MAX_KEYS]; + FmgrInfo penaltyFn[INDEX_MAX_KEYS]; + FmgrInfo picksplitFn[INDEX_MAX_KEYS]; + FmgrInfo equalFn[INDEX_MAX_KEYS]; + FmgrInfo distanceFn[INDEX_MAX_KEYS]; + FmgrInfo fetchFn[INDEX_MAX_KEYS]; + + /* Collations to pass to the support functions */ + Oid supportCollation[INDEX_MAX_KEYS]; +} GISTSTATE; + + +/* + * During a GiST index search, we must maintain a queue of unvisited items, + * which can be either individual heap tuples or whole index pages. If it + * is an ordered search, the unvisited items should be visited in distance + * order. Unvisited items at the same distance should be visited in + * depth-first order, that is heap items first, then lower index pages, then + * upper index pages; this rule avoids doing extra work during a search that + * ends early due to LIMIT. + * + * To perform an ordered search, we use a pairing heap to manage the + * distance-order queue. In a non-ordered search (no order-by operators), + * we use it to return heap tuples before unvisited index pages, to + * ensure depth-first order, but all entries are otherwise considered + * equal. 
+ */ + +/* Individual heap tuple to be visited */ +typedef struct GISTSearchHeapItem +{ + ItemPointerData heapPtr; + bool recheck; /* T if quals must be rechecked */ + bool recheckDistances; /* T if distances must be rechecked */ + HeapTuple recontup; /* data reconstructed from the index, used in + * index-only scans */ + OffsetNumber offnum; /* track offset in page to mark tuple as + * LP_DEAD */ +} GISTSearchHeapItem; + +/* Unvisited item, either index page or heap tuple */ +typedef struct GISTSearchItem +{ + pairingheap_node phNode; + BlockNumber blkno; /* index page number, or InvalidBlockNumber */ + union + { + GistNSN parentlsn; /* parent page's LSN, if index page */ + /* we must store parentlsn to detect whether a split occurred */ + GISTSearchHeapItem heap; /* heap info, if heap tuple */ + } data; + + /* numberOfOrderBys entries */ + IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER]; +} GISTSearchItem; + +#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber) + +#define SizeOfGISTSearchItem(n_distances) \ + (offsetof(GISTSearchItem, distances) + \ + sizeof(IndexOrderByDistance) * (n_distances)) + +/* + * GISTScanOpaqueData: private state for a scan of a GiST index + */ +typedef struct GISTScanOpaqueData +{ + GISTSTATE *giststate; /* index information, see above */ + Oid *orderByTypes; /* datatypes of ORDER BY expressions */ + + pairingheap *queue; /* queue of unvisited items */ + MemoryContext queueCxt; /* context holding the queue */ + bool qual_ok; /* false if qual can never be satisfied */ + bool firstCall; /* true until first gistgettuple call */ + + /* pre-allocated workspace arrays */ + IndexOrderByDistance *distances; /* output area for gistindex_keytest */ + + /* info about killed items if any (killedItems is NULL if never used) */ + OffsetNumber *killedItems; /* offset numbers of killed items */ + int numKilled; /* number of currently stored items */ + BlockNumber curBlkno; /* current number of block */ + GistNSN curPageLSN; /* pos in the WAL stream when page was read */ + + /* In a non-ordered search, returnable heap items are stored here: */ + GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)]; + OffsetNumber nPageData; /* number of valid items in array */ + OffsetNumber curPageData; /* next item to return */ + MemoryContext pageDataCxt; /* context holding the fetched tuples, for + * index-only scans */ +} GISTScanOpaqueData; + +typedef GISTScanOpaqueData *GISTScanOpaque; + +/* despite the name, gistxlogPage is not part of any xlog record */ +typedef struct gistxlogPage +{ + BlockNumber blkno; + int num; /* number of index tuples following */ +} gistxlogPage; + +/* SplitedPageLayout - gistSplit function result */ +typedef struct SplitedPageLayout +{ + gistxlogPage block; + IndexTupleData *list; + int lenlist; + IndexTuple itup; /* union key for page */ + Page page; /* to operate */ + Buffer buffer; /* to write after all proceed */ + + struct SplitedPageLayout *next; +} SplitedPageLayout; + +/* + * GISTInsertStack used for locking buffers and transfer arguments during + * insertion + */ +typedef struct GISTInsertStack +{ + /* current page */ + BlockNumber blkno; + Buffer buffer; + Page page; + + /* + * log sequence number from page->lsn to recognize page update and compare + * it with page's nsn to recognize page split + */ + GistNSN lsn; + + /* + * If set, we split the page while descending the tree to find an + * insertion target. 
It means that we need to retry from the parent, + * because the downlink of this page might no longer cover the new key. + */ + bool retry_from_parent; + + /* offset of the downlink in the parent page, that points to this page */ + OffsetNumber downlinkoffnum; + + /* pointer to parent */ + struct GISTInsertStack *parent; +} GISTInsertStack; + +/* Working state and results for multi-column split logic in gistsplit.c */ +typedef struct GistSplitVector +{ + GIST_SPLITVEC splitVector; /* passed to/from user PickSplit method */ + + Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in + * splitVector.spl_left */ + bool spl_lisnull[INDEX_MAX_KEYS]; + + Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in + * splitVector.spl_right */ + bool spl_risnull[INDEX_MAX_KEYS]; + + bool *spl_dontcare; /* flags tuples which could go to either side + * of the split for zero penalty */ +} GistSplitVector; + +typedef struct +{ + Relation r; + Relation heapRel; + Size freespace; /* free space to be left */ + bool is_build; + + GISTInsertStack *stack; +} GISTInsertState; + +/* root page of a gist index */ +#define GIST_ROOT_BLKNO 0 + +/* + * Before PostgreSQL 9.1, we used to rely on so-called "invalid tuples" on + * inner pages to finish crash recovery of incomplete page splits. If a crash + * happened in the middle of a page split, so that the downlink pointers were + * not yet inserted, crash recovery inserted a special downlink pointer. The + * semantics of an invalid tuple was that it if you encounter one in a scan, + * it must always be followed, because we don't know if the tuples on the + * child page match or not. + * + * We no longer create such invalid tuples, we now mark the left-half of such + * an incomplete split with the F_FOLLOW_RIGHT flag instead, and finish the + * split properly the next time we need to insert on that page. To retain + * on-disk compatibility for the sake of pg_upgrade, we still store 0xffff as + * the offset number of all inner tuples. If we encounter any invalid tuples + * with 0xfffe during insertion, we throw an error, though scans still handle + * them. You should only encounter invalid tuples if you pg_upgrade a pre-9.1 + * gist index which already has invalid tuples in it because of a crash. That + * should be rare, and you are recommended to REINDEX anyway if you have any + * invalid tuples in an index, so throwing an error is as far as we go with + * supporting that. + */ +#define TUPLE_IS_VALID 0xffff +#define TUPLE_IS_INVALID 0xfffe + +#define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID ) +#define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID ) + + + + +/* + * A buffer attached to an internal node, used when building an index in + * buffering mode. + */ +typedef struct +{ + BlockNumber nodeBlocknum; /* index block # this buffer is for */ + int32 blocksCount; /* current # of blocks occupied by buffer */ + + BlockNumber pageBlocknum; /* temporary file block # */ + GISTNodeBufferPage *pageBuffer; /* in-memory buffer page */ + + /* is this buffer queued for emptying? */ + bool queuedForEmptying; + + /* is this a temporary copy, not in the hash table? */ + bool isTemp; + + int level; /* 0 == leaf */ +} GISTNodeBuffer; + +/* + * Does specified level have buffers? (Beware of multiple evaluation of + * arguments.) 
+ */ +#define LEVEL_HAS_BUFFERS(nlevel, gfbb) \ + ((nlevel) != 0 && (nlevel) % (gfbb)->levelStep == 0 && \ + (nlevel) != (gfbb)->rootlevel) + +/* Is specified buffer at least half-filled (should be queued for emptying)? */ +#define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \ + ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2) + +/* + * Is specified buffer full? Our buffers can actually grow indefinitely, + * beyond the "maximum" size, so this just means whether the buffer has grown + * beyond the nominal maximum size. + */ +#define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \ + ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer) + +/* + * Data structure with general information about build buffers. + */ +typedef struct GISTBuildBuffers +{ + /* Persistent memory context for the buffers and metadata. */ + MemoryContext context; + + BufFile *pfile; /* Temporary file to store buffers in */ + long nFileBlocks; /* Current size of the temporary file */ + + /* + * resizable array of free blocks. + */ + long *freeBlocks; + int nFreeBlocks; /* # of currently free blocks in the array */ + int freeBlocksLen; /* current allocated length of the array */ + + /* Hash for buffers by block number */ + HTAB *nodeBuffersTab; + + /* List of buffers scheduled for emptying */ + List *bufferEmptyingQueue; + + /* + * Parameters to the buffering build algorithm. levelStep determines which + * levels in the tree have buffers, and pagesPerBuffer determines how + * large each buffer is. + */ + int levelStep; + int pagesPerBuffer; + + /* Array of lists of buffers on each level, for final emptying */ + List **buffersOnLevels; + int buffersOnLevelsLen; + + /* + * Dynamically-sized array of buffers that currently have their last page + * loaded in main memory. + */ + GISTNodeBuffer **loadedBuffers; + int loadedBuffersCount; /* # of entries in loadedBuffers */ + int loadedBuffersLen; /* allocated size of loadedBuffers */ + + /* Level of the current root node (= height of the index tree - 1) */ + int rootlevel; +} GISTBuildBuffers; + +/* GiSTOptions->buffering_mode values */ +typedef enum GistOptBufferingMode +{ + GIST_OPTION_BUFFERING_AUTO, + GIST_OPTION_BUFFERING_ON, + GIST_OPTION_BUFFERING_OFF +} GistOptBufferingMode; + +/* + * Storage type for GiST's reloptions + */ +typedef struct GiSTOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int fillfactor; /* page fill factor in percent (0..100) */ + GistOptBufferingMode buffering_mode; /* buffering build mode */ +} GiSTOptions; + +/* gist.c */ +extern void gistbuildempty(Relation index); +extern bool gistinsert(Relation r, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +extern MemoryContext createTempGistContext(void); +extern GISTSTATE *initGISTstate(Relation index); +extern void freeGISTstate(GISTSTATE *giststate); +extern void gistdoinsert(Relation r, + IndexTuple itup, + Size freespace, + GISTSTATE *giststate, + Relation heapRel, + bool is_build); + +/* A List of these is returned from gistplacetopage() in *splitinfo */ +typedef struct +{ + Buffer buf; /* the split page "half" */ + IndexTuple downlink; /* downlink for this half. 
*/ +} GISTPageSplitInfo; + +extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, + Buffer buffer, + IndexTuple *itup, int ntup, + OffsetNumber oldoffnum, BlockNumber *newblkno, + Buffer leftchildbuf, + List **splitinfo, + bool markfollowright, + Relation heapRel, + bool is_build); + +extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, + int len, GISTSTATE *giststate); + +/* gistxlog.c */ +extern XLogRecPtr gistXLogPageDelete(Buffer buffer, + FullTransactionId xid, Buffer parentBuffer, + OffsetNumber downlinkOffset); + +extern void gistXLogPageReuse(Relation rel, BlockNumber blkno, + FullTransactionId latestRemovedXid); + +extern XLogRecPtr gistXLogUpdate(Buffer buffer, + OffsetNumber *todelete, int ntodelete, + IndexTuple *itup, int ntup, + Buffer leftchild); + +extern XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete, + int ntodelete, TransactionId latestRemovedXid); + +extern XLogRecPtr gistXLogSplit(bool page_is_leaf, + SplitedPageLayout *dist, + BlockNumber origrlink, GistNSN oldnsn, + Buffer leftchild, bool markfollowright); + +extern XLogRecPtr gistXLogAssignLSN(void); + +/* gistget.c */ +extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir); +extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); +extern bool gistcanreturn(Relation index, int attno); + +/* gistvalidate.c */ +extern bool gistvalidate(Oid opclassoid); +extern void gistadjustmembers(Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions); + +/* gistutil.c */ + +#define GiSTPageSize \ + ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) ) + +#define GIST_MIN_FILLFACTOR 10 +#define GIST_DEFAULT_FILLFACTOR 90 + +extern bytea *gistoptions(Datum reloptions, bool validate); +extern bool gistproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull); +extern bool gistfitpage(IndexTuple *itvec, int len); +extern bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace); +extern void gistcheckpage(Relation rel, Buffer buf); +extern Buffer gistNewBuffer(Relation r); +extern bool gistPageRecyclable(Page page); +extern void gistfillbuffer(Page page, IndexTuple *itup, int len, + OffsetNumber off); +extern IndexTuple *gistextractpage(Page page, int *len /* out */ ); +extern IndexTuple *gistjoinvector(IndexTuple *itvec, int *len, + IndexTuple *additvec, int addlen); +extern IndexTupleData *gistfillitupvec(IndexTuple *vec, int veclen, int *memlen); + +extern IndexTuple gistunion(Relation r, IndexTuple *itvec, + int len, GISTSTATE *giststate); +extern IndexTuple gistgetadjusted(Relation r, + IndexTuple oldtup, + IndexTuple addtup, + GISTSTATE *giststate); +extern IndexTuple gistFormTuple(GISTSTATE *giststate, + Relation r, Datum *attdata, bool *isnull, bool isleaf); +extern void gistCompressValues(GISTSTATE *giststate, Relation r, + Datum *attdata, bool *isnull, bool isleaf, Datum *compatt); + +extern OffsetNumber gistchoose(Relation r, Page p, + IndexTuple it, + GISTSTATE *giststate); + +extern void GISTInitBuffer(Buffer b, uint32 f); +extern void gistinitpage(Page page, uint32 f); +extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, + Datum k, Relation r, Page pg, OffsetNumber o, + bool l, bool isNull); + +extern float gistpenalty(GISTSTATE *giststate, int attno, + GISTENTRY *key1, bool isNull1, + GISTENTRY *key2, bool isNull2); +extern void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, 
+ Datum *attr, bool *isnull); +extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b); +extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, + OffsetNumber o, GISTENTRY *attdata, bool *isnull); +extern HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, + IndexTuple tuple); +extern void gistMakeUnionKey(GISTSTATE *giststate, int attno, + GISTENTRY *entry1, bool isnull1, + GISTENTRY *entry2, bool isnull2, + Datum *dst, bool *dstisnull); + +extern XLogRecPtr gistGetFakeLSN(Relation rel); + +/* gistvacuum.c */ +extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *gistvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); + +/* gistsplit.c */ +extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup, + int len, GISTSTATE *giststate, + GistSplitVector *v, + int attno); + +/* gistbuild.c */ +extern IndexBuildResult *gistbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void gistValidateBufferingOption(const char *value); + +/* gistbuildbuffers.c */ +extern GISTBuildBuffers *gistInitBuildBuffers(int pagesPerBuffer, int levelStep, + int maxLevel); +extern GISTNodeBuffer *gistGetNodeBuffer(GISTBuildBuffers *gfbb, + GISTSTATE *giststate, + BlockNumber blkno, int level); +extern void gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, + GISTNodeBuffer *nodeBuffer, IndexTuple item); +extern bool gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, + GISTNodeBuffer *nodeBuffer, IndexTuple *item); +extern void gistFreeBuildBuffers(GISTBuildBuffers *gfbb); +extern void gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, + GISTSTATE *giststate, Relation r, + int level, Buffer buffer, + List *splitinfo); +extern void gistUnloadNodeBuffers(GISTBuildBuffers *gfbb); + +#endif /* GIST_PRIVATE_H */ diff --git a/src/include/access/gistscan.h b/src/include/access/gistscan.h new file mode 100644 index 0000000..54451b5 --- /dev/null +++ b/src/include/access/gistscan.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * gistscan.h + * routines defined in access/gist/gistscan.c + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/gistscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef GISTSCAN_H +#define GISTSCAN_H + +#include "access/amapi.h" + +extern IndexScanDesc gistbeginscan(Relation r, int nkeys, int norderbys); +extern void gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, + ScanKey orderbys, int norderbys); +extern void gistendscan(IndexScanDesc scan); + +#endif /* GISTSCAN_H */ diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h new file mode 100644 index 0000000..fd5144f --- /dev/null +++ b/src/include/access/gistxlog.h @@ -0,0 +1,114 @@ +/*------------------------------------------------------------------------- + * + * gistxlog.h + * gist xlog routines + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/gistxlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef GIST_XLOG_H +#define GIST_XLOG_H + +#include "access/gist.h" +#include 
"access/xlogreader.h" +#include "lib/stringinfo.h" + +#define XLOG_GIST_PAGE_UPDATE 0x00 +#define XLOG_GIST_DELETE 0x10 /* delete leaf index tuples for a + * page */ +#define XLOG_GIST_PAGE_REUSE 0x20 /* old page is about to be reused + * from FSM */ +#define XLOG_GIST_PAGE_SPLIT 0x30 + /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */ + /* #define XLOG_GIST_CREATE_INDEX 0x50 */ /* not used anymore */ +#define XLOG_GIST_PAGE_DELETE 0x60 +#define XLOG_GIST_ASSIGN_LSN 0x70 /* nop, assign new LSN */ + +/* + * Backup Blk 0: updated page. + * Backup Blk 1: If this operation completes a page split, by inserting a + * downlink for the split page, the left half of the split + */ +typedef struct gistxlogPageUpdate +{ + /* number of deleted offsets */ + uint16 ntodelete; + uint16 ntoinsert; + + /* + * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert + */ +} gistxlogPageUpdate; + +/* + * Backup Blk 0: Leaf page, whose index tuples are deleted. + */ +typedef struct gistxlogDelete +{ + TransactionId latestRemovedXid; + uint16 ntodelete; /* number of deleted offsets */ + + /* + * In payload of blk 0 : todelete OffsetNumbers + */ +} gistxlogDelete; + +#define SizeOfGistxlogDelete (offsetof(gistxlogDelete, ntodelete) + sizeof(uint16)) + +/* + * Backup Blk 0: If this operation completes a page split, by inserting a + * downlink for the split page, the left half of the split + * Backup Blk 1 - npage: split pages (1 is the original page) + */ +typedef struct gistxlogPageSplit +{ + BlockNumber origrlink; /* rightlink of the page before split */ + GistNSN orignsn; /* NSN of the page before split */ + bool origleaf; /* was splitted page a leaf page? */ + + uint16 npage; /* # of pages in the split */ + bool markfollowright; /* set F_FOLLOW_RIGHT flags */ + + /* + * follow: 1. gistxlogPage and array of IndexTupleData per page + */ +} gistxlogPageSplit; + +/* + * Backup Blk 0: page that was deleted. + * Backup Blk 1: parent page, containing the downlink to the deleted page. + */ +typedef struct gistxlogPageDelete +{ + FullTransactionId deleteXid; /* last Xid which could see page in scan */ + OffsetNumber downlinkOffset; /* Offset of downlink referencing this + * page */ +} gistxlogPageDelete; + +#define SizeOfGistxlogPageDelete (offsetof(gistxlogPageDelete, downlinkOffset) + sizeof(OffsetNumber)) + + +/* + * This is what we need to know about page reuse, for hot standby. 
+ */ +typedef struct gistxlogPageReuse +{ + RelFileNode node; + BlockNumber block; + FullTransactionId latestRemovedFullXid; +} gistxlogPageReuse; + +#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedFullXid) + sizeof(FullTransactionId)) + +extern void gist_redo(XLogReaderState *record); +extern void gist_desc(StringInfo buf, XLogReaderState *record); +extern const char *gist_identify(uint8 info); +extern void gist_xlog_startup(void); +extern void gist_xlog_cleanup(void); +extern void gist_mask(char *pagedata, BlockNumber blkno); + +#endif diff --git a/src/include/access/hash.h b/src/include/access/hash.h new file mode 100644 index 0000000..1cce865 --- /dev/null +++ b/src/include/access/hash.h @@ -0,0 +1,483 @@ +/*------------------------------------------------------------------------- + * + * hash.h + * header file for postgres hash access method implementation + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/hash.h + * + * NOTES + * modeled after Margo Seltzer's hash implementation for unix. + * + *------------------------------------------------------------------------- + */ +#ifndef HASH_H +#define HASH_H + +#include "access/amapi.h" +#include "access/itup.h" +#include "access/sdir.h" +#include "catalog/pg_am_d.h" +#include "common/hashfn.h" +#include "lib/stringinfo.h" +#include "storage/bufmgr.h" +#include "storage/lockdefs.h" +#include "utils/hsearch.h" +#include "utils/relcache.h" + +/* + * Mapping from hash bucket number to physical block number of bucket's + * starting page. Beware of multiple evaluations of argument! + */ +typedef uint32 Bucket; + +#define InvalidBucket ((Bucket) 0xFFFFFFFF) + +#define BUCKET_TO_BLKNO(metap,B) \ + ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1) + +/* + * Special space for hash index pages. + * + * hasho_flag's LH_PAGE_TYPE bits tell us which type of page we're looking at. + * Additional bits in the flag word are used for more transient purposes. + * + * To test a page's type, do (hasho_flag & LH_PAGE_TYPE) == LH_xxx_PAGE. + * However, we ensure that each used page type has a distinct bit so that + * we can OR together page types for uses such as the allowable-page-types + * argument of _hash_checkpage(). + */ +#define LH_UNUSED_PAGE (0) +#define LH_OVERFLOW_PAGE (1 << 0) +#define LH_BUCKET_PAGE (1 << 1) +#define LH_BITMAP_PAGE (1 << 2) +#define LH_META_PAGE (1 << 3) +#define LH_BUCKET_BEING_POPULATED (1 << 4) +#define LH_BUCKET_BEING_SPLIT (1 << 5) +#define LH_BUCKET_NEEDS_SPLIT_CLEANUP (1 << 6) +#define LH_PAGE_HAS_DEAD_TUPLES (1 << 7) + +#define LH_PAGE_TYPE \ + (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE) + +/* + * In an overflow page, hasho_prevblkno stores the block number of the previous + * page in the bucket chain; in a bucket page, hasho_prevblkno stores the + * hashm_maxbucket value as of the last time the bucket was last split, or + * else as of the time the bucket was created. The latter convention is used + * to determine whether a cached copy of the metapage is too stale to be used + * without needing to lock or pin the metapage. + * + * hasho_nextblkno is always the block number of the next page in the + * bucket chain, or InvalidBlockNumber if there are no more such pages. 
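+ *
+ * For illustration only (a sketch, not code from this patch): a reader walks
+ * an overflow chain by following hasho_nextblkno until it becomes invalid,
+ * roughly
+ *
+ *		BlockNumber blkno = opaque->hasho_nextblkno;
+ *
+ *		while (BlockNumberIsValid(blkno))
+ *		{
+ *			... read the page, process its tuples ...
+ *			blkno = ((HashPageOpaque) PageGetSpecialPointer(page))->hasho_nextblkno;
+ *		}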
+ */ +typedef struct HashPageOpaqueData +{ + BlockNumber hasho_prevblkno; /* see above */ + BlockNumber hasho_nextblkno; /* see above */ + Bucket hasho_bucket; /* bucket number this pg belongs to */ + uint16 hasho_flag; /* page type code + flag bits, see above */ + uint16 hasho_page_id; /* for identification of hash indexes */ +} HashPageOpaqueData; + +typedef HashPageOpaqueData *HashPageOpaque; + +#define H_NEEDS_SPLIT_CLEANUP(opaque) (((opaque)->hasho_flag & LH_BUCKET_NEEDS_SPLIT_CLEANUP) != 0) +#define H_BUCKET_BEING_SPLIT(opaque) (((opaque)->hasho_flag & LH_BUCKET_BEING_SPLIT) != 0) +#define H_BUCKET_BEING_POPULATED(opaque) (((opaque)->hasho_flag & LH_BUCKET_BEING_POPULATED) != 0) +#define H_HAS_DEAD_TUPLES(opaque) (((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES) != 0) + +/* + * The page ID is for the convenience of pg_filedump and similar utilities, + * which otherwise would have a hard time telling pages of different index + * types apart. It should be the last 2 bytes on the page. This is more or + * less "free" due to alignment considerations. + */ +#define HASHO_PAGE_ID 0xFF80 + +typedef struct HashScanPosItem /* what we remember about each match */ +{ + ItemPointerData heapTid; /* TID of referenced heap item */ + OffsetNumber indexOffset; /* index item's location within page */ +} HashScanPosItem; + +typedef struct HashScanPosData +{ + Buffer buf; /* if valid, the buffer is pinned */ + BlockNumber currPage; /* current hash index page */ + BlockNumber nextPage; /* next overflow page */ + BlockNumber prevPage; /* prev overflow or bucket page */ + + /* + * The items array is always ordered in index order (ie, increasing + * indexoffset). When scanning backwards it is convenient to fill the + * array back-to-front, so we start at the last slot and fill downwards. + * Hence we need both a first-valid-entry and a last-valid-entry counter. + * itemIndex is a cursor showing which entry was last returned to caller. + */ + int firstItem; /* first valid index in items[] */ + int lastItem; /* last valid index in items[] */ + int itemIndex; /* current index in items[] */ + + HashScanPosItem items[MaxIndexTuplesPerPage]; /* MUST BE LAST */ +} HashScanPosData; + +#define HashScanPosIsPinned(scanpos) \ +( \ + AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ + !BufferIsValid((scanpos).buf)), \ + BufferIsValid((scanpos).buf) \ +) + +#define HashScanPosIsValid(scanpos) \ +( \ + AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ + !BufferIsValid((scanpos).buf)), \ + BlockNumberIsValid((scanpos).currPage) \ +) + +#define HashScanPosInvalidate(scanpos) \ + do { \ + (scanpos).buf = InvalidBuffer; \ + (scanpos).currPage = InvalidBlockNumber; \ + (scanpos).nextPage = InvalidBlockNumber; \ + (scanpos).prevPage = InvalidBlockNumber; \ + (scanpos).firstItem = 0; \ + (scanpos).lastItem = 0; \ + (scanpos).itemIndex = 0; \ + } while (0) + +/* + * HashScanOpaqueData is private state for a hash index scan. + */ +typedef struct HashScanOpaqueData +{ + /* Hash value of the scan key, ie, the hash key we seek */ + uint32 hashso_sk_hash; + + /* remember the buffer associated with primary bucket */ + Buffer hashso_bucket_buf; + + /* + * remember the buffer associated with primary bucket page of bucket being + * split. it is required during the scan of the bucket which is being + * populated during split operation. + */ + Buffer hashso_split_bucket_buf; + + /* Whether scan starts on bucket being populated due to split */ + bool hashso_buc_populated; + + /* + * Whether scanning bucket being split? 
The value of this parameter is + * referred only when hashso_buc_populated is true. + */ + bool hashso_buc_split; + /* info about killed items if any (killedItems is NULL if never used) */ + int *killedItems; /* currPos.items indexes of killed items */ + int numKilled; /* number of currently stored items */ + + /* + * Identify all the matching items on a page and save them in + * HashScanPosData + */ + HashScanPosData currPos; /* current position data */ +} HashScanOpaqueData; + +typedef HashScanOpaqueData *HashScanOpaque; + +/* + * Definitions for metapage. + */ + +#define HASH_METAPAGE 0 /* metapage is always block 0 */ + +#define HASH_MAGIC 0x6440640 +#define HASH_VERSION 4 + +/* + * spares[] holds the number of overflow pages currently allocated at or + * before a certain splitpoint. For example, if spares[3] = 7 then there are + * 7 ovflpages before splitpoint 3 (compare BUCKET_TO_BLKNO macro). The + * value in spares[ovflpoint] increases as overflow pages are added at the + * end of the index. Once ovflpoint increases (ie, we have actually allocated + * the bucket pages belonging to that splitpoint) the number of spares at the + * prior splitpoint cannot change anymore. + * + * ovflpages that have been recycled for reuse can be found by looking at + * bitmaps that are stored within ovflpages dedicated for the purpose. + * The blknos of these bitmap pages are kept in mapp[]; nmaps is the + * number of currently existing bitmaps. + * + * The limitation on the size of spares[] comes from the fact that there's + * no point in having more than 2^32 buckets with only uint32 hashcodes. + * (Note: The value of HASH_MAX_SPLITPOINTS which is the size of spares[] is + * adjusted in such a way to accommodate multi phased allocation of buckets + * after HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE). + * + * There is no particular upper limit on the size of mapp[], other than + * needing to fit into the metapage. (With 8K block size, 1024 bitmaps + * limit us to 256 GB of overflow space...). For smaller block size we + * can not use 1024 bitmaps as it will lead to the meta page data crossing + * the block size boundary. So we use BLCKSZ to determine the maximum number + * of bitmaps. + */ +#define HASH_MAX_BITMAPS Min(BLCKSZ / 8, 1024) + +#define HASH_SPLITPOINT_PHASE_BITS 2 +#define HASH_SPLITPOINT_PHASES_PER_GRP (1 << HASH_SPLITPOINT_PHASE_BITS) +#define HASH_SPLITPOINT_PHASE_MASK (HASH_SPLITPOINT_PHASES_PER_GRP - 1) +#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE 10 + +/* defines max number of splitpoint phases a hash index can have */ +#define HASH_MAX_SPLITPOINT_GROUP 32 +#define HASH_MAX_SPLITPOINTS \ + (((HASH_MAX_SPLITPOINT_GROUP - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) * \ + HASH_SPLITPOINT_PHASES_PER_GRP) + \ + HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) + +typedef struct HashMetaPageData +{ + uint32 hashm_magic; /* magic no. 
for hash tables */ + uint32 hashm_version; /* version ID */ + double hashm_ntuples; /* number of tuples stored in the table */ + uint16 hashm_ffactor; /* target fill factor (tuples/bucket) */ + uint16 hashm_bsize; /* index page size (bytes) */ + uint16 hashm_bmsize; /* bitmap array size (bytes) - must be a power + * of 2 */ + uint16 hashm_bmshift; /* log2(bitmap array size in BITS) */ + uint32 hashm_maxbucket; /* ID of maximum bucket in use */ + uint32 hashm_highmask; /* mask to modulo into entire table */ + uint32 hashm_lowmask; /* mask to modulo into lower half of table */ + uint32 hashm_ovflpoint; /* splitpoint from which ovflpage being + * allocated */ + uint32 hashm_firstfree; /* lowest-number free ovflpage (bit#) */ + uint32 hashm_nmaps; /* number of bitmap pages */ + RegProcedure hashm_procid; /* hash function id from pg_proc */ + uint32 hashm_spares[HASH_MAX_SPLITPOINTS]; /* spare pages before each + * splitpoint */ + BlockNumber hashm_mapp[HASH_MAX_BITMAPS]; /* blknos of ovfl bitmaps */ +} HashMetaPageData; + +typedef HashMetaPageData *HashMetaPage; + +typedef struct HashOptions +{ + int32 varlena_header_; /* varlena header (do not touch directly!) */ + int fillfactor; /* page fill factor in percent (0..100) */ +} HashOptions; + +#define HashGetFillFactor(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == HASH_AM_OID), \ + (relation)->rd_options ? \ + ((HashOptions *) (relation)->rd_options)->fillfactor : \ + HASH_DEFAULT_FILLFACTOR) +#define HashGetTargetPageUsage(relation) \ + (BLCKSZ * HashGetFillFactor(relation) / 100) + +/* + * Maximum size of a hash index item (it's okay to have only one per page) + */ +#define HashMaxItemSize(page) \ + MAXALIGN_DOWN(PageGetPageSize(page) - \ + SizeOfPageHeaderData - \ + sizeof(ItemIdData) - \ + MAXALIGN(sizeof(HashPageOpaqueData))) + +#define INDEX_MOVED_BY_SPLIT_MASK INDEX_AM_RESERVED_BIT + +#define HASH_MIN_FILLFACTOR 10 +#define HASH_DEFAULT_FILLFACTOR 75 + +/* + * Constants + */ +#define BYTE_TO_BIT 3 /* 2^3 bits/byte */ +#define ALL_SET ((uint32) ~0) + +/* + * Bitmap pages do not contain tuples. They do contain the standard + * page headers and trailers; however, everything in between is a + * giant bit array. The number of bits that fit on a page obviously + * depends on the page size and the header/trailer overhead. We require + * the number of bits per page to be a power of 2. + */ +#define BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize) +#define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT) +#define BMPG_SHIFT(metap) ((metap)->hashm_bmshift) +#define BMPG_MASK(metap) (BMPGSZ_BIT(metap) - 1) + +#define HashPageGetBitmap(page) \ + ((uint32 *) PageGetContents(page)) + +#define HashGetMaxBitmapSize(page) \ + (PageGetPageSize((Page) page) - \ + (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData)))) + +#define HashPageGetMeta(page) \ + ((HashMetaPage) PageGetContents(page)) + +/* + * The number of bits in an ovflpage bitmap word. 
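+ *
+ * Worked example (numbers are illustrative): with 32-bit map words, bit 40 of
+ * a bitmap lives at bit 40 % 32 = 8 of word 40 / 32 = 1, which is exactly the
+ * arithmetic the CLRBIT/SETBIT/ISSET macros below perform.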
+ */ +#define BITS_PER_MAP 32 /* Number of bits in uint32 */ + +/* Given the address of the beginning of a bit map, clear/set the nth bit */ +#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) +#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) +#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) + +/* + * page-level and high-level locking modes (see README) + */ +#define HASH_READ BUFFER_LOCK_SHARE +#define HASH_WRITE BUFFER_LOCK_EXCLUSIVE +#define HASH_NOLOCK (-1) + +/* + * When a new operator class is declared, we require that the user supply + * us with an amproc function for hashing a key of the new type, returning + * a 32-bit hash value. We call this the "standard" hash function. We + * also allow an optional "extended" hash function which accepts a salt and + * returns a 64-bit hash value. This is highly recommended but, for reasons + * of backward compatibility, optional. + * + * When the salt is 0, the low 32 bits of the value returned by the extended + * hash function should match the value that would have been returned by the + * standard hash function. + */ +#define HASHSTANDARD_PROC 1 +#define HASHEXTENDED_PROC 2 +#define HASHOPTIONS_PROC 3 +#define HASHNProcs 3 + + +/* public routines */ + +extern IndexBuildResult *hashbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void hashbuildempty(Relation index); +extern bool hashinsert(Relation rel, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +extern bool hashgettuple(IndexScanDesc scan, ScanDirection dir); +extern int64 hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); +extern IndexScanDesc hashbeginscan(Relation rel, int nkeys, int norderbys); +extern void hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +extern void hashendscan(IndexScanDesc scan); +extern IndexBulkDeleteResult *hashbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *hashvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); +extern bytea *hashoptions(Datum reloptions, bool validate); +extern bool hashvalidate(Oid opclassoid); +extern void hashadjustmembers(Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions); + +/* private routines */ + +/* hashinsert.c */ +extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel); +extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, + Size itemsize, IndexTuple itup); +extern void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, + OffsetNumber *itup_offsets, uint16 nitups); + +/* hashovfl.c */ +extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin); +extern BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, + Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets, + Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy); +extern void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage); +extern void _hash_squeezebucket(Relation rel, + Bucket bucket, BlockNumber bucket_blkno, + Buffer bucket_buf, + BufferAccessStrategy bstrategy); +extern uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno); + +/* hashpage.c */ +extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, + int access, int 
flags); +extern Buffer _hash_getbuf_with_condlock_cleanup(Relation rel, + BlockNumber blkno, int flags); +extern HashMetaPage _hash_getcachedmetap(Relation rel, Buffer *metabuf, + bool force_refresh); +extern Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey, + int access, + HashMetaPage *cachedmetap); +extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno); +extern void _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, + uint32 flag, bool initpage); +extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, + ForkNumber forkNum); +extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, + int access, int flags, + BufferAccessStrategy bstrategy); +extern void _hash_relbuf(Relation rel, Buffer buf); +extern void _hash_dropbuf(Relation rel, Buffer buf); +extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so); +extern uint32 _hash_init(Relation rel, double num_tuples, + ForkNumber forkNum); +extern void _hash_init_metabuffer(Buffer buf, double num_tuples, + RegProcedure procid, uint16 ffactor, bool initpage); +extern void _hash_pageinit(Page page, Size size); +extern void _hash_expandtable(Relation rel, Buffer metabuf); +extern void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, + Bucket obucket, uint32 maxbucket, uint32 highmask, + uint32 lowmask); + +/* hashsearch.c */ +extern bool _hash_next(IndexScanDesc scan, ScanDirection dir); +extern bool _hash_first(IndexScanDesc scan, ScanDirection dir); + +/* hashsort.c */ +typedef struct HSpool HSpool; /* opaque struct in hashsort.c */ + +extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets); +extern void _h_spooldestroy(HSpool *hspool); +extern void _h_spool(HSpool *hspool, ItemPointer self, + Datum *values, bool *isnull); +extern void _h_indexbuild(HSpool *hspool, Relation heapRel); + +/* hashutil.c */ +extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup); +extern uint32 _hash_datum2hashkey(Relation rel, Datum key); +extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype); +extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket, + uint32 highmask, uint32 lowmask); +extern uint32 _hash_spareindex(uint32 num_bucket); +extern uint32 _hash_get_totalbuckets(uint32 splitpoint_phase); +extern void _hash_checkpage(Relation rel, Buffer buf, int flags); +extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup); +extern bool _hash_convert_tuple(Relation index, + Datum *user_values, bool *user_isnull, + Datum *index_values, bool *index_isnull); +extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value); +extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value); +extern BlockNumber _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket); +extern BlockNumber _hash_get_newblock_from_oldbucket(Relation rel, Bucket old_bucket); +extern Bucket _hash_get_newbucket_from_oldbucket(Relation rel, Bucket old_bucket, + uint32 lowmask, uint32 maxbucket); +extern void _hash_kill_items(IndexScanDesc scan); + +/* hash.c */ +extern void hashbucketcleanup(Relation rel, Bucket cur_bucket, + Buffer bucket_buf, BlockNumber bucket_blkno, + BufferAccessStrategy bstrategy, + uint32 maxbucket, uint32 highmask, uint32 lowmask, + double *tuples_removed, double *num_index_tuples, + bool split_cleanup, + IndexBulkDeleteCallback callback, void *callback_state); + +#endif /* HASH_H */ diff --git a/src/include/access/hash_xlog.h b/src/include/access/hash_xlog.h new file mode 100644 index 
0000000..4353a32 --- /dev/null +++ b/src/include/access/hash_xlog.h @@ -0,0 +1,267 @@ +/*------------------------------------------------------------------------- + * + * hash_xlog.h + * header file for Postgres hash AM implementation + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/hash_xlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef HASH_XLOG_H +#define HASH_XLOG_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/off.h" + +/* Number of buffers required for XLOG_HASH_SQUEEZE_PAGE operation */ +#define HASH_XLOG_FREE_OVFL_BUFS 6 + +/* + * XLOG records for hash operations + */ +#define XLOG_HASH_INIT_META_PAGE 0x00 /* initialize the meta page */ +#define XLOG_HASH_INIT_BITMAP_PAGE 0x10 /* initialize the bitmap page */ +#define XLOG_HASH_INSERT 0x20 /* add index tuple without split */ +#define XLOG_HASH_ADD_OVFL_PAGE 0x30 /* add overflow page */ +#define XLOG_HASH_SPLIT_ALLOCATE_PAGE 0x40 /* allocate new page for split */ +#define XLOG_HASH_SPLIT_PAGE 0x50 /* split page */ +#define XLOG_HASH_SPLIT_COMPLETE 0x60 /* completion of split operation */ +#define XLOG_HASH_MOVE_PAGE_CONTENTS 0x70 /* remove tuples from one page + * and add to another page */ +#define XLOG_HASH_SQUEEZE_PAGE 0x80 /* add tuples to one of the previous + * pages in chain and free the ovfl + * page */ +#define XLOG_HASH_DELETE 0x90 /* delete index tuples from a page */ +#define XLOG_HASH_SPLIT_CLEANUP 0xA0 /* clear split-cleanup flag in primary + * bucket page after deleting tuples + * that are moved due to split */ +#define XLOG_HASH_UPDATE_META_PAGE 0xB0 /* update meta page after vacuum */ + +#define XLOG_HASH_VACUUM_ONE_PAGE 0xC0 /* remove dead tuples from index + * page */ + +/* + * xl_hash_split_allocate_page flag values, 8 bits are available. + */ +#define XLH_SPLIT_META_UPDATE_MASKS (1<<0) +#define XLH_SPLIT_META_UPDATE_SPLITPOINT (1<<1) + +/* + * This is what we need to know about simple (without split) insert. + * + * This data record is used for XLOG_HASH_INSERT + * + * Backup Blk 0: original page (data contains the inserted tuple) + * Backup Blk 1: metapage (HashMetaPageData) + */ +typedef struct xl_hash_insert +{ + OffsetNumber offnum; +} xl_hash_insert; + +#define SizeOfHashInsert (offsetof(xl_hash_insert, offnum) + sizeof(OffsetNumber)) + +/* + * This is what we need to know about addition of overflow page. + * + * This data record is used for XLOG_HASH_ADD_OVFL_PAGE + * + * Backup Blk 0: newly allocated overflow page + * Backup Blk 1: page before new overflow page in the bucket chain + * Backup Blk 2: bitmap page + * Backup Blk 3: new bitmap page + * Backup Blk 4: metapage + */ +typedef struct xl_hash_add_ovfl_page +{ + uint16 bmsize; + bool bmpage_found; +} xl_hash_add_ovfl_page; + +#define SizeOfHashAddOvflPage \ + (offsetof(xl_hash_add_ovfl_page, bmpage_found) + sizeof(bool)) + +/* + * This is what we need to know about allocating a page for split. 
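+ * (Illustrative only, describing how the redo side is expected to use the
+ * flag bits defined above, roughly
+ *		if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
+ *			... also refresh hashm_lowmask/hashm_highmask in the metapage ...
+ * which is why the metapage is registered as a backup block for this record.)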
+ * + * This data record is used for XLOG_HASH_SPLIT_ALLOCATE_PAGE + * + * Backup Blk 0: page for old bucket + * Backup Blk 1: page for new bucket + * Backup Blk 2: metapage + */ +typedef struct xl_hash_split_allocate_page +{ + uint32 new_bucket; + uint16 old_bucket_flag; + uint16 new_bucket_flag; + uint8 flags; +} xl_hash_split_allocate_page; + +#define SizeOfHashSplitAllocPage \ + (offsetof(xl_hash_split_allocate_page, flags) + sizeof(uint8)) + +/* + * This is what we need to know about completing the split operation. + * + * This data record is used for XLOG_HASH_SPLIT_COMPLETE + * + * Backup Blk 0: page for old bucket + * Backup Blk 1: page for new bucket + */ +typedef struct xl_hash_split_complete +{ + uint16 old_bucket_flag; + uint16 new_bucket_flag; +} xl_hash_split_complete; + +#define SizeOfHashSplitComplete \ + (offsetof(xl_hash_split_complete, new_bucket_flag) + sizeof(uint16)) + +/* + * This is what we need to know about move page contents required during + * squeeze operation. + * + * This data record is used for XLOG_HASH_MOVE_PAGE_CONTENTS + * + * Backup Blk 0: bucket page + * Backup Blk 1: page containing moved tuples + * Backup Blk 2: page from which tuples will be removed + */ +typedef struct xl_hash_move_page_contents +{ + uint16 ntups; + bool is_prim_bucket_same_wrt; /* true if the page to which + * tuples are moved is same as + * primary bucket page */ +} xl_hash_move_page_contents; + +#define SizeOfHashMovePageContents \ + (offsetof(xl_hash_move_page_contents, is_prim_bucket_same_wrt) + sizeof(bool)) + +/* + * This is what we need to know about the squeeze page operation. + * + * This data record is used for XLOG_HASH_SQUEEZE_PAGE + * + * Backup Blk 0: page containing tuples moved from freed overflow page + * Backup Blk 1: freed overflow page + * Backup Blk 2: page previous to the freed overflow page + * Backup Blk 3: page next to the freed overflow page + * Backup Blk 4: bitmap page containing info of freed overflow page + * Backup Blk 5: meta page + */ +typedef struct xl_hash_squeeze_page +{ + BlockNumber prevblkno; + BlockNumber nextblkno; + uint16 ntups; + bool is_prim_bucket_same_wrt; /* true if the page to which + * tuples are moved is same as + * primary bucket page */ + bool is_prev_bucket_same_wrt; /* true if the page to which + * tuples are moved is the page + * previous to the freed overflow + * page */ +} xl_hash_squeeze_page; + +#define SizeOfHashSqueezePage \ + (offsetof(xl_hash_squeeze_page, is_prev_bucket_same_wrt) + sizeof(bool)) + +/* + * This is what we need to know about the deletion of index tuples from a page. + * + * This data record is used for XLOG_HASH_DELETE + * + * Backup Blk 0: primary bucket page + * Backup Blk 1: page from which tuples are deleted + */ +typedef struct xl_hash_delete +{ + bool clear_dead_marking; /* true if this operation clears + * LH_PAGE_HAS_DEAD_TUPLES flag */ + bool is_primary_bucket_page; /* true if the operation is for + * primary bucket page */ +} xl_hash_delete; + +#define SizeOfHashDelete (offsetof(xl_hash_delete, is_primary_bucket_page) + sizeof(bool)) + +/* + * This is what we need for metapage update operation. + * + * This data record is used for XLOG_HASH_UPDATE_META_PAGE + * + * Backup Blk 0: meta page + */ +typedef struct xl_hash_update_meta_page +{ + double ntuples; +} xl_hash_update_meta_page; + +#define SizeOfHashUpdateMetaPage \ + (offsetof(xl_hash_update_meta_page, ntuples) + sizeof(double)) + +/* + * This is what we need to initialize metapage. 
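+ * (Illustrative sketch only: on replay the three fields below are simply fed
+ * back into the metapage initializer, roughly
+ *		_hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
+ *							  xlrec->ffactor, true);
+ * so nothing else needs to be logged to rebuild the metapage.)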
+ * + * This data record is used for XLOG_HASH_INIT_META_PAGE + * + * Backup Blk 0: meta page + */ +typedef struct xl_hash_init_meta_page +{ + double num_tuples; + RegProcedure procid; + uint16 ffactor; +} xl_hash_init_meta_page; + +#define SizeOfHashInitMetaPage \ + (offsetof(xl_hash_init_meta_page, ffactor) + sizeof(uint16)) + +/* + * This is what we need to initialize bitmap page. + * + * This data record is used for XLOG_HASH_INIT_BITMAP_PAGE + * + * Backup Blk 0: bitmap page + * Backup Blk 1: meta page + */ +typedef struct xl_hash_init_bitmap_page +{ + uint16 bmsize; +} xl_hash_init_bitmap_page; + +#define SizeOfHashInitBitmapPage \ + (offsetof(xl_hash_init_bitmap_page, bmsize) + sizeof(uint16)) + +/* + * This is what we need for index tuple deletion and to + * update the meta page. + * + * This data record is used for XLOG_HASH_VACUUM_ONE_PAGE + * + * Backup Blk 0: bucket page + * Backup Blk 1: meta page + */ +typedef struct xl_hash_vacuum_one_page +{ + TransactionId latestRemovedXid; + int ntuples; + + /* TARGET OFFSET NUMBERS FOLLOW AT THE END */ +} xl_hash_vacuum_one_page; + +#define SizeOfHashVacuumOnePage \ + (offsetof(xl_hash_vacuum_one_page, ntuples) + sizeof(int)) + +extern void hash_redo(XLogReaderState *record); +extern void hash_desc(StringInfo buf, XLogReaderState *record); +extern const char *hash_identify(uint8 info); +extern void hash_mask(char *pagedata, BlockNumber blkno); + +#endif /* HASH_XLOG_H */ diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h new file mode 100644 index 0000000..4f1dff9 --- /dev/null +++ b/src/include/access/heapam.h @@ -0,0 +1,235 @@ +/*------------------------------------------------------------------------- + * + * heapam.h + * POSTGRES heap access method definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/heapam.h + * + *------------------------------------------------------------------------- + */ +#ifndef HEAPAM_H +#define HEAPAM_H + +#include "access/relation.h" /* for backward compatibility */ +#include "access/relscan.h" +#include "access/sdir.h" +#include "access/skey.h" +#include "access/table.h" /* for backward compatibility */ +#include "access/tableam.h" +#include "nodes/lockoptions.h" +#include "nodes/primnodes.h" +#include "storage/bufpage.h" +#include "storage/dsm.h" +#include "storage/lockdefs.h" +#include "storage/shm_toc.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" + + +/* "options" flag bits for heap_insert */ +#define HEAP_INSERT_SKIP_FSM TABLE_INSERT_SKIP_FSM +#define HEAP_INSERT_FROZEN TABLE_INSERT_FROZEN +#define HEAP_INSERT_NO_LOGICAL TABLE_INSERT_NO_LOGICAL +#define HEAP_INSERT_SPECULATIVE 0x0010 + +typedef struct BulkInsertStateData *BulkInsertState; +struct TupleTableSlot; + +#define MaxLockTupleMode LockTupleExclusive + +/* + * Descriptor for heap table scans. 
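+ *
+ * For illustration only (a minimal sketch; in-core callers normally go
+ * through the tableam wrappers), a plain sequential scan is driven like
+ *
+ *		TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
+ *											SO_TYPE_SEQSCAN);
+ *		HeapTuple	tuple;
+ *
+ *		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ *			... process tuple ...
+ *		heap_endscan(scan);
+ *
+ * where the flags argument is a bitmask of ScanOptions from tableam.h.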
+ */ +typedef struct HeapScanDescData +{ + TableScanDescData rs_base; /* AM independent part of the descriptor */ + + /* state set up at initscan time */ + BlockNumber rs_nblocks; /* total number of blocks in rel */ + BlockNumber rs_startblock; /* block # to start at */ + BlockNumber rs_numblocks; /* max number of blocks to scan */ + /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */ + + /* scan current state */ + bool rs_inited; /* false = scan not init'd yet */ + BlockNumber rs_cblock; /* current block # in scan, if any */ + Buffer rs_cbuf; /* current buffer in scan, if any */ + /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ + + /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */ + BufferAccessStrategy rs_strategy; /* access strategy for reads */ + + HeapTupleData rs_ctup; /* current tuple in scan, if any */ + + /* + * For parallel scans to store page allocation data. NULL when not + * performing a parallel scan. + */ + ParallelBlockTableScanWorkerData *rs_parallelworkerdata; + + /* these fields only used in page-at-a-time mode and for bitmap scans */ + int rs_cindex; /* current tuple's index in vistuples */ + int rs_ntuples; /* number of visible tuples on page */ + OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ +} HeapScanDescData; +typedef struct HeapScanDescData *HeapScanDesc; + +/* + * Descriptor for fetches from heap via an index. + */ +typedef struct IndexFetchHeapData +{ + IndexFetchTableData xs_base; /* AM independent part of the descriptor */ + + Buffer xs_cbuf; /* current heap buffer in scan, if any */ + /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ +} IndexFetchHeapData; + +/* Result codes for HeapTupleSatisfiesVacuum */ +typedef enum +{ + HEAPTUPLE_DEAD, /* tuple is dead and deletable */ + HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */ + HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */ + HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */ + HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */ +} HTSV_Result; + +/* ---------------- + * function prototypes for heap access method + * + * heap_create, heap_create_with_catalog, and heap_drop_with_catalog + * are declared in catalog/heap.h + * ---------------- + */ + + +/* + * HeapScanIsValid + * True iff the heap scan is valid. 
+ */ +#define HeapScanIsValid(scan) PointerIsValid(scan) + +extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags); +extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk, + BlockNumber numBlks); +extern void heapgetpage(TableScanDesc scan, BlockNumber page); +extern void heap_rescan(TableScanDesc scan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); +extern void heap_endscan(TableScanDesc scan); +extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction); +extern bool heap_getnextslot(TableScanDesc sscan, + ScanDirection direction, struct TupleTableSlot *slot); +extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid, + ItemPointer maxtid); +extern bool heap_getnextslot_tidrange(TableScanDesc sscan, + ScanDirection direction, + TupleTableSlot *slot); +extern bool heap_fetch(Relation relation, Snapshot snapshot, + HeapTuple tuple, Buffer *userbuf); +extern bool heap_fetch_extended(Relation relation, Snapshot snapshot, + HeapTuple tuple, Buffer *userbuf, + bool keep_buf); +extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation, + Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, + bool *all_dead, bool first_call); + +extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid); + +extern BulkInsertState GetBulkInsertState(void); +extern void FreeBulkInsertState(BulkInsertState); +extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); + +extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid, + int options, BulkInsertState bistate); +extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, + int ntuples, CommandId cid, int options, + BulkInsertState bistate); +extern TM_Result heap_delete(Relation relation, ItemPointer tid, + CommandId cid, Snapshot crosscheck, bool wait, + struct TM_FailureData *tmfd, bool changingPart); +extern void heap_finish_speculative(Relation relation, ItemPointer tid); +extern void heap_abort_speculative(Relation relation, ItemPointer tid); +extern TM_Result heap_update(Relation relation, ItemPointer otid, + HeapTuple newtup, + CommandId cid, Snapshot crosscheck, bool wait, + struct TM_FailureData *tmfd, LockTupleMode *lockmode); +extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, + CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, + bool follow_update, + Buffer *buffer, struct TM_FailureData *tmfd); + +extern void heap_inplace_update(Relation relation, HeapTuple tuple); +extern bool heap_freeze_tuple(HeapTupleHeader tuple, + TransactionId relfrozenxid, TransactionId relminmxid, + TransactionId cutoff_xid, TransactionId cutoff_multi); +extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, + MultiXactId cutoff_multi, Buffer buf); +extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); + +extern void simple_heap_insert(Relation relation, HeapTuple tup); +extern void simple_heap_delete(Relation relation, ItemPointer tid); +extern void simple_heap_update(Relation relation, ItemPointer otid, + HeapTuple tup); + +extern TransactionId heap_index_delete_tuples(Relation rel, + TM_IndexDeleteOp *delstate); + +/* in heap/pruneheap.c */ +struct GlobalVisState; +extern void heap_page_prune_opt(Relation relation, Buffer buffer); +extern int heap_page_prune(Relation relation, Buffer buffer, + struct GlobalVisState *vistest, + TransactionId 
old_snap_xmin, + TimestampTz old_snap_ts_ts, + bool report_stats, + OffsetNumber *off_loc); +extern void heap_page_prune_execute(Buffer buffer, + OffsetNumber *redirected, int nredirected, + OffsetNumber *nowdead, int ndead, + OffsetNumber *nowunused, int nunused); +extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); + +/* in heap/vacuumlazy.c */ +struct VacuumParams; +extern void heap_vacuum_rel(Relation rel, + struct VacuumParams *params, BufferAccessStrategy bstrategy); +extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc); + +/* in heap/heapam_visibility.c */ +extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot, + Buffer buffer); +extern TM_Result HeapTupleSatisfiesUpdate(HeapTuple stup, CommandId curcid, + Buffer buffer); +extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin, + Buffer buffer); +extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple stup, Buffer buffer, + TransactionId *dead_after); +extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, + uint16 infomask, TransactionId xid); +extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); +extern bool HeapTupleIsSurelyDead(HeapTuple htup, + struct GlobalVisState *vistest); + +/* + * To avoid leaking too much knowledge about reorderbuffer implementation + * details this is implemented in reorderbuffer.c not heapam_visibility.c + */ +struct HTAB; +extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data, + Snapshot snapshot, + HeapTuple htup, + Buffer buffer, + CommandId *cmin, CommandId *cmax); +extern void HeapCheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple, + Buffer buffer, Snapshot snapshot); + +#endif /* HEAPAM_H */ diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h new file mode 100644 index 0000000..27db481 --- /dev/null +++ b/src/include/access/heapam_xlog.h @@ -0,0 +1,419 @@ +/*------------------------------------------------------------------------- + * + * heapam_xlog.h + * POSTGRES heap access XLOG definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/heapam_xlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef HEAPAM_XLOG_H +#define HEAPAM_XLOG_H + +#include "access/htup.h" +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/buf.h" +#include "storage/bufpage.h" +#include "storage/relfilenode.h" +#include "utils/relcache.h" + + +/* + * WAL record definitions for heapam.c's WAL operations + * + * XLOG allows to store some information in high 4 bits of log + * record xl_info field. We use 3 for opcode and one for init bit. + */ +#define XLOG_HEAP_INSERT 0x00 +#define XLOG_HEAP_DELETE 0x10 +#define XLOG_HEAP_UPDATE 0x20 +#define XLOG_HEAP_TRUNCATE 0x30 +#define XLOG_HEAP_HOT_UPDATE 0x40 +#define XLOG_HEAP_CONFIRM 0x50 +#define XLOG_HEAP_LOCK 0x60 +#define XLOG_HEAP_INPLACE 0x70 + +#define XLOG_HEAP_OPMASK 0x70 +/* + * When we insert 1st item on new page in INSERT, UPDATE, HOT_UPDATE, + * or MULTI_INSERT, we can (and we do) restore entire page in redo + */ +#define XLOG_HEAP_INIT_PAGE 0x80 +/* + * We ran out of opcodes, so heapam.c now has a second RmgrId. 
These opcodes + * are associated with RM_HEAP2_ID, but are not logically different from + * the ones above associated with RM_HEAP_ID. XLOG_HEAP_OPMASK applies to + * these, too. + */ +#define XLOG_HEAP2_REWRITE 0x00 +#define XLOG_HEAP2_PRUNE 0x10 +#define XLOG_HEAP2_VACUUM 0x20 +#define XLOG_HEAP2_FREEZE_PAGE 0x30 +#define XLOG_HEAP2_VISIBLE 0x40 +#define XLOG_HEAP2_MULTI_INSERT 0x50 +#define XLOG_HEAP2_LOCK_UPDATED 0x60 +#define XLOG_HEAP2_NEW_CID 0x70 + +/* + * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available. + */ +/* PD_ALL_VISIBLE was cleared */ +#define XLH_INSERT_ALL_VISIBLE_CLEARED (1<<0) +#define XLH_INSERT_LAST_IN_MULTI (1<<1) +#define XLH_INSERT_IS_SPECULATIVE (1<<2) +#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3) +#define XLH_INSERT_ON_TOAST_RELATION (1<<4) + +/* all_frozen_set always implies all_visible_set */ +#define XLH_INSERT_ALL_FROZEN_SET (1<<5) + +/* + * xl_heap_update flag values, 8 bits are available. + */ +/* PD_ALL_VISIBLE was cleared */ +#define XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED (1<<0) +/* PD_ALL_VISIBLE was cleared in the 2nd page */ +#define XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED (1<<1) +#define XLH_UPDATE_CONTAINS_OLD_TUPLE (1<<2) +#define XLH_UPDATE_CONTAINS_OLD_KEY (1<<3) +#define XLH_UPDATE_CONTAINS_NEW_TUPLE (1<<4) +#define XLH_UPDATE_PREFIX_FROM_OLD (1<<5) +#define XLH_UPDATE_SUFFIX_FROM_OLD (1<<6) + +/* convenience macro for checking whether any form of old tuple was logged */ +#define XLH_UPDATE_CONTAINS_OLD \ + (XLH_UPDATE_CONTAINS_OLD_TUPLE | XLH_UPDATE_CONTAINS_OLD_KEY) + +/* + * xl_heap_delete flag values, 8 bits are available. + */ +/* PD_ALL_VISIBLE was cleared */ +#define XLH_DELETE_ALL_VISIBLE_CLEARED (1<<0) +#define XLH_DELETE_CONTAINS_OLD_TUPLE (1<<1) +#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2) +#define XLH_DELETE_IS_SUPER (1<<3) +#define XLH_DELETE_IS_PARTITION_MOVE (1<<4) + +/* convenience macro for checking whether any form of old tuple was logged */ +#define XLH_DELETE_CONTAINS_OLD \ + (XLH_DELETE_CONTAINS_OLD_TUPLE | XLH_DELETE_CONTAINS_OLD_KEY) + +/* This is what we need to know about delete */ +typedef struct xl_heap_delete +{ + TransactionId xmax; /* xmax of the deleted tuple */ + OffsetNumber offnum; /* deleted tuple's offset */ + uint8 infobits_set; /* infomask bits */ + uint8 flags; +} xl_heap_delete; + +#define SizeOfHeapDelete (offsetof(xl_heap_delete, flags) + sizeof(uint8)) + +/* + * xl_heap_truncate flag values, 8 bits are available. + */ +#define XLH_TRUNCATE_CASCADE (1<<0) +#define XLH_TRUNCATE_RESTART_SEQS (1<<1) + +/* + * For truncate we list all truncated relids in an array, followed by all + * sequence relids that need to be restarted, if any. + * All rels are always within the same database, so we just list dbid once. + */ +typedef struct xl_heap_truncate +{ + Oid dbId; + uint32 nrelids; + uint8 flags; + Oid relids[FLEXIBLE_ARRAY_MEMBER]; +} xl_heap_truncate; + +#define SizeOfHeapTruncate (offsetof(xl_heap_truncate, relids)) + +/* + * We don't store the whole fixed part (HeapTupleHeaderData) of an inserted + * or updated tuple in WAL; we can save a few bytes by reconstructing the + * fields that are available elsewhere in the WAL record, or perhaps just + * plain needn't be reconstructed. These are the fields we must store. 
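+ *
+ * Illustrative sketch (mirroring what the insert/update redo routines do):
+ * replay rebuilds the tuple header from these three fields plus information
+ * already available in the WAL record itself, roughly
+ *
+ *		htup->t_infomask2 = xlhdr.t_infomask2;
+ *		htup->t_infomask = xlhdr.t_infomask;
+ *		htup->t_hoff = xlhdr.t_hoff;
+ *		HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ *		HeapTupleHeaderSetCmin(htup, FirstCommandId);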
+ */ +typedef struct xl_heap_header +{ + uint16 t_infomask2; + uint16 t_infomask; + uint8 t_hoff; +} xl_heap_header; + +#define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8)) + +/* This is what we need to know about insert */ +typedef struct xl_heap_insert +{ + OffsetNumber offnum; /* inserted tuple's offset */ + uint8 flags; + + /* xl_heap_header & TUPLE DATA in backup block 0 */ +} xl_heap_insert; + +#define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8)) + +/* + * This is what we need to know about a multi-insert. + * + * The main data of the record consists of this xl_heap_multi_insert header. + * 'offsets' array is omitted if the whole page is reinitialized + * (XLOG_HEAP_INIT_PAGE). + * + * In block 0's data portion, there is an xl_multi_insert_tuple struct, + * followed by the tuple data for each tuple. There is padding to align + * each xl_multi_insert_tuple struct. + */ +typedef struct xl_heap_multi_insert +{ + uint8 flags; + uint16 ntuples; + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; +} xl_heap_multi_insert; + +#define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets) + +typedef struct xl_multi_insert_tuple +{ + uint16 datalen; /* size of tuple data that follows */ + uint16 t_infomask2; + uint16 t_infomask; + uint8 t_hoff; + /* TUPLE DATA FOLLOWS AT END OF STRUCT */ +} xl_multi_insert_tuple; + +#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8)) + +/* + * This is what we need to know about update|hot_update + * + * Backup blk 0: new page + * + * If XLH_UPDATE_PREFIX_FROM_OLD or XLH_UPDATE_SUFFIX_FROM_OLD flags are set, + * the prefix and/or suffix come first, as one or two uint16s. + * + * After that, xl_heap_header and new tuple data follow. The new tuple + * data doesn't include the prefix and suffix, which are copied from the + * old tuple on replay. + * + * If XLH_UPDATE_CONTAINS_NEW_TUPLE flag is given, the tuple data is + * included even if a full-page image was taken. + * + * Backup blk 1: old page, if different. (no data, just a reference to the blk) + */ +typedef struct xl_heap_update +{ + TransactionId old_xmax; /* xmax of the old tuple */ + OffsetNumber old_offnum; /* old tuple's offset */ + uint8 old_infobits_set; /* infomask bits to set on old tuple */ + uint8 flags; + TransactionId new_xmax; /* xmax of the new tuple */ + OffsetNumber new_offnum; /* new tuple's offset */ + + /* + * If XLH_UPDATE_CONTAINS_OLD_TUPLE or XLH_UPDATE_CONTAINS_OLD_KEY flags + * are set, xl_heap_header and tuple data for the old tuple follow. + */ +} xl_heap_update; + +#define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber)) + +/* + * This is what we need to know about page pruning (both during VACUUM and + * during opportunistic pruning) + * + * The array of OffsetNumbers following the fixed part of the record contains: + * * for each redirected item: the item offset, then the offset redirected to + * * for each now-dead item: the item offset + * * for each now-unused item: the item offset + * The total number of OffsetNumbers is therefore 2*nredirected+ndead+nunused. + * Note that nunused is not explicitly stored, but may be found by reference + * to the total record length. + * + * Requires a super-exclusive lock. 
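+ *
+ * Worked example (numbers are made up): if the block 0 data holds 16
+ * OffsetNumbers and the record says nredirected = 3 and ndead = 4, the first
+ * 2*3 = 6 entries are the redirect pairs, the next 4 are the now-dead items,
+ * and the remaining 16 - 6 - 4 = 6 entries are the now-unused items.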
+ */ +typedef struct xl_heap_prune +{ + TransactionId latestRemovedXid; + uint16 nredirected; + uint16 ndead; + /* OFFSET NUMBERS are in the block reference 0 */ +} xl_heap_prune; + +#define SizeOfHeapPrune (offsetof(xl_heap_prune, ndead) + sizeof(uint16)) + +/* + * The vacuum page record is similar to the prune record, but can only mark + * already dead items as unused + * + * Used by heap vacuuming only. Does not require a super-exclusive lock. + */ +typedef struct xl_heap_vacuum +{ + uint16 nunused; + /* OFFSET NUMBERS are in the block reference 0 */ +} xl_heap_vacuum; + +#define SizeOfHeapVacuum (offsetof(xl_heap_vacuum, nunused) + sizeof(uint16)) + +/* flags for infobits_set */ +#define XLHL_XMAX_IS_MULTI 0x01 +#define XLHL_XMAX_LOCK_ONLY 0x02 +#define XLHL_XMAX_EXCL_LOCK 0x04 +#define XLHL_XMAX_KEYSHR_LOCK 0x08 +#define XLHL_KEYS_UPDATED 0x10 + +/* flag bits for xl_heap_lock / xl_heap_lock_updated's flag field */ +#define XLH_LOCK_ALL_FROZEN_CLEARED 0x01 + +/* This is what we need to know about lock */ +typedef struct xl_heap_lock +{ + TransactionId locking_xid; /* might be a MultiXactId not xid */ + OffsetNumber offnum; /* locked tuple's offset on page */ + int8 infobits_set; /* infomask and infomask2 bits to set */ + uint8 flags; /* XLH_LOCK_* flag bits */ +} xl_heap_lock; + +#define SizeOfHeapLock (offsetof(xl_heap_lock, flags) + sizeof(int8)) + +/* This is what we need to know about locking an updated version of a row */ +typedef struct xl_heap_lock_updated +{ + TransactionId xmax; + OffsetNumber offnum; + uint8 infobits_set; + uint8 flags; +} xl_heap_lock_updated; + +#define SizeOfHeapLockUpdated (offsetof(xl_heap_lock_updated, flags) + sizeof(uint8)) + +/* This is what we need to know about confirmation of speculative insertion */ +typedef struct xl_heap_confirm +{ + OffsetNumber offnum; /* confirmed tuple's offset on page */ +} xl_heap_confirm; + +#define SizeOfHeapConfirm (offsetof(xl_heap_confirm, offnum) + sizeof(OffsetNumber)) + +/* This is what we need to know about in-place update */ +typedef struct xl_heap_inplace +{ + OffsetNumber offnum; /* updated tuple's offset on page */ + /* TUPLE DATA FOLLOWS AT END OF STRUCT */ +} xl_heap_inplace; + +#define SizeOfHeapInplace (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber)) + +/* + * This struct represents a 'freeze plan', which is what we need to know about + * a single tuple being frozen during vacuum. + */ +/* 0x01 was XLH_FREEZE_XMIN */ +#define XLH_FREEZE_XVAC 0x02 +#define XLH_INVALID_XVAC 0x04 + +typedef struct xl_heap_freeze_tuple +{ + TransactionId xmax; + OffsetNumber offset; + uint16 t_infomask2; + uint16 t_infomask; + uint8 frzflags; +} xl_heap_freeze_tuple; + +/* + * This is what we need to know about a block being frozen during vacuum + * + * Backup block 0's data contains an array of xl_heap_freeze_tuple structs, + * one for each tuple. 
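+ *
+ * Illustrative sketch only: redo walks that array and applies each freeze
+ * plan to the tuple it names, roughly
+ *
+ *		for (i = 0; i < xlrec->ntuples; i++)
+ *		{
+ *			ItemId		lp = PageGetItemId(page, tuples[i].offset);
+ *			HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, lp);
+ *
+ *			heap_execute_freeze_tuple(htup, &tuples[i]);
+ *		}
+ *
+ * using heap_execute_freeze_tuple() declared later in this file.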
+ */ +typedef struct xl_heap_freeze_page +{ + TransactionId cutoff_xid; + uint16 ntuples; +} xl_heap_freeze_page; + +#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16)) + +/* + * This is what we need to know about setting a visibility map bit + * + * Backup blk 0: visibility map buffer + * Backup blk 1: heap buffer + */ +typedef struct xl_heap_visible +{ + TransactionId cutoff_xid; + uint8 flags; +} xl_heap_visible; + +#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8)) + +typedef struct xl_heap_new_cid +{ + /* + * store toplevel xid so we don't have to merge cids from different + * transactions + */ + TransactionId top_xid; + CommandId cmin; + CommandId cmax; + CommandId combocid; /* just for debugging */ + + /* + * Store the relfilenode/ctid pair to facilitate lookups. + */ + RelFileNode target_node; + ItemPointerData target_tid; +} xl_heap_new_cid; + +#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData)) + +/* logical rewrite xlog record header */ +typedef struct xl_heap_rewrite_mapping +{ + TransactionId mapped_xid; /* xid that might need to see the row */ + Oid mapped_db; /* DbOid or InvalidOid for shared rels */ + Oid mapped_rel; /* Oid of the mapped relation */ + off_t offset; /* How far have we written so far */ + uint32 num_mappings; /* Number of in-memory mappings */ + XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ +} xl_heap_rewrite_mapping; + +extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, + TransactionId *latestRemovedXid); + +extern void heap_redo(XLogReaderState *record); +extern void heap_desc(StringInfo buf, XLogReaderState *record); +extern const char *heap_identify(uint8 info); +extern void heap_mask(char *pagedata, BlockNumber blkno); +extern void heap2_redo(XLogReaderState *record); +extern void heap2_desc(StringInfo buf, XLogReaderState *record); +extern const char *heap2_identify(uint8 info); +extern void heap_xlog_logical_rewrite(XLogReaderState *r); + +extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, + TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, + int ntuples); +extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, + TransactionId relfrozenxid, + TransactionId relminmxid, + TransactionId cutoff_xid, + TransactionId cutoff_multi, + xl_heap_freeze_tuple *frz, + bool *totally_frozen); +extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, + xl_heap_freeze_tuple *xlrec_tp); +extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, + Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags); + +#endif /* HEAPAM_XLOG_H */ diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h new file mode 100644 index 0000000..8b29f1a --- /dev/null +++ b/src/include/access/heaptoast.h @@ -0,0 +1,149 @@ +/*------------------------------------------------------------------------- + * + * heaptoast.h + * Heap-specific definitions for external and compressed storage + * of variable size attributes. + * + * Copyright (c) 2000-2021, PostgreSQL Global Development Group + * + * src/include/access/heaptoast.h + * + *------------------------------------------------------------------------- + */ +#ifndef HEAPTOAST_H +#define HEAPTOAST_H + +#include "access/htup_details.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" + +/* + * Find the maximum size of a tuple if there are to be N tuples per page. 
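+ *
+ * Worked example (editorial addition): on a stock build with BLCKSZ = 8192,
+ * SizeOfPageHeaderData = 24, sizeof(ItemIdData) = 4 and 8-byte MAXALIGN,
+ * MaximumBytesPerTuple(4) evaluates to
+ *
+ *		MAXALIGN_DOWN((8192 - MAXALIGN(24 + 4 * 4)) / 4)
+ *			= MAXALIGN_DOWN(8152 / 4)
+ *			= MAXALIGN_DOWN(2038)
+ *			= 2032
+ *
+ * which is where the roughly-2kB TOAST threshold below comes from.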
+ */ +#define MaximumBytesPerTuple(tuplesPerPage) \ + MAXALIGN_DOWN((BLCKSZ - \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \ + / (tuplesPerPage)) + +/* + * These symbols control toaster activation. If a tuple is larger than + * TOAST_TUPLE_THRESHOLD, we will try to toast it down to no more than + * TOAST_TUPLE_TARGET bytes through compressing compressible fields and + * moving EXTENDED and EXTERNAL data out-of-line. + * + * The numbers need not be the same, though they currently are. It doesn't + * make sense for TARGET to exceed THRESHOLD, but it could be useful to make + * it be smaller. + * + * Currently we choose both values to match the largest tuple size for which + * TOAST_TUPLES_PER_PAGE tuples can fit on a heap page. + * + * XXX while these can be modified without initdb, some thought needs to be + * given to needs_toast_table() in toasting.c before unleashing random + * changes. Also see LOBLKSIZE in large_object.h, which can *not* be + * changed without initdb. + */ +#define TOAST_TUPLES_PER_PAGE 4 + +#define TOAST_TUPLE_THRESHOLD MaximumBytesPerTuple(TOAST_TUPLES_PER_PAGE) + +#define TOAST_TUPLE_TARGET TOAST_TUPLE_THRESHOLD + +/* + * The code will also consider moving MAIN data out-of-line, but only as a + * last resort if the previous steps haven't reached the target tuple size. + * In this phase we use a different target size, currently equal to the + * largest tuple that will fit on a heap page. This is reasonable since + * the user has told us to keep the data in-line if at all possible. + */ +#define TOAST_TUPLES_PER_PAGE_MAIN 1 + +#define TOAST_TUPLE_TARGET_MAIN MaximumBytesPerTuple(TOAST_TUPLES_PER_PAGE_MAIN) + +/* + * If an index value is larger than TOAST_INDEX_TARGET, we will try to + * compress it (we can't move it out-of-line, however). Note that this + * number is per-datum, not per-tuple, for simplicity in index_form_tuple(). + */ +#define TOAST_INDEX_TARGET (MaxHeapTupleSize / 16) + +/* + * When we store an oversize datum externally, we divide it into chunks + * containing at most TOAST_MAX_CHUNK_SIZE data bytes. This number *must* + * be small enough that the completed toast-table tuple (including the + * ID and sequence fields and all overhead) will fit on a page. + * The coding here sets the size on the theory that we want to fit + * EXTERN_TUPLES_PER_PAGE tuples of maximum size onto a page. + * + * NB: Changing TOAST_MAX_CHUNK_SIZE requires an initdb. + */ +#define EXTERN_TUPLES_PER_PAGE 4 /* tweak only this */ + +#define EXTERN_TUPLE_MAX_SIZE MaximumBytesPerTuple(EXTERN_TUPLES_PER_PAGE) + +#define TOAST_MAX_CHUNK_SIZE \ + (EXTERN_TUPLE_MAX_SIZE - \ + MAXALIGN(SizeofHeapTupleHeader) - \ + sizeof(Oid) - \ + sizeof(int32) - \ + VARHDRSZ) + +/* ---------- + * heap_toast_insert_or_update - + * + * Called by heap_insert() and heap_update(). + * ---------- + */ +extern HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, + HeapTuple oldtup, int options); + +/* ---------- + * heap_toast_delete - + * + * Called by heap_delete(). + * ---------- + */ +extern void heap_toast_delete(Relation rel, HeapTuple oldtup, + bool is_speculative); + +/* ---------- + * toast_flatten_tuple - + * + * "Flatten" a tuple to contain no out-of-line toasted fields. + * (This does not eliminate compressed or short-header datums.) 
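+ *
+ * Illustrative use (editorial addition; the calling code is hypothetical):
+ *
+ *		HeapTuple	flat = toast_flatten_tuple(tup, RelationGetDescr(rel));
+ *
+ *		... "flat" carries no TOAST pointers, so it remains usable even if
+ *		the out-of-line values are later removed; free it with
+ *		heap_freetuple(flat) when done ...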
+ * ---------- + */ +extern HeapTuple toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc); + +/* ---------- + * toast_flatten_tuple_to_datum - + * + * "Flatten" a tuple containing out-of-line toasted fields into a Datum. + * ---------- + */ +extern Datum toast_flatten_tuple_to_datum(HeapTupleHeader tup, + uint32 tup_len, + TupleDesc tupleDesc); + +/* ---------- + * toast_build_flattened_tuple - + * + * Build a tuple containing no out-of-line toasted fields. + * (This does not eliminate compressed or short-header datums.) + * ---------- + */ +extern HeapTuple toast_build_flattened_tuple(TupleDesc tupleDesc, + Datum *values, + bool *isnull); + +/* ---------- + * heap_fetch_toast_slice + * + * Fetch a slice from a toast value stored in a heap table. + * ---------- + */ +extern void heap_fetch_toast_slice(Relation toastrel, Oid valueid, + int32 attrsize, int32 sliceoffset, + int32 slicelength, struct varlena *result); + +#endif /* HEAPTOAST_H */ diff --git a/src/include/access/hio.h b/src/include/access/hio.h new file mode 100644 index 0000000..1d61128 --- /dev/null +++ b/src/include/access/hio.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * hio.h + * POSTGRES heap access method input/output definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/hio.h + * + *------------------------------------------------------------------------- + */ +#ifndef HIO_H +#define HIO_H + +#include "access/htup.h" +#include "storage/buf.h" +#include "utils/relcache.h" + +/* + * state for bulk inserts --- private to heapam.c and hio.c + * + * If current_buf isn't InvalidBuffer, then we are holding an extra pin + * on that buffer. + * + * "typedef struct BulkInsertStateData *BulkInsertState" is in heapam.h + */ +typedef struct BulkInsertStateData +{ + BufferAccessStrategy strategy; /* our BULKWRITE strategy object */ + Buffer current_buf; /* current insertion target page */ +} BulkInsertStateData; + + +extern void RelationPutHeapTuple(Relation relation, Buffer buffer, + HeapTuple tuple, bool token); +extern Buffer RelationGetBufferForTuple(Relation relation, Size len, + Buffer otherBuffer, int options, + BulkInsertStateData *bistate, + Buffer *vmbuffer, Buffer *vmbuffer_other); + +#endif /* HIO_H */ diff --git a/src/include/access/htup.h b/src/include/access/htup.h new file mode 100644 index 0000000..cf0bbd7 --- /dev/null +++ b/src/include/access/htup.h @@ -0,0 +1,89 @@ +/*------------------------------------------------------------------------- + * + * htup.h + * POSTGRES heap tuple definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/htup.h + * + *------------------------------------------------------------------------- + */ +#ifndef HTUP_H +#define HTUP_H + +#include "storage/itemptr.h" + +/* typedefs and forward declarations for structs defined in htup_details.h */ + +typedef struct HeapTupleHeaderData HeapTupleHeaderData; + +typedef HeapTupleHeaderData *HeapTupleHeader; + +typedef struct MinimalTupleData MinimalTupleData; + +typedef MinimalTupleData *MinimalTuple; + + +/* + * HeapTupleData is an in-memory data structure that points to a tuple. 
+ * + * There are several ways in which this data structure is used: + * + * * Pointer to a tuple in a disk buffer: t_data points directly into the + * buffer (which the code had better be holding a pin on, but this is not + * reflected in HeapTupleData itself). + * + * * Pointer to nothing: t_data is NULL. This is used as a failure indication + * in some functions. + * + * * Part of a palloc'd tuple: the HeapTupleData itself and the tuple + * form a single palloc'd chunk. t_data points to the memory location + * immediately following the HeapTupleData struct (at offset HEAPTUPLESIZE). + * This is the output format of heap_form_tuple and related routines. + * + * * Separately allocated tuple: t_data points to a palloc'd chunk that + * is not adjacent to the HeapTupleData. (This case is deprecated since + * it's difficult to tell apart from case #1. It should be used only in + * limited contexts where the code knows that case #1 will never apply.) + * + * * Separately allocated minimal tuple: t_data points MINIMAL_TUPLE_OFFSET + * bytes before the start of a MinimalTuple. As with the previous case, + * this can't be told apart from case #1 by inspection; code setting up + * or destroying this representation has to know what it's doing. + * + * t_len should always be valid, except in the pointer-to-nothing case. + * t_self and t_tableOid should be valid if the HeapTupleData points to + * a disk buffer, or if it represents a copy of a tuple on disk. They + * should be explicitly set invalid in manufactured tuples. + */ +typedef struct HeapTupleData +{ + uint32 t_len; /* length of *t_data */ + ItemPointerData t_self; /* SelfItemPointer */ + Oid t_tableOid; /* table the tuple came from */ +#define FIELDNO_HEAPTUPLEDATA_DATA 3 + HeapTupleHeader t_data; /* -> tuple header and data */ +} HeapTupleData; + +typedef HeapTupleData *HeapTuple; + +#define HEAPTUPLESIZE MAXALIGN(sizeof(HeapTupleData)) + +/* + * Accessor macros to be used with HeapTuple pointers. + */ +#define HeapTupleIsValid(tuple) PointerIsValid(tuple) + +/* HeapTupleHeader functions implemented in utils/time/combocid.c */ +extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); +extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); +extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, + CommandId *cmax, bool *iscombo); + +/* Prototype for HeapTupleHeader accessors in heapam.c */ +extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple); + +#endif /* HTUP_H */ diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h new file mode 100644 index 0000000..960772f --- /dev/null +++ b/src/include/access/htup_details.h @@ -0,0 +1,818 @@ +/*------------------------------------------------------------------------- + * + * htup_details.h + * POSTGRES heap tuple header definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/htup_details.h + * + *------------------------------------------------------------------------- + */ +#ifndef HTUP_DETAILS_H +#define HTUP_DETAILS_H + +#include "access/htup.h" +#include "access/transam.h" +#include "access/tupdesc.h" +#include "access/tupmacs.h" +#include "storage/bufpage.h" + +/* + * MaxTupleAttributeNumber limits the number of (user) columns in a tuple. 
+ * The key limit on this value is that the size of the fixed overhead for + * a tuple, plus the size of the null-values bitmap (at 1 bit per column), + * plus MAXALIGN alignment, must fit into t_hoff which is uint8. On most + * machines the upper limit without making t_hoff wider would be a little + * over 1700. We use round numbers here and for MaxHeapAttributeNumber + * so that alterations in HeapTupleHeaderData layout won't change the + * supported max number of columns. + */ +#define MaxTupleAttributeNumber 1664 /* 8 * 208 */ + +/* + * MaxHeapAttributeNumber limits the number of (user) columns in a table. + * This should be somewhat less than MaxTupleAttributeNumber. It must be + * at least one less, else we will fail to do UPDATEs on a maximal-width + * table (because UPDATE has to form working tuples that include CTID). + * In practice we want some additional daylight so that we can gracefully + * support operations that add hidden "resjunk" columns, for example + * SELECT * FROM wide_table ORDER BY foo, bar, baz. + * In any case, depending on column data types you will likely be running + * into the disk-block-based limit on overall tuple size if you have more + * than a thousand or so columns. TOAST won't help. + */ +#define MaxHeapAttributeNumber 1600 /* 8 * 200 */ + +/* + * Heap tuple header. To avoid wasting space, the fields should be + * laid out in such a way as to avoid structure padding. + * + * Datums of composite types (row types) share the same general structure + * as on-disk tuples, so that the same routines can be used to build and + * examine them. However the requirements are slightly different: a Datum + * does not need any transaction visibility information, and it does need + * a length word and some embedded type information. We can achieve this + * by overlaying the xmin/cmin/xmax/cmax/xvac fields of a heap tuple + * with the fields needed in the Datum case. Typically, all tuples built + * in-memory will be initialized with the Datum fields; but when a tuple is + * about to be inserted in a table, the transaction fields will be filled, + * overwriting the datum fields. + * + * The overall structure of a heap tuple looks like: + * fixed fields (HeapTupleHeaderData struct) + * nulls bitmap (if HEAP_HASNULL is set in t_infomask) + * alignment padding (as needed to make user data MAXALIGN'd) + * object ID (if HEAP_HASOID_OLD is set in t_infomask, not created + * anymore) + * user data fields + * + * We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac in three + * physical fields. Xmin and Xmax are always really stored, but Cmin, Cmax + * and Xvac share a field. This works because we know that Cmin and Cmax + * are only interesting for the lifetime of the inserting and deleting + * transaction respectively. If a tuple is inserted and deleted in the same + * transaction, we store a "combo" command id that can be mapped to the real + * cmin and cmax, but only by use of local state within the originating + * backend. See combocid.c for more details. Meanwhile, Xvac is only set by + * old-style VACUUM FULL, which does not have any command sub-structure and so + * does not need either Cmin or Cmax. (This requires that old-style VACUUM + * FULL never try to move a tuple whose Cmin or Cmax is still interesting, + * ie, an insert-in-progress or delete-in-progress tuple.) + * + * A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid + * is initialized with its own TID (location). 
If the tuple is ever updated, + * its t_ctid is changed to point to the replacement version of the tuple. Or + * if the tuple is moved from one partition to another, due to an update of + * the partition key, t_ctid is set to a special value to indicate that + * (see ItemPointerSetMovedPartitions). Thus, a tuple is the latest version + * of its row iff XMAX is invalid or + * t_ctid points to itself (in which case, if XMAX is valid, the tuple is + * either locked or deleted). One can follow the chain of t_ctid links + * to find the newest version of the row, unless it was moved to a different + * partition. Beware however that VACUUM might + * erase the pointed-to (newer) tuple before erasing the pointing (older) + * tuple. Hence, when following a t_ctid link, it is necessary to check + * to see if the referenced slot is empty or contains an unrelated tuple. + * Check that the referenced tuple has XMIN equal to the referencing tuple's + * XMAX to verify that it is actually the descendant version and not an + * unrelated tuple stored into a slot recently freed by VACUUM. If either + * check fails, one may assume that there is no live descendant version. + * + * t_ctid is sometimes used to store a speculative insertion token, instead + * of a real TID. A speculative token is set on a tuple that's being + * inserted, until the inserter is sure that it wants to go ahead with the + * insertion. Hence a token should only be seen on a tuple with an XMAX + * that's still in-progress, or invalid/aborted. The token is replaced with + * the tuple's real TID when the insertion is confirmed. One should never + * see a speculative insertion token while following a chain of t_ctid links, + * because they are not used on updates, only insertions. + * + * Following the fixed header fields, the nulls bitmap is stored (beginning + * at t_bits). The bitmap is *not* stored if t_infomask shows that there + * are no nulls in the tuple. If an OID field is present (as indicated by + * t_infomask), then it is stored just before the user data, which begins at + * the offset shown by t_hoff. Note that t_hoff must be a multiple of + * MAXALIGN. + */ + +typedef struct HeapTupleFields +{ + TransactionId t_xmin; /* inserting xact ID */ + TransactionId t_xmax; /* deleting or locking xact ID */ + + union + { + CommandId t_cid; /* inserting or deleting command ID, or both */ + TransactionId t_xvac; /* old-style VACUUM FULL xact ID */ + } t_field3; +} HeapTupleFields; + +typedef struct DatumTupleFields +{ + int32 datum_len_; /* varlena header (do not touch directly!) */ + + int32 datum_typmod; /* -1, or identifier of a record type */ + + Oid datum_typeid; /* composite type OID, or RECORDOID */ + + /* + * datum_typeid cannot be a domain over composite, only plain composite, + * even if the datum is meant as a value of a domain-over-composite type. + * This is in line with the general principle that CoerceToDomain does not + * change the physical representation of the base type value. + * + * Note: field ordering is chosen with thought that Oid might someday + * widen to 64 bits. + */ +} DatumTupleFields; + +struct HeapTupleHeaderData +{ + union + { + HeapTupleFields t_heap; + DatumTupleFields t_datum; + } t_choice; + + ItemPointerData t_ctid; /* current TID of this or newer tuple (or a + * speculative insertion token) */ + + /* Fields below here must match MinimalTupleData! 
*/ + +#define FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2 2 + uint16 t_infomask2; /* number of attributes + various flags */ + +#define FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK 3 + uint16 t_infomask; /* various flag bits, see below */ + +#define FIELDNO_HEAPTUPLEHEADERDATA_HOFF 4 + uint8 t_hoff; /* sizeof header incl. bitmap, padding */ + + /* ^ - 23 bytes - ^ */ + +#define FIELDNO_HEAPTUPLEHEADERDATA_BITS 5 + bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */ + + /* MORE DATA FOLLOWS AT END OF STRUCT */ +}; + +/* typedef appears in htup.h */ + +#define SizeofHeapTupleHeader offsetof(HeapTupleHeaderData, t_bits) + +/* + * information stored in t_infomask: + */ +#define HEAP_HASNULL 0x0001 /* has null attribute(s) */ +#define HEAP_HASVARWIDTH 0x0002 /* has variable-width attribute(s) */ +#define HEAP_HASEXTERNAL 0x0004 /* has external stored attribute(s) */ +#define HEAP_HASOID_OLD 0x0008 /* has an object-id field */ +#define HEAP_XMAX_KEYSHR_LOCK 0x0010 /* xmax is a key-shared locker */ +#define HEAP_COMBOCID 0x0020 /* t_cid is a combo CID */ +#define HEAP_XMAX_EXCL_LOCK 0x0040 /* xmax is exclusive locker */ +#define HEAP_XMAX_LOCK_ONLY 0x0080 /* xmax, if valid, is only a locker */ + + /* xmax is a shared locker */ +#define HEAP_XMAX_SHR_LOCK (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK) + +#define HEAP_LOCK_MASK (HEAP_XMAX_SHR_LOCK | HEAP_XMAX_EXCL_LOCK | \ + HEAP_XMAX_KEYSHR_LOCK) +#define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */ +#define HEAP_XMIN_INVALID 0x0200 /* t_xmin invalid/aborted */ +#define HEAP_XMIN_FROZEN (HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID) +#define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */ +#define HEAP_XMAX_INVALID 0x0800 /* t_xmax invalid/aborted */ +#define HEAP_XMAX_IS_MULTI 0x1000 /* t_xmax is a MultiXactId */ +#define HEAP_UPDATED 0x2000 /* this is UPDATEd version of row */ +#define HEAP_MOVED_OFF 0x4000 /* moved to another place by pre-9.0 + * VACUUM FULL; kept for binary + * upgrade support */ +#define HEAP_MOVED_IN 0x8000 /* moved from another place by pre-9.0 + * VACUUM FULL; kept for binary + * upgrade support */ +#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN) + +#define HEAP_XACT_MASK 0xFFF0 /* visibility-related bits */ + +/* + * A tuple is only locked (i.e. not updated by its Xmax) if the + * HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is + * not a multi and the EXCL_LOCK bit is set. + * + * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible + * aborted updater transaction. + * + * Beware of multiple evaluations of the argument. + */ +#define HEAP_XMAX_IS_LOCKED_ONLY(infomask) \ + (((infomask) & HEAP_XMAX_LOCK_ONLY) || \ + (((infomask) & (HEAP_XMAX_IS_MULTI | HEAP_LOCK_MASK)) == HEAP_XMAX_EXCL_LOCK)) + +/* + * A tuple that has HEAP_XMAX_IS_MULTI and HEAP_XMAX_LOCK_ONLY but neither of + * HEAP_XMAX_EXCL_LOCK and HEAP_XMAX_KEYSHR_LOCK must come from a tuple that was + * share-locked in 9.2 or earlier and then pg_upgrade'd. + * + * In 9.2 and prior, HEAP_XMAX_IS_MULTI was only set when there were multiple + * FOR SHARE lockers of that tuple. That set HEAP_XMAX_LOCK_ONLY (with a + * different name back then) but neither of HEAP_XMAX_EXCL_LOCK and + * HEAP_XMAX_KEYSHR_LOCK. That combination is no longer possible in 9.3 and + * up, so if we see that combination we know for certain that the tuple was + * locked in an earlier release; since all such lockers are gone (they cannot + * survive through pg_upgrade), such tuples can safely be considered not + * locked. 
+ * + * We must not resolve such multixacts locally, because the result would be + * bogus, regardless of where they stand with respect to the current valid + * multixact range. + */ +#define HEAP_LOCKED_UPGRADED(infomask) \ +( \ + ((infomask) & HEAP_XMAX_IS_MULTI) != 0 && \ + ((infomask) & HEAP_XMAX_LOCK_ONLY) != 0 && \ + (((infomask) & (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK)) == 0) \ +) + +/* + * Use these to test whether a particular lock is applied to a tuple + */ +#define HEAP_XMAX_IS_SHR_LOCKED(infomask) \ + (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_SHR_LOCK) +#define HEAP_XMAX_IS_EXCL_LOCKED(infomask) \ + (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_EXCL_LOCK) +#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) \ + (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_KEYSHR_LOCK) + +/* turn these all off when Xmax is to change */ +#define HEAP_XMAX_BITS (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID | \ + HEAP_XMAX_IS_MULTI | HEAP_LOCK_MASK | HEAP_XMAX_LOCK_ONLY) + +/* + * information stored in t_infomask2: + */ +#define HEAP_NATTS_MASK 0x07FF /* 11 bits for number of attributes */ +/* bits 0x1800 are available */ +#define HEAP_KEYS_UPDATED 0x2000 /* tuple was updated and key cols + * modified, or tuple deleted */ +#define HEAP_HOT_UPDATED 0x4000 /* tuple was HOT-updated */ +#define HEAP_ONLY_TUPLE 0x8000 /* this is heap-only tuple */ + +#define HEAP2_XACT_MASK 0xE000 /* visibility-related bits */ + +/* + * HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins. It is + * only used in tuples that are in the hash table, and those don't need + * any visibility information, so we can overlay it on a visibility flag + * instead of using up a dedicated bit. + */ +#define HEAP_TUPLE_HAS_MATCH HEAP_ONLY_TUPLE /* tuple has a join match */ + +/* + * HeapTupleHeader accessor macros + * + * Note: beware of multiple evaluations of "tup" argument. But the Set + * macros evaluate their other argument only once. + */ + +/* + * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid + * originally used to insert the tuple. However, the tuple might actually + * be frozen (via HeapTupleHeaderSetXminFrozen) in which case the tuple's xmin + * is visible to every snapshot. Prior to PostgreSQL 9.4, we actually changed + * the xmin to FrozenTransactionId, and that value may still be encountered + * on disk. + */ +#define HeapTupleHeaderGetRawXmin(tup) \ +( \ + (tup)->t_choice.t_heap.t_xmin \ +) + +#define HeapTupleHeaderGetXmin(tup) \ +( \ + HeapTupleHeaderXminFrozen(tup) ? 
\ + FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \ +) + +#define HeapTupleHeaderSetXmin(tup, xid) \ +( \ + (tup)->t_choice.t_heap.t_xmin = (xid) \ +) + +#define HeapTupleHeaderXminCommitted(tup) \ +( \ + ((tup)->t_infomask & HEAP_XMIN_COMMITTED) != 0 \ +) + +#define HeapTupleHeaderXminInvalid(tup) \ +( \ + ((tup)->t_infomask & (HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID)) == \ + HEAP_XMIN_INVALID \ +) + +#define HeapTupleHeaderXminFrozen(tup) \ +( \ + ((tup)->t_infomask & (HEAP_XMIN_FROZEN)) == HEAP_XMIN_FROZEN \ +) + +#define HeapTupleHeaderSetXminCommitted(tup) \ +( \ + AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \ + ((tup)->t_infomask |= HEAP_XMIN_COMMITTED) \ +) + +#define HeapTupleHeaderSetXminInvalid(tup) \ +( \ + AssertMacro(!HeapTupleHeaderXminCommitted(tup)), \ + ((tup)->t_infomask |= HEAP_XMIN_INVALID) \ +) + +#define HeapTupleHeaderSetXminFrozen(tup) \ +( \ + AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \ + ((tup)->t_infomask |= HEAP_XMIN_FROZEN) \ +) + +/* + * HeapTupleHeaderGetRawXmax gets you the raw Xmax field. To find out the Xid + * that updated a tuple, you might need to resolve the MultiXactId if certain + * bits are set. HeapTupleHeaderGetUpdateXid checks those bits and takes care + * to resolve the MultiXactId if necessary. This might involve multixact I/O, + * so it should only be used if absolutely necessary. + */ +#define HeapTupleHeaderGetUpdateXid(tup) \ +( \ + (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \ + ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \ + !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ + HeapTupleGetUpdateXid(tup) \ + : \ + HeapTupleHeaderGetRawXmax(tup) \ +) + +#define HeapTupleHeaderGetRawXmax(tup) \ +( \ + (tup)->t_choice.t_heap.t_xmax \ +) + +#define HeapTupleHeaderSetXmax(tup, xid) \ +( \ + (tup)->t_choice.t_heap.t_xmax = (xid) \ +) + +/* + * HeapTupleHeaderGetRawCommandId will give you what's in the header whether + * it is useful or not. Most code should use HeapTupleHeaderGetCmin or + * HeapTupleHeaderGetCmax instead, but note that those Assert that you can + * get a legitimate result, ie you are in the originating transaction! + */ +#define HeapTupleHeaderGetRawCommandId(tup) \ +( \ + (tup)->t_choice.t_heap.t_field3.t_cid \ +) + +/* SetCmin is reasonably simple since we never need a combo CID */ +#define HeapTupleHeaderSetCmin(tup, cid) \ +do { \ + Assert(!((tup)->t_infomask & HEAP_MOVED)); \ + (tup)->t_choice.t_heap.t_field3.t_cid = (cid); \ + (tup)->t_infomask &= ~HEAP_COMBOCID; \ +} while (0) + +/* SetCmax must be used after HeapTupleHeaderAdjustCmax; see combocid.c */ +#define HeapTupleHeaderSetCmax(tup, cid, iscombo) \ +do { \ + Assert(!((tup)->t_infomask & HEAP_MOVED)); \ + (tup)->t_choice.t_heap.t_field3.t_cid = (cid); \ + if (iscombo) \ + (tup)->t_infomask |= HEAP_COMBOCID; \ + else \ + (tup)->t_infomask &= ~HEAP_COMBOCID; \ +} while (0) + +#define HeapTupleHeaderGetXvac(tup) \ +( \ + ((tup)->t_infomask & HEAP_MOVED) ? 
\ + (tup)->t_choice.t_heap.t_field3.t_xvac \ + : \ + InvalidTransactionId \ +) + +#define HeapTupleHeaderSetXvac(tup, xid) \ +do { \ + Assert((tup)->t_infomask & HEAP_MOVED); \ + (tup)->t_choice.t_heap.t_field3.t_xvac = (xid); \ +} while (0) + +#define HeapTupleHeaderIsSpeculative(tup) \ +( \ + (ItemPointerGetOffsetNumberNoCheck(&(tup)->t_ctid) == SpecTokenOffsetNumber) \ +) + +#define HeapTupleHeaderGetSpeculativeToken(tup) \ +( \ + AssertMacro(HeapTupleHeaderIsSpeculative(tup)), \ + ItemPointerGetBlockNumber(&(tup)->t_ctid) \ +) + +#define HeapTupleHeaderSetSpeculativeToken(tup, token) \ +( \ + ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \ +) + +#define HeapTupleHeaderIndicatesMovedPartitions(tup) \ + ItemPointerIndicatesMovedPartitions(&(tup)->t_ctid) + +#define HeapTupleHeaderSetMovedPartitions(tup) \ + ItemPointerSetMovedPartitions(&(tup)->t_ctid) + +#define HeapTupleHeaderGetDatumLength(tup) \ + VARSIZE(tup) + +#define HeapTupleHeaderSetDatumLength(tup, len) \ + SET_VARSIZE(tup, len) + +#define HeapTupleHeaderGetTypeId(tup) \ +( \ + (tup)->t_choice.t_datum.datum_typeid \ +) + +#define HeapTupleHeaderSetTypeId(tup, typeid) \ +( \ + (tup)->t_choice.t_datum.datum_typeid = (typeid) \ +) + +#define HeapTupleHeaderGetTypMod(tup) \ +( \ + (tup)->t_choice.t_datum.datum_typmod \ +) + +#define HeapTupleHeaderSetTypMod(tup, typmod) \ +( \ + (tup)->t_choice.t_datum.datum_typmod = (typmod) \ +) + +/* + * Note that we stop considering a tuple HOT-updated as soon as it is known + * aborted or the would-be updating transaction is known aborted. For best + * efficiency, check tuple visibility before using this macro, so that the + * INVALID bits will be as up to date as possible. + */ +#define HeapTupleHeaderIsHotUpdated(tup) \ +( \ + ((tup)->t_infomask2 & HEAP_HOT_UPDATED) != 0 && \ + ((tup)->t_infomask & HEAP_XMAX_INVALID) == 0 && \ + !HeapTupleHeaderXminInvalid(tup) \ +) + +#define HeapTupleHeaderSetHotUpdated(tup) \ +( \ + (tup)->t_infomask2 |= HEAP_HOT_UPDATED \ +) + +#define HeapTupleHeaderClearHotUpdated(tup) \ +( \ + (tup)->t_infomask2 &= ~HEAP_HOT_UPDATED \ +) + +#define HeapTupleHeaderIsHeapOnly(tup) \ +( \ + ((tup)->t_infomask2 & HEAP_ONLY_TUPLE) != 0 \ +) + +#define HeapTupleHeaderSetHeapOnly(tup) \ +( \ + (tup)->t_infomask2 |= HEAP_ONLY_TUPLE \ +) + +#define HeapTupleHeaderClearHeapOnly(tup) \ +( \ + (tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \ +) + +#define HeapTupleHeaderHasMatch(tup) \ +( \ + ((tup)->t_infomask2 & HEAP_TUPLE_HAS_MATCH) != 0 \ +) + +#define HeapTupleHeaderSetMatch(tup) \ +( \ + (tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \ +) + +#define HeapTupleHeaderClearMatch(tup) \ +( \ + (tup)->t_infomask2 &= ~HEAP_TUPLE_HAS_MATCH \ +) + +#define HeapTupleHeaderGetNatts(tup) \ + ((tup)->t_infomask2 & HEAP_NATTS_MASK) + +#define HeapTupleHeaderSetNatts(tup, natts) \ +( \ + (tup)->t_infomask2 = ((tup)->t_infomask2 & ~HEAP_NATTS_MASK) | (natts) \ +) + +#define HeapTupleHeaderHasExternal(tup) \ + (((tup)->t_infomask & HEAP_HASEXTERNAL) != 0) + + +/* + * BITMAPLEN(NATTS) - + * Computes size of null bitmap given number of data columns. + */ +#define BITMAPLEN(NATTS) (((int)(NATTS) + 7) / 8) + +/* + * MaxHeapTupleSize is the maximum allowed size of a heap tuple, including + * header and MAXALIGN alignment padding. Basically it's BLCKSZ minus the + * other stuff that has to be on a disk page. Since heap pages use no + * "special space", there's no deduction for that. 
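+ *
+ * For reference (editorial addition): with the default BLCKSZ of 8192 and
+ * 8-byte MAXALIGN these work out to
+ *
+ *		MaxHeapTupleSize = 8192 - MAXALIGN(24 + 4) = 8160
+ *		MinHeapTupleSize = MAXALIGN(23) = 24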
+ * + * NOTE: we allow for the ItemId that must point to the tuple, ensuring that + * an otherwise-empty page can indeed hold a tuple of this size. Because + * ItemIds and tuples have different alignment requirements, don't assume that + * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page. + */ +#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) +#define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader) + +/* + * MaxHeapTuplesPerPage is an upper bound on the number of tuples that can + * fit on one heap page. (Note that indexes could have more, because they + * use a smaller tuple header.) We arrive at the divisor because each tuple + * must be maxaligned, and it must have an associated line pointer. + * + * Note: with HOT, there could theoretically be more line pointers (not actual + * tuples) than this on a heap page. However we constrain the number of line + * pointers to this anyway, to avoid excessive line-pointer bloat and not + * require increases in the size of work arrays. + */ +#define MaxHeapTuplesPerPage \ + ((int) ((BLCKSZ - SizeOfPageHeaderData) / \ + (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData)))) + +/* + * MaxAttrSize is a somewhat arbitrary upper limit on the declared size of + * data fields of char(n) and similar types. It need not have anything + * directly to do with the *actual* upper limit of varlena values, which + * is currently 1Gb (see TOAST structures in postgres.h). I've set it + * at 10Mb which seems like a reasonable number --- tgl 8/6/00. + */ +#define MaxAttrSize (10 * 1024 * 1024) + + +/* + * MinimalTuple is an alternative representation that is used for transient + * tuples inside the executor, in places where transaction status information + * is not required, the tuple rowtype is known, and shaving off a few bytes + * is worthwhile because we need to store many tuples. The representation + * is chosen so that tuple access routines can work with either full or + * minimal tuples via a HeapTupleData pointer structure. The access routines + * see no difference, except that they must not access the transaction status + * or t_ctid fields because those aren't there. + * + * For the most part, MinimalTuples should be accessed via TupleTableSlot + * routines. These routines will prevent access to the "system columns" + * and thereby prevent accidental use of the nonexistent fields. + * + * MinimalTupleData contains a length word, some padding, and fields matching + * HeapTupleHeaderData beginning with t_infomask2. The padding is chosen so + * that offsetof(t_infomask2) is the same modulo MAXIMUM_ALIGNOF in both + * structs. This makes data alignment rules equivalent in both cases. + * + * When a minimal tuple is accessed via a HeapTupleData pointer, t_data is + * set to point MINIMAL_TUPLE_OFFSET bytes before the actual start of the + * minimal tuple --- that is, where a full tuple matching the minimal tuple's + * data would start. This trick is what makes the structs seem equivalent. + * + * Note that t_hoff is computed the same as in a full tuple, hence it includes + * the MINIMAL_TUPLE_OFFSET distance. t_len does not include that, however. + * + * MINIMAL_TUPLE_DATA_OFFSET is the offset to the first useful (non-pad) data + * other than the length word. tuplesort.c and tuplestore.c use this to avoid + * writing the padding to disk. 
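+ *
+ * For reference (editorial addition): on a build with 8-byte
+ * MAXIMUM_ALIGNOF, offsetof(HeapTupleHeaderData, t_infomask2) is 18, so the
+ * macros below evaluate to
+ *
+ *		MINIMAL_TUPLE_OFFSET      = (18 - 4) / 8 * 8 = 8
+ *		MINIMAL_TUPLE_PADDING     = (18 - 4) % 8 = 6
+ *		MINIMAL_TUPLE_DATA_OFFSET = offsetof(MinimalTupleData, t_infomask2) = 10
+ *
+ * leaving t_infomask2 at offset 10 in the minimal tuple versus 18 in the
+ * full header, i.e. equal modulo MAXIMUM_ALIGNOF, as required.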
+ */ +#define MINIMAL_TUPLE_OFFSET \ + ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) / MAXIMUM_ALIGNOF * MAXIMUM_ALIGNOF) +#define MINIMAL_TUPLE_PADDING \ + ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) % MAXIMUM_ALIGNOF) +#define MINIMAL_TUPLE_DATA_OFFSET \ + offsetof(MinimalTupleData, t_infomask2) + +struct MinimalTupleData +{ + uint32 t_len; /* actual length of minimal tuple */ + + char mt_padding[MINIMAL_TUPLE_PADDING]; + + /* Fields below here must match HeapTupleHeaderData! */ + + uint16 t_infomask2; /* number of attributes + various flags */ + + uint16 t_infomask; /* various flag bits, see below */ + + uint8 t_hoff; /* sizeof header incl. bitmap, padding */ + + /* ^ - 23 bytes - ^ */ + + bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */ + + /* MORE DATA FOLLOWS AT END OF STRUCT */ +}; + +/* typedef appears in htup.h */ + +#define SizeofMinimalTupleHeader offsetof(MinimalTupleData, t_bits) + + +/* + * GETSTRUCT - given a HeapTuple pointer, return address of the user data + */ +#define GETSTRUCT(TUP) ((char *) ((TUP)->t_data) + (TUP)->t_data->t_hoff) + +/* + * Accessor macros to be used with HeapTuple pointers. + */ + +#define HeapTupleHasNulls(tuple) \ + (((tuple)->t_data->t_infomask & HEAP_HASNULL) != 0) + +#define HeapTupleNoNulls(tuple) \ + (!((tuple)->t_data->t_infomask & HEAP_HASNULL)) + +#define HeapTupleHasVarWidth(tuple) \ + (((tuple)->t_data->t_infomask & HEAP_HASVARWIDTH) != 0) + +#define HeapTupleAllFixed(tuple) \ + (!((tuple)->t_data->t_infomask & HEAP_HASVARWIDTH)) + +#define HeapTupleHasExternal(tuple) \ + (((tuple)->t_data->t_infomask & HEAP_HASEXTERNAL) != 0) + +#define HeapTupleIsHotUpdated(tuple) \ + HeapTupleHeaderIsHotUpdated((tuple)->t_data) + +#define HeapTupleSetHotUpdated(tuple) \ + HeapTupleHeaderSetHotUpdated((tuple)->t_data) + +#define HeapTupleClearHotUpdated(tuple) \ + HeapTupleHeaderClearHotUpdated((tuple)->t_data) + +#define HeapTupleIsHeapOnly(tuple) \ + HeapTupleHeaderIsHeapOnly((tuple)->t_data) + +#define HeapTupleSetHeapOnly(tuple) \ + HeapTupleHeaderSetHeapOnly((tuple)->t_data) + +#define HeapTupleClearHeapOnly(tuple) \ + HeapTupleHeaderClearHeapOnly((tuple)->t_data) + + +/* ---------------- + * fastgetattr + * + * Fetch a user attribute's value as a Datum (might be either a + * value, or a pointer into the data area of the tuple). + * + * This must not be used when a system attribute might be requested. + * Furthermore, the passed attnum MUST be valid. Use heap_getattr() + * instead, if in doubt. + * + * This gets called many times, so we macro the cacheable and NULL + * lookups, and call nocachegetattr() for the rest. + * ---------------- + */ + +#if !defined(DISABLE_COMPLEX_MACRO) + +#define fastgetattr(tup, attnum, tupleDesc, isnull) \ +( \ + AssertMacro((attnum) > 0), \ + (*(isnull) = false), \ + HeapTupleNoNulls(tup) ? \ + ( \ + TupleDescAttr((tupleDesc), (attnum)-1)->attcacheoff >= 0 ? \ + ( \ + fetchatt(TupleDescAttr((tupleDesc), (attnum)-1), \ + (char *) (tup)->t_data + (tup)->t_data->t_hoff + \ + TupleDescAttr((tupleDesc), (attnum)-1)->attcacheoff)\ + ) \ + : \ + nocachegetattr((tup), (attnum), (tupleDesc)) \ + ) \ + : \ + ( \ + att_isnull((attnum)-1, (tup)->t_data->t_bits) ? 
\ + ( \ + (*(isnull) = true), \ + (Datum)NULL \ + ) \ + : \ + ( \ + nocachegetattr((tup), (attnum), (tupleDesc)) \ + ) \ + ) \ +) +#else /* defined(DISABLE_COMPLEX_MACRO) */ + +extern Datum fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, + bool *isnull); +#endif /* defined(DISABLE_COMPLEX_MACRO) */ + + +/* ---------------- + * heap_getattr + * + * Extract an attribute of a heap tuple and return it as a Datum. + * This works for either system or user attributes. The given attnum + * is properly range-checked. + * + * If the field in question has a NULL value, we return a zero Datum + * and set *isnull == true. Otherwise, we set *isnull == false. + * + * <tup> is the pointer to the heap tuple. <attnum> is the attribute + * number of the column (field) caller wants. <tupleDesc> is a + * pointer to the structure describing the row and all its fields. + * ---------------- + */ +#define heap_getattr(tup, attnum, tupleDesc, isnull) \ + ( \ + ((attnum) > 0) ? \ + ( \ + ((attnum) > (int) HeapTupleHeaderGetNatts((tup)->t_data)) ? \ + getmissingattr((tupleDesc), (attnum), (isnull)) \ + : \ + fastgetattr((tup), (attnum), (tupleDesc), (isnull)) \ + ) \ + : \ + heap_getsysattr((tup), (attnum), (tupleDesc), (isnull)) \ + ) + + +/* prototypes for functions in common/heaptuple.c */ +extern Size heap_compute_data_size(TupleDesc tupleDesc, + Datum *values, bool *isnull); +extern void heap_fill_tuple(TupleDesc tupleDesc, + Datum *values, bool *isnull, + char *data, Size data_size, + uint16 *infomask, bits8 *bit); +extern bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc); +extern Datum nocachegetattr(HeapTuple tup, int attnum, + TupleDesc att); +extern Datum heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, + bool *isnull); +extern Datum getmissingattr(TupleDesc tupleDesc, + int attnum, bool *isnull); +extern HeapTuple heap_copytuple(HeapTuple tuple); +extern void heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest); +extern Datum heap_copy_tuple_as_datum(HeapTuple tuple, TupleDesc tupleDesc); +extern HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, + Datum *values, bool *isnull); +extern HeapTuple heap_modify_tuple(HeapTuple tuple, + TupleDesc tupleDesc, + Datum *replValues, + bool *replIsnull, + bool *doReplace); +extern HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, + TupleDesc tupleDesc, + int nCols, + int *replCols, + Datum *replValues, + bool *replIsnull); +extern void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, + Datum *values, bool *isnull); +extern void heap_freetuple(HeapTuple htup); +extern MinimalTuple heap_form_minimal_tuple(TupleDesc tupleDescriptor, + Datum *values, bool *isnull); +extern void heap_free_minimal_tuple(MinimalTuple mtup); +extern MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup); +extern HeapTuple heap_tuple_from_minimal_tuple(MinimalTuple mtup); +extern MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup); +extern size_t varsize_any(void *p); +extern HeapTuple heap_expand_tuple(HeapTuple sourceTuple, TupleDesc tupleDesc); +extern MinimalTuple minimal_expand_tuple(HeapTuple sourceTuple, TupleDesc tupleDesc); + +#endif /* HTUP_DETAILS_H */ diff --git a/src/include/access/itup.h b/src/include/access/itup.h new file mode 100644 index 0000000..1917375 --- /dev/null +++ b/src/include/access/itup.h @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------- + * + * itup.h + * POSTGRES index tuple definitions. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/itup.h + * + *------------------------------------------------------------------------- + */ +#ifndef ITUP_H +#define ITUP_H + +#include "access/tupdesc.h" +#include "access/tupmacs.h" +#include "storage/bufpage.h" +#include "storage/itemptr.h" + +/* + * Index tuple header structure + * + * All index tuples start with IndexTupleData. If the HasNulls bit is set, + * this is followed by an IndexAttributeBitMapData. The index attribute + * values follow, beginning at a MAXALIGN boundary. + * + * Note that the space allocated for the bitmap does not vary with the number + * of attributes; that is because we don't have room to store the number of + * attributes in the header. Given the MAXALIGN constraint there's no space + * savings to be had anyway, for usual values of INDEX_MAX_KEYS. + */ + +typedef struct IndexTupleData +{ + ItemPointerData t_tid; /* reference TID to heap tuple */ + + /* --------------- + * t_info is laid out in the following fashion: + * + * 15th (high) bit: has nulls + * 14th bit: has var-width attributes + * 13th bit: AM-defined meaning + * 12-0 bit: size of tuple + * --------------- + */ + + unsigned short t_info; /* various info about tuple */ + +} IndexTupleData; /* MORE DATA FOLLOWS AT END OF STRUCT */ + +typedef IndexTupleData *IndexTuple; + +typedef struct IndexAttributeBitMapData +{ + bits8 bits[(INDEX_MAX_KEYS + 8 - 1) / 8]; +} IndexAttributeBitMapData; + +typedef IndexAttributeBitMapData * IndexAttributeBitMap; + +/* + * t_info manipulation macros + */ +#define INDEX_SIZE_MASK 0x1FFF +#define INDEX_AM_RESERVED_BIT 0x2000 /* reserved for index-AM specific + * usage */ +#define INDEX_VAR_MASK 0x4000 +#define INDEX_NULL_MASK 0x8000 + +#define IndexTupleSize(itup) ((Size) ((itup)->t_info & INDEX_SIZE_MASK)) +#define IndexTupleHasNulls(itup) ((((IndexTuple) (itup))->t_info & INDEX_NULL_MASK)) +#define IndexTupleHasVarwidths(itup) ((((IndexTuple) (itup))->t_info & INDEX_VAR_MASK)) + + +/* + * Takes an infomask as argument (primarily because this needs to be usable + * at index_form_tuple time so enough space is allocated). + */ +#define IndexInfoFindDataOffset(t_info) \ +( \ + (!((t_info) & INDEX_NULL_MASK)) ? \ + ( \ + (Size)MAXALIGN(sizeof(IndexTupleData)) \ + ) \ + : \ + ( \ + (Size)MAXALIGN(sizeof(IndexTupleData) + sizeof(IndexAttributeBitMapData)) \ + ) \ +) + +/* ---------------- + * index_getattr + * + * This gets called many times, so we macro the cacheable and NULL + * lookups, and call nocache_index_getattr() for the rest. + * + * ---------------- + */ +#define index_getattr(tup, attnum, tupleDesc, isnull) \ +( \ + AssertMacro(PointerIsValid(isnull) && (attnum) > 0), \ + *(isnull) = false, \ + !IndexTupleHasNulls(tup) ? \ + ( \ + TupleDescAttr((tupleDesc), (attnum)-1)->attcacheoff >= 0 ? \ + ( \ + fetchatt(TupleDescAttr((tupleDesc), (attnum)-1), \ + (char *) (tup) + IndexInfoFindDataOffset((tup)->t_info) \ + + TupleDescAttr((tupleDesc), (attnum)-1)->attcacheoff) \ + ) \ + : \ + nocache_index_getattr((tup), (attnum), (tupleDesc)) \ + ) \ + : \ + ( \ + (att_isnull((attnum)-1, (char *)(tup) + sizeof(IndexTupleData))) ? \ + ( \ + *(isnull) = true, \ + (Datum)NULL \ + ) \ + : \ + ( \ + nocache_index_getattr((tup), (attnum), (tupleDesc)) \ + ) \ + ) \ +) + +/* + * MaxIndexTuplesPerPage is an upper bound on the number of tuples that can + * fit on one index page. 
An index tuple must have either data or a null + * bitmap, so we can safely assume it's at least 1 byte bigger than a bare + * IndexTupleData struct. We arrive at the divisor because each tuple + * must be maxaligned, and it must have an associated line pointer. + * + * To be index-type-independent, this does not account for any special space + * on the page, and is thus conservative. + * + * Note: in btree non-leaf pages, the first tuple has no key (it's implicitly + * minus infinity), thus breaking the "at least 1 byte bigger" assumption. + * On such a page, N tuples could take one MAXALIGN quantum less space than + * estimated here, seemingly allowing one more tuple than estimated here. + * But such a page always has at least MAXALIGN special space, so we're safe. + */ +#define MaxIndexTuplesPerPage \ + ((int) ((BLCKSZ - SizeOfPageHeaderData) / \ + (MAXALIGN(sizeof(IndexTupleData) + 1) + sizeof(ItemIdData)))) + + +/* routines in indextuple.c */ +extern IndexTuple index_form_tuple(TupleDesc tupleDescriptor, + Datum *values, bool *isnull); +extern Datum nocache_index_getattr(IndexTuple tup, int attnum, + TupleDesc tupleDesc); +extern void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, + Datum *values, bool *isnull); +extern void index_deform_tuple_internal(TupleDesc tupleDescriptor, + Datum *values, bool *isnull, + char *tp, bits8 *bp, int hasnulls); +extern IndexTuple CopyIndexTuple(IndexTuple source); +extern IndexTuple index_truncate_tuple(TupleDesc sourceDescriptor, + IndexTuple source, int leavenatts); + +#endif /* ITUP_H */ diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h new file mode 100644 index 0000000..4bbb035 --- /dev/null +++ b/src/include/access/multixact.h @@ -0,0 +1,164 @@ +/* + * multixact.h + * + * PostgreSQL multi-transaction-log manager + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/multixact.h + */ +#ifndef MULTIXACT_H +#define MULTIXACT_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/sync.h" + + +/* + * The first two MultiXactId values are reserved to store the truncation Xid + * and epoch of the first segment, so we start assigning multixact values from + * 2. + */ +#define InvalidMultiXactId ((MultiXactId) 0) +#define FirstMultiXactId ((MultiXactId) 1) +#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF) + +#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) + +#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) + +/* Number of SLRU buffers to use for multixact */ +#define NUM_MULTIXACTOFFSET_BUFFERS 8 +#define NUM_MULTIXACTMEMBER_BUFFERS 16 + +/* + * Possible multixact lock modes ("status"). The first four modes are for + * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the + * next two are used for update and delete modes. + */ +typedef enum +{ + MultiXactStatusForKeyShare = 0x00, + MultiXactStatusForShare = 0x01, + MultiXactStatusForNoKeyUpdate = 0x02, + MultiXactStatusForUpdate = 0x03, + /* an update that doesn't touch "key" columns */ + MultiXactStatusNoKeyUpdate = 0x04, + /* other updates, and delete */ + MultiXactStatusUpdate = 0x05 +} MultiXactStatus; + +#define MaxMultiXactStatus MultiXactStatusUpdate + +/* does a status value correspond to a tuple update? 
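+ * (Editorial note: that is, only MultiXactStatusNoKeyUpdate and
+ * MultiXactStatusUpdate, the two modes greater than
+ * MultiXactStatusForUpdate, qualify; the four locking modes do not.  A
+ * hypothetical caller iterating over GetMultiXactIdMembers() output might
+ * use it as
+ *
+ *		if (ISUPDATE_from_mxstatus(members[i].status))
+ *			update_xid = members[i].xid;
+ *
+ * since at most one member of a multixact can be an updater.)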
*/ +#define ISUPDATE_from_mxstatus(status) \ + ((status) > MultiXactStatusForUpdate) + + +typedef struct MultiXactMember +{ + TransactionId xid; + MultiXactStatus status; +} MultiXactMember; + + +/* ---------------- + * multixact-related XLOG entries + * ---------------- + */ + +#define XLOG_MULTIXACT_ZERO_OFF_PAGE 0x00 +#define XLOG_MULTIXACT_ZERO_MEM_PAGE 0x10 +#define XLOG_MULTIXACT_CREATE_ID 0x20 +#define XLOG_MULTIXACT_TRUNCATE_ID 0x30 + +typedef struct xl_multixact_create +{ + MultiXactId mid; /* new MultiXact's ID */ + MultiXactOffset moff; /* its starting offset in members file */ + int32 nmembers; /* number of member XIDs */ + MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]; +} xl_multixact_create; + +#define SizeOfMultiXactCreate (offsetof(xl_multixact_create, members)) + +typedef struct xl_multixact_truncate +{ + Oid oldestMultiDB; + + /* to-be-truncated range of multixact offsets */ + MultiXactId startTruncOff; /* just for completeness' sake */ + MultiXactId endTruncOff; + + /* to-be-truncated range of multixact members */ + MultiXactOffset startTruncMemb; + MultiXactOffset endTruncMemb; +} xl_multixact_truncate; + +#define SizeOfMultiXactTruncate (sizeof(xl_multixact_truncate)) + + +extern MultiXactId MultiXactIdCreate(TransactionId xid1, + MultiXactStatus status1, TransactionId xid2, + MultiXactStatus status2); +extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid, + MultiXactStatus status); +extern MultiXactId MultiXactIdCreateFromMembers(int nmembers, + MultiXactMember *members); + +extern MultiXactId ReadNextMultiXactId(void); +extern void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next); +extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly); +extern void MultiXactIdSetOldestMember(void); +extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids, + bool allow_old, bool isLockOnly); +extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2); +extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, + MultiXactId multi2); + +extern int multixactoffsetssyncfiletag(const FileTag *ftag, char *path); +extern int multixactmemberssyncfiletag(const FileTag *ftag, char *path); + +extern void AtEOXact_MultiXact(void); +extern void AtPrepare_MultiXact(void); +extern void PostPrepare_MultiXact(TransactionId xid); + +extern Size MultiXactShmemSize(void); +extern void MultiXactShmemInit(void); +extern void BootStrapMultiXact(void); +extern void StartupMultiXact(void); +extern void TrimMultiXact(void); +extern void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, + Oid oldest_datoid, + bool is_startup); +extern void MultiXactGetCheckptMulti(bool is_shutdown, + MultiXactId *nextMulti, + MultiXactOffset *nextMultiOffset, + MultiXactId *oldestMulti, + Oid *oldestMultiDB); +extern void CheckPointMultiXact(void); +extern MultiXactId GetOldestMultiXactId(void); +extern void TruncateMultiXact(MultiXactId oldestMulti, Oid oldestMultiDB); +extern void MultiXactSetNextMXact(MultiXactId nextMulti, + MultiXactOffset nextMultiOffset); +extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, + MultiXactOffset minMultiOffset); +extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); +extern int MultiXactMemberFreezeThreshold(void); + +extern void multixact_twophase_recover(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void multixact_twophase_postcommit(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void multixact_twophase_postabort(TransactionId 
xid, uint16 info, + void *recdata, uint32 len); + +extern void multixact_redo(XLogReaderState *record); +extern void multixact_desc(StringInfo buf, XLogReaderState *record); +extern const char *multixact_identify(uint8 info); +extern char *mxid_to_string(MultiXactId multi, int nmembers, + MultiXactMember *members); + +#endif /* MULTIXACT_H */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h new file mode 100644 index 0000000..30a216e --- /dev/null +++ b/src/include/access/nbtree.h @@ -0,0 +1,1286 @@ +/*------------------------------------------------------------------------- + * + * nbtree.h + * header file for postgres btree access method implementation. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/nbtree.h + * + *------------------------------------------------------------------------- + */ +#ifndef NBTREE_H +#define NBTREE_H + +#include "access/amapi.h" +#include "access/itup.h" +#include "access/sdir.h" +#include "access/tableam.h" +#include "access/xlogreader.h" +#include "catalog/pg_am_d.h" +#include "catalog/pg_index.h" +#include "lib/stringinfo.h" +#include "storage/bufmgr.h" +#include "storage/shm_toc.h" + +/* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */ +typedef uint16 BTCycleId; + +/* + * BTPageOpaqueData -- At the end of every page, we store a pointer + * to both siblings in the tree. This is used to do forward/backward + * index scans. The next-page link is also critical for recovery when + * a search has navigated to the wrong page due to concurrent page splits + * or deletions; see src/backend/access/nbtree/README for more info. + * + * In addition, we store the page's btree level (counting upwards from + * zero at a leaf page) as well as some flag bits indicating the page type + * and status. If the page is deleted, a BTDeletedPageData struct is stored + * in the page's tuple area, while a standard BTPageOpaqueData struct is + * stored in the page special area. + * + * We also store a "vacuum cycle ID". When a page is split while VACUUM is + * processing the index, a nonzero value associated with the VACUUM run is + * stored into both halves of the split page. (If VACUUM is not running, + * both pages receive zero cycleids.) This allows VACUUM to detect whether + * a page was split since it started, with a small probability of false match + * if the page was last split some exact multiple of MAX_BT_CYCLE_ID VACUUMs + * ago. Also, during a split, the BTP_SPLIT_END flag is cleared in the left + * (original) page, and set in the right page, but only if the next page + * to its right has a different cycleid. + * + * NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested + * instead. + * + * NOTE: the btpo_level field used to be a union type in order to allow + * deleted pages to store a 32-bit safexid in the same field. We now store + * 64-bit/full safexid values using BTDeletedPageData instead. + */ + +typedef struct BTPageOpaqueData +{ + BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ + BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ + uint32 btpo_level; /* tree level --- zero for leaf pages */ + uint16 btpo_flags; /* flag bits, see below */ + BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */ +} BTPageOpaqueData; + +typedef BTPageOpaqueData *BTPageOpaque; + +/* Bits defined in btpo_flags */ +#define BTP_LEAF (1 << 0) /* leaf page, i.e. 
not internal page */ +#define BTP_ROOT (1 << 1) /* root page (has no parent) */ +#define BTP_DELETED (1 << 2) /* page has been deleted from tree */ +#define BTP_META (1 << 3) /* meta-page */ +#define BTP_HALF_DEAD (1 << 4) /* empty, but still in tree */ +#define BTP_SPLIT_END (1 << 5) /* rightmost page of split group */ +#define BTP_HAS_GARBAGE (1 << 6) /* page has LP_DEAD tuples (deprecated) */ +#define BTP_INCOMPLETE_SPLIT (1 << 7) /* right sibling's downlink is missing */ +#define BTP_HAS_FULLXID (1 << 8) /* contains BTDeletedPageData */ + +/* + * The max allowed value of a cycle ID is a bit less than 64K. This is + * for convenience of pg_filedump and similar utilities: we want to use + * the last 2 bytes of special space as an index type indicator, and + * restricting cycle ID lets btree use that space for vacuum cycle IDs + * while still allowing index type to be identified. + */ +#define MAX_BT_CYCLE_ID 0xFF7F + + +/* + * The Meta page is always the first page in the btree index. + * Its primary purpose is to point to the location of the btree root page. + * We also point to the "fast" root, which is the current effective root; + * see README for discussion. + */ + +typedef struct BTMetaPageData +{ + uint32 btm_magic; /* should contain BTREE_MAGIC */ + uint32 btm_version; /* nbtree version (always <= BTREE_VERSION) */ + BlockNumber btm_root; /* current root location */ + uint32 btm_level; /* tree level of the root page */ + BlockNumber btm_fastroot; /* current "fast" root location */ + uint32 btm_fastlevel; /* tree level of the "fast" root page */ + /* remaining fields only valid when btm_version >= BTREE_NOVAC_VERSION */ + + /* number of deleted, non-recyclable pages during last cleanup */ + uint32 btm_last_cleanup_num_delpages; + /* number of heap tuples during last cleanup (deprecated) */ + float8 btm_last_cleanup_num_heap_tuples; + + bool btm_allequalimage; /* are all columns "equalimage"? */ +} BTMetaPageData; + +#define BTPageGetMeta(p) \ + ((BTMetaPageData *) PageGetContents(p)) + +/* + * The current Btree version is 4. That's what you'll get when you create + * a new index. + * + * Btree version 3 was used in PostgreSQL v11. It is mostly the same as + * version 4, but heap TIDs were not part of the keyspace. Index tuples + * with duplicate keys could be stored in any order. We continue to + * support reading and writing Btree versions 2 and 3, so that they don't + * need to be immediately re-indexed at pg_upgrade. In order to get the + * new heapkeyspace semantics, however, a REINDEX is needed. + * + * Deduplication is safe to use when the btm_allequalimage field is set to + * true. It's safe to read the btm_allequalimage field on version 3, but + * only version 4 indexes make use of deduplication. Even version 4 + * indexes created on PostgreSQL v12 will need a REINDEX to make use of + * deduplication, though, since there is no other way to set + * btm_allequalimage to true (pg_upgrade hasn't been taught to set the + * metapage field). + * + * Btree version 2 is mostly the same as version 3. There are two new + * fields in the metapage that were introduced in version 3. A version 2 + * metapage will be automatically upgraded to version 3 on the first + * insert to it. INCLUDE indexes cannot use version 2. 
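 *
 * [Editorial sketch, not part of the original header.]  For illustration
 * only, a reader of the metapage might apply these version rules roughly as
 * follows ("metapg" is an assumed Page holding the metapage; BTPageGetMeta()
 * and the BTREE_* constants are defined just below):
 *
 *     BTMetaPageData *metad = BTPageGetMeta(metapg);
 *
 *     Assert(metad->btm_magic == BTREE_MAGIC);
 *     Assert(metad->btm_version >= BTREE_MIN_VERSION &&
 *            metad->btm_version <= BTREE_VERSION);
 *     bool        heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
 *
 * The last line mirrors what _bt_metaversion() reports: only version 4
 * indexes have heapkeyspace semantics (and only they can use deduplication,
 * subject to btm_allequalimage).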
+ */ +#define BTREE_METAPAGE 0 /* first page is meta */ +#define BTREE_MAGIC 0x053162 /* magic number in metapage */ +#define BTREE_VERSION 4 /* current version number */ +#define BTREE_MIN_VERSION 2 /* minimum supported version */ +#define BTREE_NOVAC_VERSION 3 /* version with all meta fields set */ + +/* + * Maximum size of a btree index entry, including its tuple header. + * + * We actually need to be able to fit three items on every page, + * so restrict any one item to 1/3 the per-page available space. + * + * There are rare cases where _bt_truncate() will need to enlarge + * a heap index tuple to make space for a tiebreaker heap TID + * attribute, which we account for here. + */ +#define BTMaxItemSize(page) \ + MAXALIGN_DOWN((PageGetPageSize(page) - \ + MAXALIGN(SizeOfPageHeaderData + \ + 3*sizeof(ItemIdData) + \ + 3*sizeof(ItemPointerData)) - \ + MAXALIGN(sizeof(BTPageOpaqueData))) / 3) +#define BTMaxItemSizeNoHeapTid(page) \ + MAXALIGN_DOWN((PageGetPageSize(page) - \ + MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \ + MAXALIGN(sizeof(BTPageOpaqueData))) / 3) + +/* + * MaxTIDsPerBTreePage is an upper bound on the number of heap TIDs tuples + * that may be stored on a btree leaf page. It is used to size the + * per-page temporary buffers. + * + * Note: we don't bother considering per-tuple overheads here to keep + * things simple (value is based on how many elements a single array of + * heap TIDs must have to fill the space between the page header and + * special area). The value is slightly higher (i.e. more conservative) + * than necessary as a result, which is considered acceptable. + */ +#define MaxTIDsPerBTreePage \ + (int) ((BLCKSZ - SizeOfPageHeaderData - sizeof(BTPageOpaqueData)) / \ + sizeof(ItemPointerData)) + +/* + * The leaf-page fillfactor defaults to 90% but is user-adjustable. + * For pages above the leaf level, we use a fixed 70% fillfactor. + * The fillfactor is applied during index build and when splitting + * a rightmost page; when splitting non-rightmost pages we try to + * divide the data equally. When splitting a page that's entirely + * filled with a single value (duplicates), the effective leaf-page + * fillfactor is 96%, regardless of whether the page is a rightmost + * page. + */ +#define BTREE_MIN_FILLFACTOR 10 +#define BTREE_DEFAULT_FILLFACTOR 90 +#define BTREE_NONLEAF_FILLFACTOR 70 +#define BTREE_SINGLEVAL_FILLFACTOR 96 + +/* + * In general, the btree code tries to localize its knowledge about + * page layout to a couple of routines. However, we need a special + * value to indicate "no page number" in those places where we expect + * page numbers. We can use zero for this because we never need to + * make a pointer to the metadata page. + */ + +#define P_NONE 0 + +/* + * Macros to test whether a page is leftmost or rightmost on its tree level, + * as well as other state info kept in the opaque data. 
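 *
 * [Editorial sketch, not part of the original header.]  Typical usage is to
 * fetch the opaque struct from the page special space and then test these
 * macros ("page" is an assumed Page already read into a buffer):
 *
 *     BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 *
 *     if (P_IGNORE(opaque))
 *         ... page is half-dead or deleted; skip it ...
 *     else if (P_ISLEAF(opaque))
 *         ... level zero page: its non-pivot tuples carry heap TIDs ...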
+ */ +#define P_LEFTMOST(opaque) ((opaque)->btpo_prev == P_NONE) +#define P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE) +#define P_ISLEAF(opaque) (((opaque)->btpo_flags & BTP_LEAF) != 0) +#define P_ISROOT(opaque) (((opaque)->btpo_flags & BTP_ROOT) != 0) +#define P_ISDELETED(opaque) (((opaque)->btpo_flags & BTP_DELETED) != 0) +#define P_ISMETA(opaque) (((opaque)->btpo_flags & BTP_META) != 0) +#define P_ISHALFDEAD(opaque) (((opaque)->btpo_flags & BTP_HALF_DEAD) != 0) +#define P_IGNORE(opaque) (((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0) +#define P_HAS_GARBAGE(opaque) (((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0) +#define P_INCOMPLETE_SPLIT(opaque) (((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0) +#define P_HAS_FULLXID(opaque) (((opaque)->btpo_flags & BTP_HAS_FULLXID) != 0) + +/* + * BTDeletedPageData is the page contents of a deleted page + */ +typedef struct BTDeletedPageData +{ + FullTransactionId safexid; /* See BTPageIsRecyclable() */ +} BTDeletedPageData; + +static inline void +BTPageSetDeleted(Page page, FullTransactionId safexid) +{ + BTPageOpaque opaque; + PageHeader header; + BTDeletedPageData *contents; + + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + header = ((PageHeader) page); + + opaque->btpo_flags &= ~BTP_HALF_DEAD; + opaque->btpo_flags |= BTP_DELETED | BTP_HAS_FULLXID; + header->pd_lower = MAXALIGN(SizeOfPageHeaderData) + + sizeof(BTDeletedPageData); + header->pd_upper = header->pd_special; + + /* Set safexid in deleted page */ + contents = ((BTDeletedPageData *) PageGetContents(page)); + contents->safexid = safexid; +} + +static inline FullTransactionId +BTPageGetDeleteXid(Page page) +{ + BTPageOpaque opaque; + BTDeletedPageData *contents; + + /* We only expect to be called with a deleted page */ + Assert(!PageIsNew(page)); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + Assert(P_ISDELETED(opaque)); + + /* pg_upgrade'd deleted page -- must be safe to delete now */ + if (!P_HAS_FULLXID(opaque)) + return FirstNormalFullTransactionId; + + /* Get safexid from deleted page */ + contents = ((BTDeletedPageData *) PageGetContents(page)); + return contents->safexid; +} + +/* + * Is an existing page recyclable? + * + * This exists to centralize the policy on which deleted pages are now safe to + * re-use. However, _bt_pendingfsm_finalize() duplicates some of the same + * logic because it doesn't work directly with pages -- keep the two in sync. + * + * Note: PageIsNew() pages are always safe to recycle, but we can't deal with + * them here (caller is responsible for that case themselves). Caller might + * well need special handling for new pages anyway. + */ +static inline bool +BTPageIsRecyclable(Page page) +{ + BTPageOpaque opaque; + + Assert(!PageIsNew(page)); + + /* Recycling okay iff page is deleted and safexid is old enough */ + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (P_ISDELETED(opaque)) + { + /* + * The page was deleted, but when? If it was just deleted, a scan + * might have seen the downlink to it, and will read the page later. + * As long as that can happen, we must keep the deleted page around as + * a tombstone. + * + * For that check if the deletion XID could still be visible to + * anyone. If not, then no scan that's still in progress could have + * seen its downlink, and we can recycle it. + * + * XXX: If we had the heap relation we could be more aggressive about + * recycling deleted pages in non-catalog relations. For now we just + * pass NULL. That is at least simple and consistent. 
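	 *
	 * [Editorial sketch, not part of the original comment.]  A typical
	 * caller (for example, the nbtree VACUUM code) uses the result to
	 * decide whether the page may be reported to the free space map,
	 * roughly:
	 *
	 *     if (BTPageIsRecyclable(page))
	 *         RecordFreeIndexPage(rel, blkno);    (see storage/indexfsm.h)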
+ */ + return GlobalVisCheckRemovableFullXid(NULL, BTPageGetDeleteXid(page)); + } + + return false; +} + +/* + * BTVacState and BTPendingFSM are private nbtree.c state used during VACUUM. + * They are exported for use by page deletion related code in nbtpage.c. + */ +typedef struct BTPendingFSM +{ + BlockNumber target; /* Page deleted by current VACUUM */ + FullTransactionId safexid; /* Page's BTDeletedPageData.safexid */ +} BTPendingFSM; + +typedef struct BTVacState +{ + IndexVacuumInfo *info; + IndexBulkDeleteResult *stats; + IndexBulkDeleteCallback callback; + void *callback_state; + BTCycleId cycleid; + MemoryContext pagedelcontext; + + /* + * _bt_pendingfsm_finalize() state + */ + int bufsize; /* pendingpages space (in # elements) */ + int maxbufsize; /* max bufsize that respects work_mem */ + BTPendingFSM *pendingpages; /* One entry per newly deleted page */ + int npendingpages; /* current # valid pendingpages */ +} BTVacState; + +/* + * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost + * page. The high key is not a tuple that is used to visit the heap. It is + * a pivot tuple (see "Notes on B-Tree tuple format" below for definition). + * The high key on a page is required to be greater than or equal to any + * other key that appears on the page. If we find ourselves trying to + * insert a key that is strictly > high key, we know we need to move right + * (this should only happen if the page was split since we examined the + * parent page). + * + * Our insertion algorithm guarantees that we can use the initial least key + * on our right sibling as the high key. Once a page is created, its high + * key changes only if the page is split. + * + * On a non-rightmost page, the high key lives in item 1 and data items + * start in item 2. Rightmost pages have no high key, so we store data + * items beginning in item 1. + */ + +#define P_HIKEY ((OffsetNumber) 1) +#define P_FIRSTKEY ((OffsetNumber) 2) +#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY) + +/* + * Notes on B-Tree tuple format, and key and non-key attributes: + * + * INCLUDE B-Tree indexes have non-key attributes. These are extra + * attributes that may be returned by index-only scans, but do not influence + * the order of items in the index (formally, non-key attributes are not + * considered to be part of the key space). Non-key attributes are only + * present in leaf index tuples whose item pointers actually point to heap + * tuples (non-pivot tuples). _bt_check_natts() enforces the rules + * described here. + * + * Non-pivot tuple format (plain/non-posting variant): + * + * t_tid | t_info | key values | INCLUDE columns, if any + * + * t_tid points to the heap TID, which is a tiebreaker key column as of + * BTREE_VERSION 4. + * + * Non-pivot tuples complement pivot tuples, which only have key columns. + * The sole purpose of pivot tuples is to represent how the key space is + * separated. In general, any B-Tree index that has more than one level + * (i.e. any index that does not just consist of a metapage and a single + * leaf root page) must have some number of pivot tuples, since pivot + * tuples are used for traversing the tree. Suffix truncation can omit + * trailing key columns when a new pivot is formed, which makes minus + * infinity their logical value. 
Since BTREE_VERSION 4 indexes treat heap + * TID as a trailing key column that ensures that all index tuples are + * physically unique, it is necessary to represent heap TID as a trailing + * key column in pivot tuples, though very often this can be truncated + * away, just like any other key column. (Actually, the heap TID is + * omitted rather than truncated, since its representation is different to + * the non-pivot representation.) + * + * Pivot tuple format: + * + * t_tid | t_info | key values | [heap TID] + * + * We store the number of columns present inside pivot tuples by abusing + * their t_tid offset field, since pivot tuples never need to store a real + * offset (pivot tuples generally store a downlink in t_tid, though). The + * offset field only stores the number of columns/attributes when the + * INDEX_ALT_TID_MASK bit is set, which doesn't count the trailing heap + * TID column sometimes stored in pivot tuples -- that's represented by + * the presence of BT_PIVOT_HEAP_TID_ATTR. The INDEX_ALT_TID_MASK bit in + * t_info is always set on BTREE_VERSION 4 pivot tuples, since + * BTreeTupleIsPivot() must work reliably on heapkeyspace versions. + * + * In version 2 or version 3 (!heapkeyspace) indexes, INDEX_ALT_TID_MASK + * might not be set in pivot tuples. BTreeTupleIsPivot() won't work + * reliably as a result. The number of columns stored is implicitly the + * same as the number of columns in the index, just like any non-pivot + * tuple. (The number of columns stored should not vary, since suffix + * truncation of key columns is unsafe within any !heapkeyspace index.) + * + * The 12 least significant bits from t_tid's offset number are used to + * represent the number of key columns within a pivot tuple. This leaves 4 + * status bits (BT_STATUS_OFFSET_MASK bits), which are shared by all tuples + * that have the INDEX_ALT_TID_MASK bit set (set in t_info) to store basic + * tuple metadata. BTreeTupleIsPivot() and BTreeTupleIsPosting() use the + * BT_STATUS_OFFSET_MASK bits. + * + * Sometimes non-pivot tuples also use a representation that repurposes + * t_tid to store metadata rather than a TID. PostgreSQL v13 introduced a + * new non-pivot tuple format to support deduplication: posting list + * tuples. Deduplication merges together multiple equal non-pivot tuples + * into a logically equivalent, space efficient representation. A posting + * list is an array of ItemPointerData elements. Non-pivot tuples are + * merged together to form posting list tuples lazily, at the point where + * we'd otherwise have to split a leaf page. + * + * Posting tuple format (alternative non-pivot tuple representation): + * + * t_tid | t_info | key values | posting list (TID array) + * + * Posting list tuples are recognized as such by having the + * INDEX_ALT_TID_MASK status bit set in t_info and the BT_IS_POSTING status + * bit set in t_tid's offset number. These flags redefine the content of + * the posting tuple's t_tid to store the location of the posting list + * (instead of a block number), as well as the total number of heap TIDs + * present in the tuple (instead of a real offset number). + * + * The 12 least significant bits from t_tid's offset number are used to + * represent the number of heap TIDs present in the tuple, leaving 4 status + * bits (the BT_STATUS_OFFSET_MASK bits). 
Like any non-pivot tuple, the + * number of columns stored is always implicitly the total number in the + * index (in practice there can never be non-key columns stored, since + * deduplication is not supported with INCLUDE indexes). + */ +#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT + +/* Item pointer offset bit masks */ +#define BT_OFFSET_MASK 0x0FFF +#define BT_STATUS_OFFSET_MASK 0xF000 +/* BT_STATUS_OFFSET_MASK status bits */ +#define BT_PIVOT_HEAP_TID_ATTR 0x1000 +#define BT_IS_POSTING 0x2000 + +/* + * Note: BTreeTupleIsPivot() can have false negatives (but not false + * positives) when used with !heapkeyspace indexes + */ +static inline bool +BTreeTupleIsPivot(IndexTuple itup) +{ + if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) + return false; + /* absence of BT_IS_POSTING in offset number indicates pivot tuple */ + if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) != 0) + return false; + + return true; +} + +static inline bool +BTreeTupleIsPosting(IndexTuple itup) +{ + if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) + return false; + /* presence of BT_IS_POSTING in offset number indicates posting tuple */ + if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) == 0) + return false; + + return true; +} + +static inline void +BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset) +{ + Assert(nhtids > 1); + Assert((nhtids & BT_STATUS_OFFSET_MASK) == 0); + Assert((size_t) postingoffset == MAXALIGN(postingoffset)); + Assert(postingoffset < INDEX_SIZE_MASK); + Assert(!BTreeTupleIsPivot(itup)); + + itup->t_info |= INDEX_ALT_TID_MASK; + ItemPointerSetOffsetNumber(&itup->t_tid, (nhtids | BT_IS_POSTING)); + ItemPointerSetBlockNumber(&itup->t_tid, postingoffset); +} + +static inline uint16 +BTreeTupleGetNPosting(IndexTuple posting) +{ + OffsetNumber existing; + + Assert(BTreeTupleIsPosting(posting)); + + existing = ItemPointerGetOffsetNumberNoCheck(&posting->t_tid); + return (existing & BT_OFFSET_MASK); +} + +static inline uint32 +BTreeTupleGetPostingOffset(IndexTuple posting) +{ + Assert(BTreeTupleIsPosting(posting)); + + return ItemPointerGetBlockNumberNoCheck(&posting->t_tid); +} + +static inline ItemPointer +BTreeTupleGetPosting(IndexTuple posting) +{ + return (ItemPointer) ((char *) posting + + BTreeTupleGetPostingOffset(posting)); +} + +static inline ItemPointer +BTreeTupleGetPostingN(IndexTuple posting, int n) +{ + return BTreeTupleGetPosting(posting) + n; +} + +/* + * Get/set downlink block number in pivot tuple. + * + * Note: Cannot assert that tuple is a pivot tuple. If we did so then + * !heapkeyspace indexes would exhibit false positive assertion failures. + */ +static inline BlockNumber +BTreeTupleGetDownLink(IndexTuple pivot) +{ + return ItemPointerGetBlockNumberNoCheck(&pivot->t_tid); +} + +static inline void +BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno) +{ + ItemPointerSetBlockNumber(&pivot->t_tid, blkno); +} + +/* + * Get number of attributes within tuple. + * + * Note that this does not include an implicit tiebreaker heap TID + * attribute, if any. Note also that the number of key attributes must be + * explicitly represented in all heapkeyspace pivot tuples. + * + * Note: This is defined as a macro rather than an inline function to + * avoid including rel.h. + */ +#define BTreeTupleGetNAtts(itup, rel) \ + ( \ + (BTreeTupleIsPivot(itup)) ? 
\ + ( \ + ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_OFFSET_MASK \ + ) \ + : \ + IndexRelationGetNumberOfAttributes(rel) \ + ) + +/* + * Set number of key attributes in tuple. + * + * The heap TID tiebreaker attribute bit may also be set here, indicating that + * a heap TID value will be stored at the end of the tuple (i.e. using the + * special pivot tuple representation). + */ +static inline void +BTreeTupleSetNAtts(IndexTuple itup, uint16 nkeyatts, bool heaptid) +{ + Assert(nkeyatts <= INDEX_MAX_KEYS); + Assert((nkeyatts & BT_STATUS_OFFSET_MASK) == 0); + Assert(!heaptid || nkeyatts > 0); + Assert(!BTreeTupleIsPivot(itup) || nkeyatts == 0); + + itup->t_info |= INDEX_ALT_TID_MASK; + + if (heaptid) + nkeyatts |= BT_PIVOT_HEAP_TID_ATTR; + + /* BT_IS_POSTING bit is deliberately unset here */ + ItemPointerSetOffsetNumber(&itup->t_tid, nkeyatts); + Assert(BTreeTupleIsPivot(itup)); +} + +/* + * Get/set leaf page's "top parent" link from its high key. Used during page + * deletion. + * + * Note: Cannot assert that tuple is a pivot tuple. If we did so then + * !heapkeyspace indexes would exhibit false positive assertion failures. + */ +static inline BlockNumber +BTreeTupleGetTopParent(IndexTuple leafhikey) +{ + return ItemPointerGetBlockNumberNoCheck(&leafhikey->t_tid); +} + +static inline void +BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno) +{ + ItemPointerSetBlockNumber(&leafhikey->t_tid, blkno); + BTreeTupleSetNAtts(leafhikey, 0, false); +} + +/* + * Get tiebreaker heap TID attribute, if any. + * + * This returns the first/lowest heap TID in the case of a posting list tuple. + */ +static inline ItemPointer +BTreeTupleGetHeapTID(IndexTuple itup) +{ + if (BTreeTupleIsPivot(itup)) + { + /* Pivot tuple heap TID representation? */ + if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & + BT_PIVOT_HEAP_TID_ATTR) != 0) + return (ItemPointer) ((char *) itup + IndexTupleSize(itup) - + sizeof(ItemPointerData)); + + /* Heap TID attribute was truncated */ + return NULL; + } + else if (BTreeTupleIsPosting(itup)) + return BTreeTupleGetPosting(itup); + + return &itup->t_tid; +} + +/* + * Get maximum heap TID attribute, which could be the only TID in the case of + * a non-pivot tuple that does not have a posting list tuple. + * + * Works with non-pivot tuples only. + */ +static inline ItemPointer +BTreeTupleGetMaxHeapTID(IndexTuple itup) +{ + Assert(!BTreeTupleIsPivot(itup)); + + if (BTreeTupleIsPosting(itup)) + { + uint16 nposting = BTreeTupleGetNPosting(itup); + + return BTreeTupleGetPostingN(itup, nposting - 1); + } + + return &itup->t_tid; +} + +/* + * Operator strategy numbers for B-tree have been moved to access/stratnum.h, + * because many places need to use them in ScanKeyInit() calls. + * + * The strategy numbers are chosen so that we can commute them by + * subtraction, thus: + */ +#define BTCommuteStrategyNumber(strat) (BTMaxStrategyNumber + 1 - (strat)) + +/* + * When a new operator class is declared, we require that the user + * supply us with an amproc procedure (BTORDER_PROC) for determining + * whether, for two keys a and b, a < b, a = b, or a > b. This routine + * must return < 0, 0, > 0, respectively, in these three cases. + * + * To facilitate accelerated sorting, an operator class may choose to + * offer a second procedure (BTSORTSUPPORT_PROC). For full details, see + * src/include/utils/sortsupport.h. 
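 *
 * [Editorial sketch, not part of the original header.]  For illustration, a
 * hedged example of how core code typically invokes the BTORDER_PROC
 * support function for one attribute ("rel", "attno", "collation", "datum1"
 * and "datum2" are assumed inputs; index_getprocinfo() is declared in
 * access/genam.h, and BTORDER_PROC is defined just below):
 *
 *     FmgrInfo   *cmp = index_getprocinfo(rel, attno, BTORDER_PROC);
 *     int32       result;
 *
 *     result = DatumGetInt32(FunctionCall2Coll(cmp, collation,
 *                                              datum1, datum2));
 *
 * A negative, zero, or positive result means datum1 < datum2, datum1 =
 * datum2, or datum1 > datum2, respectively.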
+ * + * To support window frames defined by "RANGE offset PRECEDING/FOLLOWING", + * an operator class may choose to offer a third amproc procedure + * (BTINRANGE_PROC), independently of whether it offers sortsupport. + * For full details, see doc/src/sgml/btree.sgml. + * + * To facilitate B-Tree deduplication, an operator class may choose to + * offer a forth amproc procedure (BTEQUALIMAGE_PROC). For full details, + * see doc/src/sgml/btree.sgml. + */ + +#define BTORDER_PROC 1 +#define BTSORTSUPPORT_PROC 2 +#define BTINRANGE_PROC 3 +#define BTEQUALIMAGE_PROC 4 +#define BTOPTIONS_PROC 5 +#define BTNProcs 5 + +/* + * We need to be able to tell the difference between read and write + * requests for pages, in order to do locking correctly. + */ + +#define BT_READ BUFFER_LOCK_SHARE +#define BT_WRITE BUFFER_LOCK_EXCLUSIVE + +/* + * BTStackData -- As we descend a tree, we push the location of pivot + * tuples whose downlink we are about to follow onto a private stack. If + * we split a leaf, we use this stack to walk back up the tree and insert + * data into its parent page at the correct location. We also have to + * recursively insert into the grandparent page if and when the parent page + * splits. Our private stack can become stale due to concurrent page + * splits and page deletions, but it should never give us an irredeemably + * bad picture. + */ +typedef struct BTStackData +{ + BlockNumber bts_blkno; + OffsetNumber bts_offset; + struct BTStackData *bts_parent; +} BTStackData; + +typedef BTStackData *BTStack; + +/* + * BTScanInsertData is the btree-private state needed to find an initial + * position for an indexscan, or to insert new tuples -- an "insertion + * scankey" (not to be confused with a search scankey). It's used to descend + * a B-Tree using _bt_search. + * + * heapkeyspace indicates if we expect all keys in the index to be physically + * unique because heap TID is used as a tiebreaker attribute, and if index may + * have truncated key attributes in pivot tuples. This is actually a property + * of the index relation itself (not an indexscan). heapkeyspace indexes are + * indexes whose version is >= version 4. It's convenient to keep this close + * by, rather than accessing the metapage repeatedly. + * + * allequalimage is set to indicate that deduplication is safe for the index. + * This is also a property of the index relation rather than an indexscan. + * + * anynullkeys indicates if any of the keys had NULL value when scankey was + * built from index tuple (note that already-truncated tuple key attributes + * set NULL as a placeholder key value, which also affects value of + * anynullkeys). This is a convenience for unique index non-pivot tuple + * insertion, which usually temporarily unsets scantid, but shouldn't iff + * anynullkeys is true. Value generally matches non-pivot tuple's HasNulls + * bit, but may not when inserting into an INCLUDE index (tuple header value + * is affected by the NULL-ness of both key and non-key attributes). + * + * When nextkey is false (the usual case), _bt_search and _bt_binsrch will + * locate the first item >= scankey. When nextkey is true, they will locate + * the first item > scan key. + * + * pivotsearch is set to true by callers that want to re-find a leaf page + * using a scankey built from a leaf page's high key. Most callers set this + * to false. + * + * scantid is the heap TID that is used as a final tiebreaker attribute. It + * is set to NULL when index scan doesn't need to find a position for a + * specific physical tuple. 
Must be set when inserting new tuples into + * heapkeyspace indexes, since every tuple in the tree unambiguously belongs + * in one exact position (it's never set with !heapkeyspace indexes, though). + * Despite the representational difference, nbtree search code considers + * scantid to be just another insertion scankey attribute. + * + * scankeys is an array of scan key entries for attributes that are compared + * before scantid (user-visible attributes). keysz is the size of the array. + * During insertion, there must be a scan key for every attribute, but when + * starting a regular index scan some can be omitted. The array is used as a + * flexible array member, though it's sized in a way that makes it possible to + * use stack allocations. See nbtree/README for full details. + */ +typedef struct BTScanInsertData +{ + bool heapkeyspace; + bool allequalimage; + bool anynullkeys; + bool nextkey; + bool pivotsearch; + ItemPointer scantid; /* tiebreaker for scankeys */ + int keysz; /* Size of scankeys array */ + ScanKeyData scankeys[INDEX_MAX_KEYS]; /* Must appear last */ +} BTScanInsertData; + +typedef BTScanInsertData *BTScanInsert; + +/* + * BTInsertStateData is a working area used during insertion. + * + * This is filled in after descending the tree to the first leaf page the new + * tuple might belong on. Tracks the current position while performing + * uniqueness check, before we have determined which exact page to insert + * to. + * + * (This should be private to nbtinsert.c, but it's also used by + * _bt_binsrch_insert) + */ +typedef struct BTInsertStateData +{ + IndexTuple itup; /* Item we're inserting */ + Size itemsz; /* Size of itup -- should be MAXALIGN()'d */ + BTScanInsert itup_key; /* Insertion scankey */ + + /* Buffer containing leaf page we're likely to insert itup on */ + Buffer buf; + + /* + * Cache of bounds within the current buffer. Only used for insertions + * where _bt_check_unique is called. See _bt_binsrch_insert and + * _bt_findinsertloc for details. + */ + bool bounds_valid; + OffsetNumber low; + OffsetNumber stricthigh; + + /* + * if _bt_binsrch_insert found the location inside existing posting list, + * save the position inside the list. -1 sentinel value indicates overlap + * with an existing posting list tuple that has its LP_DEAD bit set. + */ + int postingoff; +} BTInsertStateData; + +typedef BTInsertStateData *BTInsertState; + +/* + * State used to representing an individual pending tuple during + * deduplication. + */ +typedef struct BTDedupInterval +{ + OffsetNumber baseoff; + uint16 nitems; +} BTDedupInterval; + +/* + * BTDedupStateData is a working area used during deduplication. + * + * The status info fields track the state of a whole-page deduplication pass. + * State about the current pending posting list is also tracked. + * + * A pending posting list is comprised of a contiguous group of equal items + * from the page, starting from page offset number 'baseoff'. This is the + * offset number of the "base" tuple for new posting list. 'nitems' is the + * current total number of existing items from the page that will be merged to + * make a new posting list tuple, including the base tuple item. (Existing + * items may themselves be posting list tuples, or regular non-pivot tuples.) + * + * The total size of the existing tuples to be freed when pending posting list + * is processed gets tracked by 'phystupsize'. 
This information allows + * deduplication to calculate the space saving for each new posting list + * tuple, and for the entire pass over the page as a whole. + */ +typedef struct BTDedupStateData +{ + /* Deduplication status info for entire pass over page */ + bool deduplicate; /* Still deduplicating page? */ + int nmaxitems; /* Number of max-sized tuples so far */ + Size maxpostingsize; /* Limit on size of final tuple */ + + /* Metadata about base tuple of current pending posting list */ + IndexTuple base; /* Use to form new posting list */ + OffsetNumber baseoff; /* page offset of base */ + Size basetupsize; /* base size without original posting list */ + + /* Other metadata about pending posting list */ + ItemPointer htids; /* Heap TIDs in pending posting list */ + int nhtids; /* Number of heap TIDs in htids array */ + int nitems; /* Number of existing tuples/line pointers */ + Size phystupsize; /* Includes line pointer overhead */ + + /* + * Array of tuples to go on new version of the page. Contains one entry + * for each group of consecutive items. Note that existing tuples that + * will not become posting list tuples do not appear in the array (they + * are implicitly unchanged by deduplication pass). + */ + int nintervals; /* current number of intervals in array */ + BTDedupInterval intervals[MaxIndexTuplesPerPage]; +} BTDedupStateData; + +typedef BTDedupStateData *BTDedupState; + +/* + * BTVacuumPostingData is state that represents how to VACUUM (or delete) a + * posting list tuple when some (though not all) of its TIDs are to be + * deleted. + * + * Convention is that itup field is the original posting list tuple on input, + * and palloc()'d final tuple used to overwrite existing tuple on output. + */ +typedef struct BTVacuumPostingData +{ + /* Tuple that will be/was updated */ + IndexTuple itup; + OffsetNumber updatedoffset; + + /* State needed to describe final itup in WAL */ + uint16 ndeletedtids; + uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]; +} BTVacuumPostingData; + +typedef BTVacuumPostingData *BTVacuumPosting; + +/* + * BTScanOpaqueData is the btree-private state needed for an indexscan. + * This consists of preprocessed scan keys (see _bt_preprocess_keys() for + * details of the preprocessing), information about the current location + * of the scan, and information about the marked location, if any. (We use + * BTScanPosData to represent the data needed for each of current and marked + * locations.) In addition we can remember some known-killed index entries + * that must be marked before we can move off the current page. + * + * Index scans work a page at a time: we pin and read-lock the page, identify + * all the matching items on the page and save them in BTScanPosData, then + * release the read-lock while returning the items to the caller for + * processing. This approach minimizes lock/unlock traffic. Note that we + * keep the pin on the index page until the caller is done with all the items + * (this is needed for VACUUM synchronization, see nbtree/README). When we + * are ready to step to the next page, if the caller has told us any of the + * items were killed, we re-lock the page to mark them killed, then unlock. + * Finally we drop the pin and step to the next page in the appropriate + * direction. + * + * If we are doing an index-only scan, we save the entire IndexTuple for each + * matched item, otherwise only its heap TID and offset. The IndexTuples go + * into a separate workspace array; each BTScanPosItem stores its tuple's + * offset within that array. 
Posting list tuples store a "base" tuple once, + * allowing the same key to be returned for each TID in the posting list + * tuple. + */ + +typedef struct BTScanPosItem /* what we remember about each match */ +{ + ItemPointerData heapTid; /* TID of referenced heap item */ + OffsetNumber indexOffset; /* index item's location within page */ + LocationIndex tupleOffset; /* IndexTuple's offset in workspace, if any */ +} BTScanPosItem; + +typedef struct BTScanPosData +{ + Buffer buf; /* if valid, the buffer is pinned */ + + XLogRecPtr lsn; /* pos in the WAL stream when page was read */ + BlockNumber currPage; /* page referenced by items array */ + BlockNumber nextPage; /* page's right link when we scanned it */ + + /* + * moreLeft and moreRight track whether we think there may be matching + * index entries to the left and right of the current page, respectively. + * We can clear the appropriate one of these flags when _bt_checkkeys() + * returns continuescan = false. + */ + bool moreLeft; + bool moreRight; + + /* + * If we are doing an index-only scan, nextTupleOffset is the first free + * location in the associated tuple storage workspace. + */ + int nextTupleOffset; + + /* + * The items array is always ordered in index order (ie, increasing + * indexoffset). When scanning backwards it is convenient to fill the + * array back-to-front, so we start at the last slot and fill downwards. + * Hence we need both a first-valid-entry and a last-valid-entry counter. + * itemIndex is a cursor showing which entry was last returned to caller. + */ + int firstItem; /* first valid index in items[] */ + int lastItem; /* last valid index in items[] */ + int itemIndex; /* current index in items[] */ + + BTScanPosItem items[MaxTIDsPerBTreePage]; /* MUST BE LAST */ +} BTScanPosData; + +typedef BTScanPosData *BTScanPos; + +#define BTScanPosIsPinned(scanpos) \ +( \ + AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ + !BufferIsValid((scanpos).buf)), \ + BufferIsValid((scanpos).buf) \ +) +#define BTScanPosUnpin(scanpos) \ + do { \ + ReleaseBuffer((scanpos).buf); \ + (scanpos).buf = InvalidBuffer; \ + } while (0) +#define BTScanPosUnpinIfPinned(scanpos) \ + do { \ + if (BTScanPosIsPinned(scanpos)) \ + BTScanPosUnpin(scanpos); \ + } while (0) + +#define BTScanPosIsValid(scanpos) \ +( \ + AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ + !BufferIsValid((scanpos).buf)), \ + BlockNumberIsValid((scanpos).currPage) \ +) +#define BTScanPosInvalidate(scanpos) \ + do { \ + (scanpos).currPage = InvalidBlockNumber; \ + (scanpos).nextPage = InvalidBlockNumber; \ + (scanpos).buf = InvalidBuffer; \ + (scanpos).lsn = InvalidXLogRecPtr; \ + (scanpos).nextTupleOffset = 0; \ + } while (0) + +/* We need one of these for each equality-type SK_SEARCHARRAY scan key */ +typedef struct BTArrayKeyInfo +{ + int scan_key; /* index of associated key in arrayKeyData */ + int cur_elem; /* index of current element in elem_values */ + int mark_elem; /* index of marked element in elem_values */ + int num_elems; /* number of elems in current array value */ + Datum *elem_values; /* array of num_elems Datums */ +} BTArrayKeyInfo; + +typedef struct BTScanOpaqueData +{ + /* these fields are set by _bt_preprocess_keys(): */ + bool qual_ok; /* false if qual can never be satisfied */ + int numberOfKeys; /* number of preprocessed scan keys */ + ScanKey keyData; /* array of preprocessed scan keys */ + + /* workspace for SK_SEARCHARRAY support */ + ScanKey arrayKeyData; /* modified copy of scan->keyData */ + int numArrayKeys; /* number of 
equality-type array keys (-1 if + * there are any unsatisfiable array keys) */ + int arrayKeyCount; /* count indicating number of array scan keys + * processed */ + BTArrayKeyInfo *arrayKeys; /* info about each equality-type array key */ + MemoryContext arrayContext; /* scan-lifespan context for array data */ + + /* info about killed items if any (killedItems is NULL if never used) */ + int *killedItems; /* currPos.items indexes of killed items */ + int numKilled; /* number of currently stored items */ + + /* + * If we are doing an index-only scan, these are the tuple storage + * workspaces for the currPos and markPos respectively. Each is of size + * BLCKSZ, so it can hold as much as a full page's worth of tuples. + */ + char *currTuples; /* tuple storage for currPos */ + char *markTuples; /* tuple storage for markPos */ + + /* + * If the marked position is on the same page as current position, we + * don't use markPos, but just keep the marked itemIndex in markItemIndex + * (all the rest of currPos is valid for the mark position). Hence, to + * determine if there is a mark, first look at markItemIndex, then at + * markPos. + */ + int markItemIndex; /* itemIndex, or -1 if not valid */ + + /* keep these last in struct for efficiency */ + BTScanPosData currPos; /* current position data */ + BTScanPosData markPos; /* marked position, if any */ +} BTScanOpaqueData; + +typedef BTScanOpaqueData *BTScanOpaque; + +/* + * We use some private sk_flags bits in preprocessed scan keys. We're allowed + * to use bits 16-31 (see skey.h). The uppermost bits are copied from the + * index's indoption[] array entry for the index attribute. + */ +#define SK_BT_REQFWD 0x00010000 /* required to continue forward scan */ +#define SK_BT_REQBKWD 0x00020000 /* required to continue backward scan */ +#define SK_BT_INDOPTION_SHIFT 24 /* must clear the above bits */ +#define SK_BT_DESC (INDOPTION_DESC << SK_BT_INDOPTION_SHIFT) +#define SK_BT_NULLS_FIRST (INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT) + +typedef struct BTOptions +{ + int32 varlena_header_; /* varlena header (do not touch directly!) */ + int fillfactor; /* page fill factor in percent (0..100) */ + float8 vacuum_cleanup_index_scale_factor; /* deprecated */ + bool deduplicate_items; /* Try to deduplicate items? */ +} BTOptions; + +#define BTGetFillFactor(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == BTREE_AM_OID), \ + (relation)->rd_options ? \ + ((BTOptions *) (relation)->rd_options)->fillfactor : \ + BTREE_DEFAULT_FILLFACTOR) +#define BTGetTargetPageFreeSpace(relation) \ + (BLCKSZ * (100 - BTGetFillFactor(relation)) / 100) +#define BTGetDeduplicateItems(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == BTREE_AM_OID), \ + ((relation)->rd_options ? \ + ((BTOptions *) (relation)->rd_options)->deduplicate_items : true)) + +/* + * Constant definition for progress reporting. Phase numbers must match + * btbuildphasename. 
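 *
 * [Editorial sketch, not part of the original header.]  A hedged example of
 * how one of these phases is typically reported during an index build (the
 * real call sites are in nbtsort.c; PROGRESS_CREATEIDX_SUBPHASE comes from
 * commands/progress.h):
 *
 *     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
 *                                  PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN);
 *
 * btbuildphasename() (declared below) maps the same number back to the
 * phase name shown in pg_stat_progress_create_index.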
+ */ +/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */ +#define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN 2 +#define PROGRESS_BTREE_PHASE_PERFORMSORT_1 3 +#define PROGRESS_BTREE_PHASE_PERFORMSORT_2 4 +#define PROGRESS_BTREE_PHASE_LEAF_LOAD 5 + +/* + * external entry points for btree, in nbtree.c + */ +extern void btbuildempty(Relation index); +extern bool btinsert(Relation rel, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys); +extern Size btestimateparallelscan(void); +extern void btinitparallelscan(void *target); +extern bool btgettuple(IndexScanDesc scan, ScanDirection dir); +extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); +extern void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +extern void btparallelrescan(IndexScanDesc scan); +extern void btendscan(IndexScanDesc scan); +extern void btmarkpos(IndexScanDesc scan); +extern void btrestrpos(IndexScanDesc scan); +extern IndexBulkDeleteResult *btbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *btvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); +extern bool btcanreturn(Relation index, int attno); + +/* + * prototypes for internal functions in nbtree.c + */ +extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno); +extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page); +extern void _bt_parallel_done(IndexScanDesc scan); +extern void _bt_parallel_advance_array_keys(IndexScanDesc scan); + +/* + * prototypes for functions in nbtdedup.c + */ +extern void _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, + IndexTuple newitem, Size newitemsz, + bool bottomupdedup); +extern bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, + Size newitemsz); +extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, + OffsetNumber baseoff); +extern bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup); +extern Size _bt_dedup_finish_pending(Page newpage, BTDedupState state); +extern IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, + int nhtids); +extern void _bt_update_posting(BTVacuumPosting vacposting); +extern IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting, + int postingoff); + +/* + * prototypes for functions in nbtinsert.c + */ +extern bool _bt_doinsert(Relation rel, IndexTuple itup, + IndexUniqueCheck checkUnique, bool indexUnchanged, + Relation heapRel); +extern void _bt_finish_split(Relation rel, Buffer lbuf, BTStack stack); +extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, BlockNumber child); + +/* + * prototypes for functions in nbtsplitloc.c + */ +extern OffsetNumber _bt_findsplitloc(Relation rel, Page origpage, + OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, + bool *newitemonleft); + +/* + * prototypes for functions in nbtpage.c + */ +extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, + bool allequalimage); +extern bool _bt_vacuum_needs_cleanup(Relation rel); +extern void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages); +extern void _bt_upgrademetapage(Page page); +extern Buffer _bt_getroot(Relation rel, int access); +extern Buffer _bt_gettrueroot(Relation rel); +extern int 
_bt_getrootheight(Relation rel); +extern void _bt_metaversion(Relation rel, bool *heapkeyspace, + bool *allequalimage); +extern void _bt_checkpage(Relation rel, Buffer buf); +extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access); +extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, + BlockNumber blkno, int access); +extern void _bt_relbuf(Relation rel, Buffer buf); +extern void _bt_lockbuf(Relation rel, Buffer buf, int access); +extern void _bt_unlockbuf(Relation rel, Buffer buf); +extern bool _bt_conditionallockbuf(Relation rel, Buffer buf); +extern void _bt_upgradelockbufcleanup(Relation rel, Buffer buf); +extern void _bt_pageinit(Page page, Size size); +extern void _bt_delitems_vacuum(Relation rel, Buffer buf, + OffsetNumber *deletable, int ndeletable, + BTVacuumPosting *updatable, int nupdatable); +extern void _bt_delitems_delete_check(Relation rel, Buffer buf, + Relation heapRel, + TM_IndexDeleteOp *delstate); +extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate); +extern void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, + bool cleanuponly); +extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate); + +/* + * prototypes for functions in nbtsearch.c + */ +extern BTStack _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, + int access, Snapshot snapshot); +extern Buffer _bt_moveright(Relation rel, BTScanInsert key, Buffer buf, + bool forupdate, BTStack stack, int access, Snapshot snapshot); +extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate); +extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum); +extern bool _bt_first(IndexScanDesc scan, ScanDirection dir); +extern bool _bt_next(IndexScanDesc scan, ScanDirection dir); +extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, + Snapshot snapshot); + +/* + * prototypes for functions in nbtutils.c + */ +extern BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup); +extern void _bt_freestack(BTStack stack); +extern void _bt_preprocess_array_keys(IndexScanDesc scan); +extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir); +extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir); +extern void _bt_mark_array_keys(IndexScanDesc scan); +extern void _bt_restore_array_keys(IndexScanDesc scan); +extern void _bt_preprocess_keys(IndexScanDesc scan); +extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, + int tupnatts, ScanDirection dir, bool *continuescan); +extern void _bt_killitems(IndexScanDesc scan); +extern BTCycleId _bt_vacuum_cycleid(Relation rel); +extern BTCycleId _bt_start_vacuum(Relation rel); +extern void _bt_end_vacuum(Relation rel); +extern void _bt_end_vacuum_callback(int code, Datum arg); +extern Size BTreeShmemSize(void); +extern void BTreeShmemInit(void); +extern bytea *btoptions(Datum reloptions, bool validate); +extern bool btproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull); +extern char *btbuildphasename(int64 phasenum); +extern IndexTuple _bt_truncate(Relation rel, IndexTuple lastleft, + IndexTuple firstright, BTScanInsert itup_key); +extern int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, + IndexTuple firstright); +extern bool _bt_check_natts(Relation rel, bool heapkeyspace, Page page, + OffsetNumber offnum); +extern void _bt_check_third_page(Relation rel, Relation heap, + bool needheaptidspace, Page page, IndexTuple newtup); +extern bool 
_bt_allequalimage(Relation rel, bool debugmessage); + +/* + * prototypes for functions in nbtvalidate.c + */ +extern bool btvalidate(Oid opclassoid); +extern void btadjustmembers(Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions); + +/* + * prototypes for functions in nbtsort.c + */ +extern IndexBuildResult *btbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc); + +#endif /* NBTREE_H */ diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h new file mode 100644 index 0000000..0f77318 --- /dev/null +++ b/src/include/access/nbtxlog.h @@ -0,0 +1,351 @@ +/*------------------------------------------------------------------------- + * + * nbtxlog.h + * header file for postgres btree xlog routines + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/nbtxlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef NBTXLOG_H +#define NBTXLOG_H + +#include "access/transam.h" +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/off.h" + +/* + * XLOG records for btree operations + * + * XLOG allows to store some information in high 4 bits of log + * record xl_info field + */ +#define XLOG_BTREE_INSERT_LEAF 0x00 /* add index tuple without split */ +#define XLOG_BTREE_INSERT_UPPER 0x10 /* same, on a non-leaf page */ +#define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */ +#define XLOG_BTREE_SPLIT_L 0x30 /* add index tuple with split */ +#define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */ +#define XLOG_BTREE_INSERT_POST 0x50 /* add index tuple with posting split */ +#define XLOG_BTREE_DEDUP 0x60 /* deduplicate tuples for a page */ +#define XLOG_BTREE_DELETE 0x70 /* delete leaf index tuples for a page */ +#define XLOG_BTREE_UNLINK_PAGE 0x80 /* delete a half-dead page */ +#define XLOG_BTREE_UNLINK_PAGE_META 0x90 /* same, and update metapage */ +#define XLOG_BTREE_NEWROOT 0xA0 /* new root page */ +#define XLOG_BTREE_MARK_PAGE_HALFDEAD 0xB0 /* mark a leaf as half-dead */ +#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during + * vacuum */ +#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from + * FSM */ +#define XLOG_BTREE_META_CLEANUP 0xE0 /* update cleanup-related data in the + * metapage */ + +/* + * All that we need to regenerate the meta-data page + */ +typedef struct xl_btree_metadata +{ + uint32 version; + BlockNumber root; + uint32 level; + BlockNumber fastroot; + uint32 fastlevel; + uint32 last_cleanup_num_delpages; + bool allequalimage; +} xl_btree_metadata; + +/* + * This is what we need to know about simple (without split) insert. + * + * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META, and + * INSERT_POST. Note that INSERT_META and INSERT_UPPER implies it's not a + * leaf page, while INSERT_POST and INSERT_LEAF imply that it must be a leaf + * page. + * + * Backup Blk 0: original page + * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META + * Backup Blk 2: xl_btree_metadata, if INSERT_META + * + * Note: The new tuple is actually the "original" new item in the posting + * list split insert case (i.e. the INSERT_POST case). A split offset for + * the posting list is logged before the original new item. 
Recovery needs + * both, since it must do an in-place update of the existing posting list + * that was split as an extra step. Also, recovery generates a "final" + * newitem. See _bt_swap_posting() for details on posting list splits. + */ +typedef struct xl_btree_insert +{ + OffsetNumber offnum; + + /* POSTING SPLIT OFFSET FOLLOWS (INSERT_POST case) */ + /* NEW TUPLE ALWAYS FOLLOWS AT THE END */ +} xl_btree_insert; + +#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber)) + +/* + * On insert with split, we save all the items going into the right sibling + * so that we can restore it completely from the log record. This way takes + * less xlog space than the normal approach, because if we did it standardly, + * XLogInsert would almost always think the right page is new and store its + * whole page image. The left page, however, is handled in the normal + * incremental-update fashion. + * + * Note: XLOG_BTREE_SPLIT_L and XLOG_BTREE_SPLIT_R share this data record. + * There are two variants to indicate whether the inserted tuple went into the + * left or right split page (and thus, whether the new item is stored or not). + * We always log the left page high key because suffix truncation can generate + * a new leaf high key using user-defined code. This is also necessary on + * internal pages, since the firstright item that the left page's high key was + * based on will have been truncated to zero attributes in the right page (the + * separator key is unavailable from the right page). + * + * Backup Blk 0: original page / new left page + * + * The left page's data portion contains the new item, if it's the _L variant. + * _R variant split records generally do not have a newitem (_R variant leaf + * page split records that must deal with a posting list split will include an + * explicit newitem, though it is never used on the right page -- it is + * actually an orignewitem needed to update existing posting list). The new + * high key of the left/original page appears last of all (and must always be + * present). + * + * Page split records that need the REDO routine to deal with a posting list + * split directly will have an explicit newitem, which is actually an + * orignewitem (the newitem as it was before the posting list split, not + * after). A posting list split always has a newitem that comes immediately + * after the posting list being split (which would have overlapped with + * orignewitem prior to split). Usually REDO must deal with posting list + * splits with an _L variant page split record, and usually both the new + * posting list and the final newitem go on the left page (the existing + * posting list will be inserted instead of the old, and the final newitem + * will be inserted next to that). However, _R variant split records will + * include an orignewitem when the split point for the page happens to have a + * lastleft tuple that is also the posting list being split (leaving newitem + * as the page split's firstright tuple). The existence of this corner case + * does not change the basic fact about newitem/orignewitem for the REDO + * routine: it is always state used for the left page alone. (This is why the + * record's postingoff field isn't a reliable indicator of whether or not a + * posting list split occurred during the page split; a non-zero value merely + * indicates that the REDO routine must reconstruct a new posting list tuple + * that is needed for the left page.) 
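 *
 * [Editorial sketch, not part of the original header.]  A hedged outline of
 * how a REDO routine might pick this record apart (the real logic lives in
 * nbtxlog.c's split handling):
 *
 *     xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
 *     bool        newitemonleft =
 *         ((XLogRecGetInfo(record) & ~XLR_INFO_MASK) == XLOG_BTREE_SPLIT_L);
 *     Size        datalen;
 *     char       *leftdata = XLogRecGetBlockData(record, 0, &datalen);
 *
 * xlrec->firstrightoff, xlrec->newitemoff and xlrec->postingoff (see the
 * struct defined just below) then drive reconstruction of the left page,
 * while backup block 1's data holds the right page's tuples in
 * _bt_restore_page() form, as described below.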
+ * + * This posting list split handling is equivalent to the xl_btree_insert REDO + * routine's INSERT_POST handling. While the details are more complicated + * here, the concept and goals are exactly the same. See _bt_swap_posting() + * for details on posting list splits. + * + * Backup Blk 1: new right page + * + * The right page's data portion contains the right page's tuples in the form + * used by _bt_restore_page. This includes the new item, if it's the _R + * variant. The right page's tuples also include the right page's high key + * with either variant (moved from the left/original page during the split), + * unless the split happened to be of the rightmost page on its level, where + * there is no high key for new right page. + * + * Backup Blk 2: next block (orig page's rightlink), if any + * Backup Blk 3: child's left sibling, if non-leaf split + */ +typedef struct xl_btree_split +{ + uint32 level; /* tree level of page being split */ + OffsetNumber firstrightoff; /* first origpage item on rightpage */ + OffsetNumber newitemoff; /* new item's offset */ + uint16 postingoff; /* offset inside orig posting tuple */ +} xl_btree_split; + +#define SizeOfBtreeSplit (offsetof(xl_btree_split, postingoff) + sizeof(uint16)) + +/* + * When page is deduplicated, consecutive groups of tuples with equal keys are + * merged together into posting list tuples. + * + * The WAL record represents a deduplication pass for a leaf page. An array + * of BTDedupInterval structs follows. + */ +typedef struct xl_btree_dedup +{ + uint16 nintervals; + + /* DEDUPLICATION INTERVALS FOLLOW */ +} xl_btree_dedup; + +#define SizeOfBtreeDedup (offsetof(xl_btree_dedup, nintervals) + sizeof(uint16)) + +/* + * This is what we need to know about page reuse within btree. This record + * only exists to generate a conflict point for Hot Standby. + * + * Note that we must include a RelFileNode in the record because we don't + * actually register the buffer with the record. + */ +typedef struct xl_btree_reuse_page +{ + RelFileNode node; + BlockNumber block; + FullTransactionId latestRemovedFullXid; +} xl_btree_reuse_page; + +#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page)) + +/* + * xl_btree_vacuum and xl_btree_delete records describe deletion of index + * tuples on a leaf page. The former variant is used by VACUUM, while the + * latter variant is used by the ad-hoc deletions that sometimes take place + * when btinsert() is called. + * + * The records are very similar. The only difference is that xl_btree_delete + * has to include a latestRemovedXid field to generate recovery conflicts. + * (VACUUM operations can just rely on earlier conflicts generated during + * pruning of the table whose TIDs the to-be-deleted index tuples point to. + * There are also small differences between each REDO routine that we don't go + * into here.) + * + * xl_btree_vacuum and xl_btree_delete both represent deletion of any number + * of index tuples on a single leaf page using page offset numbers. Both also + * support "updates" of index tuples, which is how deletes of a subset of TIDs + * contained in an existing posting list tuple are implemented. + * + * Updated posting list tuples are represented using xl_btree_update metadata. + * The REDO routines each use the xl_btree_update entries (plus each + * corresponding original index tuple from the target leaf page) to generate + * the final updated tuple. + * + * Updates are only used when there will be some remaining TIDs left by the + * REDO routine. 
Otherwise the posting list tuple just gets deleted outright. + */ +typedef struct xl_btree_vacuum +{ + uint16 ndeleted; + uint16 nupdated; + + /* DELETED TARGET OFFSET NUMBERS FOLLOW */ + /* UPDATED TARGET OFFSET NUMBERS FOLLOW */ + /* UPDATED TUPLES METADATA (xl_btree_update) ARRAY FOLLOWS */ +} xl_btree_vacuum; + +#define SizeOfBtreeVacuum (offsetof(xl_btree_vacuum, nupdated) + sizeof(uint16)) + +typedef struct xl_btree_delete +{ + TransactionId latestRemovedXid; + uint16 ndeleted; + uint16 nupdated; + + /* DELETED TARGET OFFSET NUMBERS FOLLOW */ + /* UPDATED TARGET OFFSET NUMBERS FOLLOW */ + /* UPDATED TUPLES METADATA (xl_btree_update) ARRAY FOLLOWS */ +} xl_btree_delete; + +#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nupdated) + sizeof(uint16)) + +/* + * The offsets that appear in xl_btree_update metadata are offsets into the + * original posting list from tuple, not page offset numbers. These are + * 0-based. The page offset number for the original posting list tuple comes + * from the main xl_btree_vacuum/xl_btree_delete record. + */ +typedef struct xl_btree_update +{ + uint16 ndeletedtids; + + /* POSTING LIST uint16 OFFSETS TO A DELETED TID FOLLOW */ +} xl_btree_update; + +#define SizeOfBtreeUpdate (offsetof(xl_btree_update, ndeletedtids) + sizeof(uint16)) + +/* + * This is what we need to know about marking an empty subtree for deletion. + * The target identifies the tuple removed from the parent page (note that we + * remove this tuple's downlink and the *following* tuple's key). Note that + * the leaf page is empty, so we don't need to store its content --- it is + * just reinitialized during recovery using the rest of the fields. + * + * Backup Blk 0: leaf block + * Backup Blk 1: top parent + */ +typedef struct xl_btree_mark_page_halfdead +{ + OffsetNumber poffset; /* deleted tuple id in parent page */ + + /* information needed to recreate the leaf page: */ + BlockNumber leafblk; /* leaf block ultimately being deleted */ + BlockNumber leftblk; /* leaf block's left sibling, if any */ + BlockNumber rightblk; /* leaf block's right sibling */ + BlockNumber topparent; /* topmost internal page in the subtree */ +} xl_btree_mark_page_halfdead; + +#define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, topparent) + sizeof(BlockNumber)) + +/* + * This is what we need to know about deletion of a btree page. Note that we + * only leave behind a small amount of bookkeeping information in deleted + * pages (deleted pages must be kept around as tombstones for a while). It is + * convenient for the REDO routine to regenerate its target page from scratch. + * This is why WAL record describes certain details that are actually directly + * available from the target page. + * + * Backup Blk 0: target block being deleted + * Backup Blk 1: target block's left sibling, if any + * Backup Blk 2: target block's right sibling + * Backup Blk 3: leaf block (if different from target) + * Backup Blk 4: metapage (if rightsib becomes new fast root) + */ +typedef struct xl_btree_unlink_page +{ + BlockNumber leftsib; /* target block's left sibling, if any */ + BlockNumber rightsib; /* target block's right sibling */ + uint32 level; /* target block's level */ + FullTransactionId safexid; /* target block's BTPageSetDeleted() XID */ + + /* + * Information needed to recreate a half-dead leaf page with correct + * topparent link. The fields are only used when deletion operation's + * target page is an internal page. 
REDO routine creates half-dead page + * from scratch to keep things simple (this is the same convenient + * approach used for the target page itself). + */ + BlockNumber leafleftsib; + BlockNumber leafrightsib; + BlockNumber leaftopparent; /* next child down in the subtree */ + + /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_UNLINK_PAGE_META */ +} xl_btree_unlink_page; + +#define SizeOfBtreeUnlinkPage (offsetof(xl_btree_unlink_page, leaftopparent) + sizeof(BlockNumber)) + +/* + * New root log record. There are zero tuples if this is to establish an + * empty root, or two if it is the result of splitting an old root. + * + * Note that although this implies rewriting the metadata page, we don't need + * an xl_btree_metadata record --- the rootblk and level are sufficient. + * + * Backup Blk 0: new root page (2 tuples as payload, if splitting old root) + * Backup Blk 1: left child (if splitting an old root) + * Backup Blk 2: metapage + */ +typedef struct xl_btree_newroot +{ + BlockNumber rootblk; /* location of new root (redundant with blk 0) */ + uint32 level; /* its tree level */ +} xl_btree_newroot; + +#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32)) + + +/* + * prototypes for functions in nbtxlog.c + */ +extern void btree_redo(XLogReaderState *record); +extern void btree_desc(StringInfo buf, XLogReaderState *record); +extern const char *btree_identify(uint8 info); +extern void btree_xlog_startup(void); +extern void btree_xlog_cleanup(void); +extern void btree_mask(char *pagedata, BlockNumber blkno); + +#endif /* NBTXLOG_H */ diff --git a/src/include/access/parallel.h b/src/include/access/parallel.h new file mode 100644 index 0000000..93d88ac --- /dev/null +++ b/src/include/access/parallel.h @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * parallel.h + * Infrastructure for launching parallel workers + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/parallel.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#include "access/xlogdefs.h" +#include "lib/ilist.h" +#include "postmaster/bgworker.h" +#include "storage/shm_mq.h" +#include "storage/shm_toc.h" + +typedef void (*parallel_worker_main_type) (dsm_segment *seg, shm_toc *toc); + +typedef struct ParallelWorkerInfo +{ + BackgroundWorkerHandle *bgwhandle; + shm_mq_handle *error_mqh; + int32 pid; +} ParallelWorkerInfo; + +typedef struct ParallelContext +{ + dlist_node node; + SubTransactionId subid; + int nworkers; /* Maximum number of workers to launch */ + int nworkers_to_launch; /* Actual number of workers to launch */ + int nworkers_launched; + char *library_name; + char *function_name; + ErrorContextCallback *error_context_stack; + shm_toc_estimator estimator; + dsm_segment *seg; + void *private_memory; + shm_toc *toc; + ParallelWorkerInfo *worker; + int nknown_attached_workers; + bool *known_attached_workers; +} ParallelContext; + +typedef struct ParallelWorkerContext +{ + dsm_segment *seg; + shm_toc *toc; +} ParallelWorkerContext; + +extern volatile bool ParallelMessagePending; +extern PGDLLIMPORT int ParallelWorkerNumber; +extern PGDLLIMPORT bool InitializingParallelWorker; + +#define IsParallelWorker() (ParallelWorkerNumber >= 0) + +extern ParallelContext *CreateParallelContext(const char *library_name, + const char *function_name, int nworkers); 
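(Editorial aside, not part of the patch: CreateParallelContext() above and the declarations immediately below form the usual lifecycle for a parallel operation. The following is a minimal sketch of a caller, assuming the shm_toc helpers from storage/shm_toc.h and EnterParallelMode()/ExitParallelMode() from access/xact.h, none of which are declared in this header; "mylib", "my_worker_main", MY_KEY_STATE and run_in_parallel are made-up names.)

    #include "postgres.h"
    #include "access/parallel.h"
    #include "access/xact.h"        /* EnterParallelMode, ExitParallelMode */
    #include "storage/shm_toc.h"

    #define MY_KEY_STATE    UINT64CONST(0xFF00000000000001)     /* hypothetical key */

    static void
    run_in_parallel(char *state, Size statelen, int nworkers)
    {
        ParallelContext *pcxt;
        char       *shared;

        EnterParallelMode();

        /* "mylib"/"my_worker_main" name the worker entry point to load */
        pcxt = CreateParallelContext("mylib", "my_worker_main", nworkers);

        /* estimate and create the DSM segment, then copy shared state into it */
        shm_toc_estimate_chunk(&pcxt->estimator, statelen);
        shm_toc_estimate_keys(&pcxt->estimator, 1);
        InitializeParallelDSM(pcxt);

        shared = shm_toc_allocate(pcxt->toc, statelen);
        memcpy(shared, state, statelen);
        shm_toc_insert(pcxt->toc, MY_KEY_STATE, shared);

        LaunchParallelWorkers(pcxt);
        WaitForParallelWorkersToFinish(pcxt);
        DestroyParallelContext(pcxt);

        ExitParallelMode();
    }

On the worker side, the named entry point has the parallel_worker_main_type signature shown above, (dsm_segment *seg, shm_toc *toc), and would retrieve the state with shm_toc_lookup(toc, MY_KEY_STATE, false).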
+extern void InitializeParallelDSM(ParallelContext *pcxt); +extern void ReinitializeParallelDSM(ParallelContext *pcxt); +extern void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch); +extern void LaunchParallelWorkers(ParallelContext *pcxt); +extern void WaitForParallelWorkersToAttach(ParallelContext *pcxt); +extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt); +extern void DestroyParallelContext(ParallelContext *pcxt); +extern bool ParallelContextActive(void); + +extern void HandleParallelMessageInterrupt(void); +extern void HandleParallelMessages(void); +extern void AtEOXact_Parallel(bool isCommit); +extern void AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId); +extern void ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end); + +extern void ParallelWorkerMain(Datum main_arg); + +#endif /* PARALLEL_H */ diff --git a/src/include/access/printsimple.h b/src/include/access/printsimple.h new file mode 100644 index 0000000..67a9950 --- /dev/null +++ b/src/include/access/printsimple.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * printsimple.h + * print simple tuples without catalog access + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/printsimple.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PRINTSIMPLE_H +#define PRINTSIMPLE_H + +#include "tcop/dest.h" + +extern bool printsimple(TupleTableSlot *slot, DestReceiver *self); +extern void printsimple_startup(DestReceiver *self, int operation, + TupleDesc tupdesc); + +#endif /* PRINTSIMPLE_H */ diff --git a/src/include/access/printtup.h b/src/include/access/printtup.h new file mode 100644 index 0000000..c9b3753 --- /dev/null +++ b/src/include/access/printtup.h @@ -0,0 +1,35 @@ +/*------------------------------------------------------------------------- + * + * printtup.h + * + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/printtup.h + * + *------------------------------------------------------------------------- + */ +#ifndef PRINTTUP_H +#define PRINTTUP_H + +#include "utils/portal.h" + +extern DestReceiver *printtup_create_DR(CommandDest dest); + +extern void SetRemoteDestReceiverParams(DestReceiver *self, Portal portal); + +extern void SendRowDescriptionMessage(StringInfo buf, + TupleDesc typeinfo, List *targetlist, int16 *formats); + +extern void debugStartup(DestReceiver *self, int operation, + TupleDesc typeinfo); +extern bool debugtup(TupleTableSlot *slot, DestReceiver *self); + +/* XXX these are really in executor/spi.c */ +extern void spi_dest_startup(DestReceiver *self, int operation, + TupleDesc typeinfo); +extern bool spi_printtup(TupleTableSlot *slot, DestReceiver *self); + +#endif /* PRINTTUP_H */ diff --git a/src/include/access/relation.h b/src/include/access/relation.h new file mode 100644 index 0000000..fd77a13 --- /dev/null +++ b/src/include/access/relation.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * relation.h + * Generic relation related routines. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/relation.h + * + *------------------------------------------------------------------------- + */ +#ifndef ACCESS_RELATION_H +#define ACCESS_RELATION_H + +#include "nodes/primnodes.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" + +extern Relation relation_open(Oid relationId, LOCKMODE lockmode); +extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode); +extern Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode); +extern Relation relation_openrv_extended(const RangeVar *relation, + LOCKMODE lockmode, bool missing_ok); +extern void relation_close(Relation relation, LOCKMODE lockmode); + +#endif /* ACCESS_RELATION_H */ diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h new file mode 100644 index 0000000..7c5fbeb --- /dev/null +++ b/src/include/access/reloptions.h @@ -0,0 +1,247 @@ +/*------------------------------------------------------------------------- + * + * reloptions.h + * Core support for relation and tablespace options (pg_class.reloptions + * and pg_tablespace.spcoptions) + * + * Note: the functions dealing with text-array reloptions values declare + * them as Datum, not ArrayType *, to avoid needing to include array.h + * into a lot of low-level code. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/reloptions.h + * + *------------------------------------------------------------------------- + */ +#ifndef RELOPTIONS_H +#define RELOPTIONS_H + +#include "access/amapi.h" +#include "access/htup.h" +#include "access/tupdesc.h" +#include "nodes/pg_list.h" +#include "storage/lock.h" + +/* types supported by reloptions */ +typedef enum relopt_type +{ + RELOPT_TYPE_BOOL, + RELOPT_TYPE_INT, + RELOPT_TYPE_REAL, + RELOPT_TYPE_ENUM, + RELOPT_TYPE_STRING +} relopt_type; + +/* kinds supported by reloptions */ +typedef enum relopt_kind +{ + RELOPT_KIND_LOCAL = 0, + RELOPT_KIND_HEAP = (1 << 0), + RELOPT_KIND_TOAST = (1 << 1), + RELOPT_KIND_BTREE = (1 << 2), + RELOPT_KIND_HASH = (1 << 3), + RELOPT_KIND_GIN = (1 << 4), + RELOPT_KIND_GIST = (1 << 5), + RELOPT_KIND_ATTRIBUTE = (1 << 6), + RELOPT_KIND_TABLESPACE = (1 << 7), + RELOPT_KIND_SPGIST = (1 << 8), + RELOPT_KIND_VIEW = (1 << 9), + RELOPT_KIND_BRIN = (1 << 10), + RELOPT_KIND_PARTITIONED = (1 << 11), + /* if you add a new kind, make sure you update "last_default" too */ + RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PARTITIONED, + /* some compilers treat enums as signed ints, so we can't use 1 << 31 */ + RELOPT_KIND_MAX = (1 << 30) +} relopt_kind; + +/* reloption namespaces allowed for heaps -- currently only TOAST */ +#define HEAP_RELOPT_NAMESPACES { "toast", NULL } + +/* generic struct to hold shared data */ +typedef struct relopt_gen +{ + const char *name; /* must be first (used as list termination + * marker) */ + const char *desc; + bits32 kinds; + LOCKMODE lockmode; + int namelen; + relopt_type type; +} relopt_gen; + +/* holds a parsed value */ +typedef struct relopt_value +{ + relopt_gen *gen; + bool isset; + union + { + bool bool_val; + int int_val; + double real_val; + int enum_val; + char *string_val; /* allocated separately */ + } values; +} relopt_value; + +/* reloptions records for specific variable types */ +typedef struct relopt_bool +{ + relopt_gen gen; + 
bool default_val; +} relopt_bool; + +typedef struct relopt_int +{ + relopt_gen gen; + int default_val; + int min; + int max; +} relopt_int; + +typedef struct relopt_real +{ + relopt_gen gen; + double default_val; + double min; + double max; +} relopt_real; + +/* + * relopt_enum_elt_def -- One member of the array of acceptable values + * of an enum reloption. + */ +typedef struct relopt_enum_elt_def +{ + const char *string_val; + int symbol_val; +} relopt_enum_elt_def; + +typedef struct relopt_enum +{ + relopt_gen gen; + relopt_enum_elt_def *members; + int default_val; + const char *detailmsg; + /* null-terminated array of members */ +} relopt_enum; + +/* validation routines for strings */ +typedef void (*validate_string_relopt) (const char *value); +typedef Size (*fill_string_relopt) (const char *value, void *ptr); + +/* validation routine for the whole option set */ +typedef void (*relopts_validator) (void *parsed_options, relopt_value *vals, int nvals); + +typedef struct relopt_string +{ + relopt_gen gen; + int default_len; + bool default_isnull; + validate_string_relopt validate_cb; + fill_string_relopt fill_cb; + char *default_val; +} relopt_string; + +/* This is the table datatype for build_reloptions() */ +typedef struct +{ + const char *optname; /* option's name */ + relopt_type opttype; /* option's datatype */ + int offset; /* offset of field in result struct */ +} relopt_parse_elt; + +/* Local reloption definition */ +typedef struct local_relopt +{ + relopt_gen *option; /* option definition */ + int offset; /* offset of parsed value in bytea structure */ +} local_relopt; + +/* Structure to hold local reloption data for build_local_reloptions() */ +typedef struct local_relopts +{ + List *options; /* list of local_relopt definitions */ + List *validators; /* list of relopts_validator callbacks */ + Size relopt_struct_size; /* size of parsed bytea structure */ +} local_relopts; + +/* + * Utility macro to get a value for a string reloption once the options + * are parsed. This gets a pointer to the string value itself. "optstruct" + * is the StdRdOptions struct or equivalent, "member" is the struct member + * corresponding to the string option. + */ +#define GET_STRING_RELOPTION(optstruct, member) \ + ((optstruct)->member == 0 ? 
NULL : \ + (char *)(optstruct) + (optstruct)->member) + +extern relopt_kind add_reloption_kind(void); +extern void add_bool_reloption(bits32 kinds, const char *name, const char *desc, + bool default_val, LOCKMODE lockmode); +extern void add_int_reloption(bits32 kinds, const char *name, const char *desc, + int default_val, int min_val, int max_val, + LOCKMODE lockmode); +extern void add_real_reloption(bits32 kinds, const char *name, const char *desc, + double default_val, double min_val, double max_val, + LOCKMODE lockmode); +extern void add_enum_reloption(bits32 kinds, const char *name, const char *desc, + relopt_enum_elt_def *members, int default_val, + const char *detailmsg, LOCKMODE lockmode); +extern void add_string_reloption(bits32 kinds, const char *name, const char *desc, + const char *default_val, validate_string_relopt validator, + LOCKMODE lockmode); + +extern void init_local_reloptions(local_relopts *opts, Size relopt_struct_size); +extern void register_reloptions_validator(local_relopts *opts, + relopts_validator validator); +extern void add_local_bool_reloption(local_relopts *opts, const char *name, + const char *desc, bool default_val, + int offset); +extern void add_local_int_reloption(local_relopts *opts, const char *name, + const char *desc, int default_val, + int min_val, int max_val, int offset); +extern void add_local_real_reloption(local_relopts *opts, const char *name, + const char *desc, double default_val, + double min_val, double max_val, + int offset); +extern void add_local_enum_reloption(local_relopts *relopts, + const char *name, const char *desc, + relopt_enum_elt_def *members, + int default_val, const char *detailmsg, + int offset); +extern void add_local_string_reloption(local_relopts *opts, const char *name, + const char *desc, + const char *default_val, + validate_string_relopt validator, + fill_string_relopt filler, int offset); + +extern Datum transformRelOptions(Datum oldOptions, List *defList, + const char *namspace, char *validnsps[], + bool acceptOidsOff, bool isReset); +extern List *untransformRelOptions(Datum options); +extern bytea *extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, + amoptions_function amoptions); +extern void *build_reloptions(Datum reloptions, bool validate, + relopt_kind kind, + Size relopt_struct_size, + const relopt_parse_elt *relopt_elems, + int num_relopt_elems); +extern void *build_local_reloptions(local_relopts *relopts, Datum options, + bool validate); + +extern bytea *default_reloptions(Datum reloptions, bool validate, + relopt_kind kind); +extern bytea *heap_reloptions(char relkind, Datum reloptions, bool validate); +extern bytea *view_reloptions(Datum reloptions, bool validate); +extern bytea *partitioned_table_reloptions(Datum reloptions, bool validate); +extern bytea *index_reloptions(amoptions_function amoptions, Datum reloptions, + bool validate); +extern bytea *attribute_reloptions(Datum reloptions, bool validate); +extern bytea *tablespace_reloptions(Datum reloptions, bool validate); +extern LOCKMODE AlterTableGetRelOptionsLockLevel(List *defList); + +#endif /* RELOPTIONS_H */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h new file mode 100644 index 0000000..74a07ef --- /dev/null +++ b/src/include/access/relscan.h @@ -0,0 +1,191 @@ +/*------------------------------------------------------------------------- + * + * relscan.h + * POSTGRES relation scan descriptor definitions. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/relscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef RELSCAN_H +#define RELSCAN_H + +#include "access/htup_details.h" +#include "access/itup.h" +#include "port/atomics.h" +#include "storage/buf.h" +#include "storage/spin.h" +#include "utils/relcache.h" + + +struct ParallelTableScanDescData; + +/* + * Generic descriptor for table scans. This is the base-class for table scans, + * which needs to be embedded in the scans of individual AMs. + */ +typedef struct TableScanDescData +{ + /* scan parameters */ + Relation rs_rd; /* heap relation descriptor */ + struct SnapshotData *rs_snapshot; /* snapshot to see */ + int rs_nkeys; /* number of scan keys */ + struct ScanKeyData *rs_key; /* array of scan key descriptors */ + + /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */ + ItemPointerData rs_mintid; + ItemPointerData rs_maxtid; + + /* + * Information about type and behaviour of the scan, a bitmask of members + * of the ScanOptions enum (see tableam.h). + */ + uint32 rs_flags; + + struct ParallelTableScanDescData *rs_parallel; /* parallel scan + * information */ +} TableScanDescData; +typedef struct TableScanDescData *TableScanDesc; + +/* + * Shared state for parallel table scan. + * + * Each backend participating in a parallel table scan has its own + * TableScanDesc in backend-private memory, and those objects all contain a + * pointer to this structure. The information here must be sufficient to + * properly initialize each new TableScanDesc as workers join the scan, and it + * must act as a information what to scan for those workers. + */ +typedef struct ParallelTableScanDescData +{ + Oid phs_relid; /* OID of relation to scan */ + bool phs_syncscan; /* report location to syncscan logic? */ + bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */ + Size phs_snapshot_off; /* data for snapshot */ +} ParallelTableScanDescData; +typedef struct ParallelTableScanDescData *ParallelTableScanDesc; + +/* + * Shared state for parallel table scans, for block oriented storage. + */ +typedef struct ParallelBlockTableScanDescData +{ + ParallelTableScanDescData base; + + BlockNumber phs_nblocks; /* # blocks in relation at start of scan */ + slock_t phs_mutex; /* mutual exclusion for setting startblock */ + BlockNumber phs_startblock; /* starting block number */ + pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to + * workers so far. */ +} ParallelBlockTableScanDescData; +typedef struct ParallelBlockTableScanDescData *ParallelBlockTableScanDesc; + +/* + * Per backend state for parallel table scan, for block-oriented storage. + */ +typedef struct ParallelBlockTableScanWorkerData +{ + uint64 phsw_nallocated; /* Current # of blocks into the scan */ + uint32 phsw_chunk_remaining; /* # blocks left in this chunk */ + uint32 phsw_chunk_size; /* The number of blocks to allocate in + * each I/O chunk for the scan */ +} ParallelBlockTableScanWorkerData; +typedef struct ParallelBlockTableScanWorkerData *ParallelBlockTableScanWorker; + +/* + * Base class for fetches from a table via an index. This is the base-class + * for such scans, which needs to be embedded in the respective struct for + * individual AMs. 
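(Editorial aside, not part of the patch: as an illustration of the "embedded in the respective struct" convention just described, heapam's fetch struct, roughly as in access/heapam.h, places the base class first so a pointer to the AM-specific struct can be used where the base type is expected.)

    typedef struct IndexFetchHeapData
    {
        IndexFetchTableData xs_base;    /* AM-independent part, must be first */
        Buffer      xs_cbuf;            /* current heap buffer in scan, if any */
    } IndexFetchHeapData;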
+ */ +typedef struct IndexFetchTableData +{ + Relation rel; +} IndexFetchTableData; + +/* + * We use the same IndexScanDescData structure for both amgettuple-based + * and amgetbitmap-based index scans. Some fields are only relevant in + * amgettuple-based scans. + */ +typedef struct IndexScanDescData +{ + /* scan parameters */ + Relation heapRelation; /* heap relation descriptor, or NULL */ + Relation indexRelation; /* index relation descriptor */ + struct SnapshotData *xs_snapshot; /* snapshot to see */ + int numberOfKeys; /* number of index qualifier conditions */ + int numberOfOrderBys; /* number of ordering operators */ + struct ScanKeyData *keyData; /* array of index qualifier descriptors */ + struct ScanKeyData *orderByData; /* array of ordering op descriptors */ + bool xs_want_itup; /* caller requests index tuples */ + bool xs_temp_snap; /* unregister snapshot at scan end? */ + + /* signaling to index AM about killing index tuples */ + bool kill_prior_tuple; /* last-returned tuple is dead */ + bool ignore_killed_tuples; /* do not return killed entries */ + bool xactStartedInRecovery; /* prevents killing/seeing killed + * tuples */ + + /* index access method's private state */ + void *opaque; /* access-method-specific info */ + + /* + * In an index-only scan, a successful amgettuple call must fill either + * xs_itup (and xs_itupdesc) or xs_hitup (and xs_hitupdesc) to provide the + * data returned by the scan. It can fill both, in which case the heap + * format will be used. + */ + IndexTuple xs_itup; /* index tuple returned by AM */ + struct TupleDescData *xs_itupdesc; /* rowtype descriptor of xs_itup */ + HeapTuple xs_hitup; /* index data returned by AM, as HeapTuple */ + struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */ + + ItemPointerData xs_heaptid; /* result */ + bool xs_heap_continue; /* T if must keep walking, potential + * further results */ + IndexFetchTableData *xs_heapfetch; + + bool xs_recheck; /* T means scan keys must be rechecked */ + + /* + * When fetching with an ordering operator, the values of the ORDER BY + * expressions of the last returned tuple, according to the index. If + * xs_recheckorderby is true, these need to be rechecked just like the + * scan keys, and the values returned here are a lower-bound on the actual + * values. 
+ */ + Datum *xs_orderbyvals; + bool *xs_orderbynulls; + bool xs_recheckorderby; + + /* parallel index scan information, in shared memory */ + struct ParallelIndexScanDescData *parallel_scan; +} IndexScanDescData; + +/* Generic structure for parallel scans */ +typedef struct ParallelIndexScanDescData +{ + Oid ps_relid; + Oid ps_indexid; + Size ps_offset; /* Offset in bytes of am specific structure */ + char ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; +} ParallelIndexScanDescData; + +struct TupleTableSlot; + +/* Struct for storage-or-index scans of system tables */ +typedef struct SysScanDescData +{ + Relation heap_rel; /* catalog being scanned */ + Relation irel; /* NULL if doing heap scan */ + struct TableScanDescData *scan; /* only valid in storage-scan case */ + struct IndexScanDescData *iscan; /* only valid in index-scan case */ + struct SnapshotData *snapshot; /* snapshot to unregister at end of scan */ + struct TupleTableSlot *slot; +} SysScanDescData; + +#endif /* RELSCAN_H */ diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h new file mode 100644 index 0000000..121f552 --- /dev/null +++ b/src/include/access/rewriteheap.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------- + * + * rewriteheap.h + * Declarations for heap rewrite support functions + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * src/include/access/rewriteheap.h + * + *------------------------------------------------------------------------- + */ +#ifndef REWRITE_HEAP_H +#define REWRITE_HEAP_H + +#include "access/htup.h" +#include "storage/itemptr.h" +#include "storage/relfilenode.h" +#include "utils/relcache.h" + +/* struct definition is private to rewriteheap.c */ +typedef struct RewriteStateData *RewriteState; + +extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap, + TransactionId OldestXmin, TransactionId FreezeXid, + MultiXactId MultiXactCutoff); +extern void end_heap_rewrite(RewriteState state); +extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple, + HeapTuple newTuple); +extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple); + +/* + * On-Disk data format for an individual logical rewrite mapping. + */ +typedef struct LogicalRewriteMappingData +{ + RelFileNode old_node; + RelFileNode new_node; + ItemPointerData old_tid; + ItemPointerData new_tid; +} LogicalRewriteMappingData; + +/* --- + * The filename consists of the following, dash separated, + * components: + * 1) database oid or InvalidOid for shared relations + * 2) the oid of the relation + * 3) upper 32bit of the LSN at which a rewrite started + * 4) lower 32bit of the LSN at which a rewrite started + * 5) xid we are mapping for + * 6) xid of the xact performing the mapping + * --- + */ +#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" +void CheckPointLogicalRewriteHeap(void); + +#endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h new file mode 100644 index 0000000..c9b5c56 --- /dev/null +++ b/src/include/access/rmgr.h @@ -0,0 +1,35 @@ +/* + * rmgr.h + * + * Resource managers definition + * + * src/include/access/rmgr.h + */ +#ifndef RMGR_H +#define RMGR_H + +typedef uint8 RmgrId; + +/* + * Built-in resource managers + * + * The actual numerical values for each rmgr ID are defined by the order + * of entries in rmgrlist.h. 
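(Editorial aside, not part of the patch: besides building the enum as shown below, a consumer can expand rmgrlist.h into a dispatch table by supplying its own definition of PG_RMGR, roughly as the backend's rmgr.c does. This sketch assumes RmgrData from access/xlog_internal.h plus the headers declaring each rmgr's functions.)

    #include "access/xlog_internal.h"   /* RmgrData */

    #define PG_RMGR(symname, name, redo, desc, identify, startup, cleanup, mask) \
        { name, redo, desc, identify, startup, cleanup, mask },

    const RmgrData RmgrTable[RM_MAX_ID + 1] = {
    #include "access/rmgrlist.h"
    };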
+ * + * Note: RM_MAX_ID must fit in RmgrId; widening that type will affect the XLOG + * file format. + */ +#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask) \ + symname, + +typedef enum RmgrIds +{ +#include "access/rmgrlist.h" + RM_NEXT_ID +} RmgrIds; + +#undef PG_RMGR + +#define RM_MAX_ID (RM_NEXT_ID - 1) + +#endif /* RMGR_H */ diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h new file mode 100644 index 0000000..f582cf5 --- /dev/null +++ b/src/include/access/rmgrlist.h @@ -0,0 +1,49 @@ +/*--------------------------------------------------------------------------- + * rmgrlist.h + * + * The resource manager list is kept in its own source file for possible + * use by automatic tools. The exact representation of a rmgr is determined + * by the PG_RMGR macro, which is not defined in this file; it can be + * defined by the caller for special purposes. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/rmgrlist.h + *--------------------------------------------------------------------------- + */ + +/* there is deliberately not an #ifndef RMGRLIST_H here */ + +/* + * List of resource manager entries. Note that order of entries defines the + * numerical values of each rmgr's ID, which is stored in WAL records. New + * entries should be added at the end, to avoid changing IDs of existing + * entries. + * + * Changes to this list possibly need an XLOG_PAGE_MAGIC bump. + */ + +/* symbol name, textual name, redo, desc, identify, startup, cleanup */ +PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL) +PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL) +PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL) +PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL) +PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL) +PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL) +PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL, NULL) +PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL, NULL) +PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL) +PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask) +PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask) +PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog_startup, btree_xlog_cleanup, btree_mask) +PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask) +PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask) +PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask) +PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask) +PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask) +PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask) +PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL) +PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, 
replorigin_desc, replorigin_identify, NULL, NULL, NULL) +PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask) +PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL) diff --git a/src/include/access/sdir.h b/src/include/access/sdir.h new file mode 100644 index 0000000..8154adf --- /dev/null +++ b/src/include/access/sdir.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * sdir.h + * POSTGRES scan direction definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/sdir.h + * + *------------------------------------------------------------------------- + */ +#ifndef SDIR_H +#define SDIR_H + + +/* + * ScanDirection was an int8 for no apparent reason. I kept the original + * values because I'm not sure if I'll break anything otherwise. -ay 2/95 + */ +typedef enum ScanDirection +{ + BackwardScanDirection = -1, + NoMovementScanDirection = 0, + ForwardScanDirection = 1 +} ScanDirection; + +/* + * ScanDirectionIsValid + * True iff scan direction is valid. + */ +#define ScanDirectionIsValid(direction) \ + ((bool) (BackwardScanDirection <= (direction) && \ + (direction) <= ForwardScanDirection)) + +/* + * ScanDirectionIsBackward + * True iff scan direction is backward. + */ +#define ScanDirectionIsBackward(direction) \ + ((bool) ((direction) == BackwardScanDirection)) + +/* + * ScanDirectionIsNoMovement + * True iff scan direction indicates no movement. + */ +#define ScanDirectionIsNoMovement(direction) \ + ((bool) ((direction) == NoMovementScanDirection)) + +/* + * ScanDirectionIsForward + * True iff scan direction is forward. + */ +#define ScanDirectionIsForward(direction) \ + ((bool) ((direction) == ForwardScanDirection)) + +#endif /* SDIR_H */ diff --git a/src/include/access/session.h b/src/include/access/session.h new file mode 100644 index 0000000..82cee5a --- /dev/null +++ b/src/include/access/session.h @@ -0,0 +1,44 @@ +/*------------------------------------------------------------------------- + * + * session.h + * Encapsulation of user session. + * + * Copyright (c) 2017-2021, PostgreSQL Global Development Group + * + * src/include/access/session.h + * + *------------------------------------------------------------------------- + */ +#ifndef SESSION_H +#define SESSION_H + +#include "lib/dshash.h" + +/* Avoid including typcache.h */ +struct SharedRecordTypmodRegistry; + +/* + * A struct encapsulating some elements of a user's session. For now this + * manages state that applies to parallel query, but in principle it could + * include other things that are currently global variables. + */ +typedef struct Session +{ + dsm_segment *segment; /* The session-scoped DSM segment. */ + dsa_area *area; /* The session-scoped DSA area. */ + + /* State managed by typcache.c. */ + struct SharedRecordTypmodRegistry *shared_typmod_registry; + dshash_table *shared_record_table; + dshash_table *shared_typmod_table; +} Session; + +extern void InitializeSession(void); +extern dsm_handle GetSessionDsmHandle(void); +extern void AttachSession(dsm_handle handle); +extern void DetachSession(void); + +/* The current session, or NULL for none. 
*/ +extern Session *CurrentSession; + +#endif /* SESSION_H */ diff --git a/src/include/access/skey.h b/src/include/access/skey.h new file mode 100644 index 0000000..92b7d09 --- /dev/null +++ b/src/include/access/skey.h @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * skey.h + * POSTGRES scan key definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/skey.h + * + *------------------------------------------------------------------------- + */ +#ifndef SKEY_H +#define SKEY_H + +#include "access/attnum.h" +#include "access/stratnum.h" +#include "fmgr.h" + + +/* + * A ScanKey represents the application of a comparison operator between + * a table or index column and a constant. When it's part of an array of + * ScanKeys, the comparison conditions are implicitly ANDed. The index + * column is the left argument of the operator, if it's a binary operator. + * (The data structure can support unary indexable operators too; in that + * case sk_argument would go unused. This is not currently implemented.) + * + * For an index scan, sk_strategy and sk_subtype must be set correctly for + * the operator. When using a ScanKey in a heap scan, these fields are not + * used and may be set to InvalidStrategy/InvalidOid. + * + * If the operator is collation-sensitive, sk_collation must be set + * correctly as well. + * + * A ScanKey can also represent a ScalarArrayOpExpr, that is a condition + * "column op ANY(ARRAY[...])". This is signaled by the SK_SEARCHARRAY + * flag bit. The sk_argument is not a value of the operator's right-hand + * argument type, but rather an array of such values, and the per-element + * comparisons are to be ORed together. + * + * A ScanKey can also represent a condition "column IS NULL" or "column + * IS NOT NULL"; these cases are signaled by the SK_SEARCHNULL and + * SK_SEARCHNOTNULL flag bits respectively. The argument is always NULL, + * and the sk_strategy, sk_subtype, sk_collation, and sk_func fields are + * not used (unless set by the index AM). + * + * SK_SEARCHARRAY, SK_SEARCHNULL and SK_SEARCHNOTNULL are supported only + * for index scans, not heap scans; and not all index AMs support them, + * only those that set amsearcharray or amsearchnulls respectively. + * + * A ScanKey can also represent an ordering operator invocation, that is + * an ordering requirement "ORDER BY indexedcol op constant". This looks + * the same as a comparison operator, except that the operator doesn't + * (usually) yield boolean. We mark such ScanKeys with SK_ORDER_BY. + * SK_SEARCHARRAY, SK_SEARCHNULL, SK_SEARCHNOTNULL cannot be used here. + * + * Note: in some places, ScanKeys are used as a convenient representation + * for the invocation of an access method support procedure. In this case + * sk_strategy/sk_subtype are not meaningful (but sk_collation can be); and + * sk_func may refer to a function that returns something other than boolean. 
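(Editorial aside, not part of the patch: a concrete instance of the simple comparison case described above, building an equality qualification for a catalog scan with ScanKeyInit(), which is declared further down in this file. The column and function constants come from the pg_class and fmgroids headers.)

    ScanKeyData key[1];

    /* "relname = 'my_table'" -- usable for either a heap or an index scan */
    ScanKeyInit(&key[0],
                Anum_pg_class_relname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum("my_table"));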
+ */ +typedef struct ScanKeyData +{ + int sk_flags; /* flags, see below */ + AttrNumber sk_attno; /* table or index column number */ + StrategyNumber sk_strategy; /* operator strategy number */ + Oid sk_subtype; /* strategy subtype */ + Oid sk_collation; /* collation to use, if needed */ + FmgrInfo sk_func; /* lookup info for function to call */ + Datum sk_argument; /* data to compare */ +} ScanKeyData; + +typedef ScanKeyData *ScanKey; + +/* + * About row comparisons: + * + * The ScanKey data structure also supports row comparisons, that is ordered + * tuple comparisons like (x, y) > (c1, c2), having the SQL-spec semantics + * "x > c1 OR (x = c1 AND y > c2)". Note that this is currently only + * implemented for btree index searches, not for heapscans or any other index + * type. A row comparison is represented by a "header" ScanKey entry plus + * a separate array of ScanKeys, one for each column of the row comparison. + * The header entry has these properties: + * sk_flags = SK_ROW_HEADER + * sk_attno = index column number for leading column of row comparison + * sk_strategy = btree strategy code for semantics of row comparison + * (ie, < <= > or >=) + * sk_subtype, sk_collation, sk_func: not used + * sk_argument: pointer to subsidiary ScanKey array + * If the header is part of a ScanKey array that's sorted by attno, it + * must be sorted according to the leading column number. + * + * The subsidiary ScanKey array appears in logical column order of the row + * comparison, which may be different from index column order. The array + * elements are like a normal ScanKey array except that: + * sk_flags must include SK_ROW_MEMBER, plus SK_ROW_END in the last + * element (needed since row header does not include a count) + * sk_func points to the btree comparison support function for the + * opclass, NOT the operator's implementation function. + * sk_strategy must be the same in all elements of the subsidiary array, + * that is, the same as in the header entry. + * SK_SEARCHARRAY, SK_SEARCHNULL, SK_SEARCHNOTNULL cannot be used here. + */ + +/* + * ScanKeyData sk_flags + * + * sk_flags bits 0-15 are reserved for system-wide use (symbols for those + * bits should be defined here). Bits 16-31 are reserved for use within + * individual index access methods. + */ +#define SK_ISNULL 0x0001 /* sk_argument is NULL */ +#define SK_UNARY 0x0002 /* unary operator (not supported!) 
*/ +#define SK_ROW_HEADER 0x0004 /* row comparison header (see above) */ +#define SK_ROW_MEMBER 0x0008 /* row comparison member (see above) */ +#define SK_ROW_END 0x0010 /* last row comparison member */ +#define SK_SEARCHARRAY 0x0020 /* scankey represents ScalarArrayOp */ +#define SK_SEARCHNULL 0x0040 /* scankey represents "col IS NULL" */ +#define SK_SEARCHNOTNULL 0x0080 /* scankey represents "col IS NOT NULL" */ +#define SK_ORDER_BY 0x0100 /* scankey is for ORDER BY op */ + + +/* + * prototypes for functions in access/common/scankey.c + */ +extern void ScanKeyInit(ScanKey entry, + AttrNumber attributeNumber, + StrategyNumber strategy, + RegProcedure procedure, + Datum argument); +extern void ScanKeyEntryInitialize(ScanKey entry, + int flags, + AttrNumber attributeNumber, + StrategyNumber strategy, + Oid subtype, + Oid collation, + RegProcedure procedure, + Datum argument); +extern void ScanKeyEntryInitializeWithInfo(ScanKey entry, + int flags, + AttrNumber attributeNumber, + StrategyNumber strategy, + Oid subtype, + Oid collation, + FmgrInfo *finfo, + Datum argument); + +#endif /* SKEY_H */ diff --git a/src/include/access/slru.h b/src/include/access/slru.h new file mode 100644 index 0000000..dd52e8c --- /dev/null +++ b/src/include/access/slru.h @@ -0,0 +1,174 @@ +/*------------------------------------------------------------------------- + * + * slru.h + * Simple LRU buffering for transaction status logfiles + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/slru.h + * + *------------------------------------------------------------------------- + */ +#ifndef SLRU_H +#define SLRU_H + +#include "access/xlogdefs.h" +#include "storage/lwlock.h" +#include "storage/sync.h" + + +/* + * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere + * else in Postgres. The segment size can be chosen somewhat arbitrarily; + * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG + * or 64K transactions for SUBTRANS. + * + * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, + * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where + * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at + * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need + * take no explicit notice of that fact in slru.c, except when comparing + * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). + */ +#define SLRU_PAGES_PER_SEGMENT 32 + +/* + * Page status codes. Note that these do not include the "dirty" bit. + * page_dirty can be true only in the VALID or WRITE_IN_PROGRESS states; + * in the latter case it implies that the page has been re-dirtied since + * the write started. + */ +typedef enum +{ + SLRU_PAGE_EMPTY, /* buffer is not in use */ + SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */ + SLRU_PAGE_VALID, /* page is valid and not being written */ + SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */ +} SlruPageStatus; + +/* + * Shared-memory state + */ +typedef struct SlruSharedData +{ + LWLock *ControlLock; + + /* Number of buffers managed by this SLRU structure */ + int num_slots; + + /* + * Arrays holding info for each buffer slot. Page number is undefined + * when status is EMPTY, as is page_lru_count. 
+ */ + char **page_buffer; + SlruPageStatus *page_status; + bool *page_dirty; + int *page_number; + int *page_lru_count; + LWLockPadded *buffer_locks; + + /* + * Optional array of WAL flush LSNs associated with entries in the SLRU + * pages. If not zero/NULL, we must flush WAL before writing pages (true + * for pg_xact, false for multixact, pg_subtrans, pg_notify). group_lsn[] + * has lsn_groups_per_page entries per buffer slot, each containing the + * highest LSN known for a contiguous group of SLRU entries on that slot's + * page. + */ + XLogRecPtr *group_lsn; + int lsn_groups_per_page; + + /*---------- + * We mark a page "most recently used" by setting + * page_lru_count[slotno] = ++cur_lru_count; + * The oldest page is therefore the one with the highest value of + * cur_lru_count - page_lru_count[slotno] + * The counts will eventually wrap around, but this calculation still + * works as long as no page's age exceeds INT_MAX counts. + *---------- + */ + int cur_lru_count; + + /* + * latest_page_number is the page number of the current end of the log; + * this is not critical data, since we use it only to avoid swapping out + * the latest page. + */ + int latest_page_number; + + /* SLRU's index for statistics purposes (might not be unique) */ + int slru_stats_idx; +} SlruSharedData; + +typedef SlruSharedData *SlruShared; + +/* + * SlruCtlData is an unshared structure that points to the active information + * in shared memory. + */ +typedef struct SlruCtlData +{ + SlruShared shared; + + /* + * Which sync handler function to use when handing sync requests over to + * the checkpointer. SYNC_HANDLER_NONE to disable fsync (eg pg_notify). + */ + SyncRequestHandler sync_handler; + + /* + * Decide whether a page is "older" for truncation and as a hint for + * evicting pages in LRU order. Return true if every entry of the first + * argument is older than every entry of the second argument. Note that + * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also + * arises when some entries are older and some are not. For SLRUs using + * SimpleLruTruncate(), this must use modular arithmetic. (For others, + * the behavior of this callback has no functional implications.) Use + * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria. + */ + bool (*PagePrecedes) (int, int); + + /* + * Dir is set during SimpleLruInit and does not change thereafter. Since + * it's always the same, it doesn't need to be in shared memory. 
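(Editorial aside, not part of the patch: Dir is filled from the subdir argument of SimpleLruInit(), declared below. A sketch loosely modeled on how the commit-log SLRU is set up; the buffer counts are made up, and the lock, tranche and sync-handler names come from other headers.)

    static SlruCtlData XactCtlData;
    #define XactCtl (&XactCtlData)

    SimpleLruInit(XactCtl, "Xact",
                  128, 32,                      /* nslots, nlsns: illustrative */
                  XactSLRULock, "pg_xact",      /* "pg_xact" is copied into Dir */
                  LWTRANCHE_XACT_BUFFER, SYNC_HANDLER_CLOG);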
+ */ + char Dir[64]; +} SlruCtlData; + +typedef SlruCtlData *SlruCtl; + + +extern Size SimpleLruShmemSize(int nslots, int nlsns); +extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, + LWLock *ctllock, const char *subdir, int tranche_id, + SyncRequestHandler sync_handler); +extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); +extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, + TransactionId xid); +extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, + TransactionId xid); +extern void SimpleLruWritePage(SlruCtl ctl, int slotno); +extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied); +#ifdef USE_ASSERT_CHECKING +extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); +#else +#define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0) +#endif +extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); +extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); + +typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, + void *data); +extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); +extern void SlruDeleteSegment(SlruCtl ctl, int segno); + +extern int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path); + +/* SlruScanDirectory public callbacks */ +extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, + int segpage, void *data); +extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, + void *data); + +#endif /* SLRU_H */ diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h new file mode 100644 index 0000000..2eb2f42 --- /dev/null +++ b/src/include/access/spgist.h @@ -0,0 +1,229 @@ +/*------------------------------------------------------------------------- + * + * spgist.h + * Public header file for SP-GiST access method. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/spgist.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPGIST_H +#define SPGIST_H + +#include "access/amapi.h" +#include "access/xlogreader.h" +#include "lib/stringinfo.h" + + +/* SPGiST opclass support function numbers */ +#define SPGIST_CONFIG_PROC 1 +#define SPGIST_CHOOSE_PROC 2 +#define SPGIST_PICKSPLIT_PROC 3 +#define SPGIST_INNER_CONSISTENT_PROC 4 +#define SPGIST_LEAF_CONSISTENT_PROC 5 +#define SPGIST_COMPRESS_PROC 6 +#define SPGIST_OPTIONS_PROC 7 +#define SPGISTNRequiredProc 5 +#define SPGISTNProc 7 + +/* + * Argument structs for spg_config method + */ +typedef struct spgConfigIn +{ + Oid attType; /* Data type to be indexed */ +} spgConfigIn; + +typedef struct spgConfigOut +{ + Oid prefixType; /* Data type of inner-tuple prefixes */ + Oid labelType; /* Data type of inner-tuple node labels */ + Oid leafType; /* Data type of leaf-tuple values */ + bool canReturnData; /* Opclass can reconstruct original data */ + bool longValuesOK; /* Opclass can cope with values > 1 page */ +} spgConfigOut; + +/* + * Argument structs for spg_choose method + */ +typedef struct spgChooseIn +{ + Datum datum; /* original datum to be indexed */ + Datum leafDatum; /* current datum to be stored at leaf */ + int level; /* current level (counting from zero) */ + + /* Data from current inner tuple */ + bool allTheSame; /* tuple is marked all-the-same? */ + bool hasPrefix; /* tuple has a prefix? 
*/ + Datum prefixDatum; /* if so, the prefix value */ + int nNodes; /* number of nodes in the inner tuple */ + Datum *nodeLabels; /* node label values (NULL if none) */ +} spgChooseIn; + +typedef enum spgChooseResultType +{ + spgMatchNode = 1, /* descend into existing node */ + spgAddNode, /* add a node to the inner tuple */ + spgSplitTuple /* split inner tuple (change its prefix) */ +} spgChooseResultType; + +typedef struct spgChooseOut +{ + spgChooseResultType resultType; /* action code, see above */ + union + { + struct /* results for spgMatchNode */ + { + int nodeN; /* descend to this node (index from 0) */ + int levelAdd; /* increment level by this much */ + Datum restDatum; /* new leaf datum */ + } matchNode; + struct /* results for spgAddNode */ + { + Datum nodeLabel; /* new node's label */ + int nodeN; /* where to insert it (index from 0) */ + } addNode; + struct /* results for spgSplitTuple */ + { + /* Info to form new upper-level inner tuple with one child tuple */ + bool prefixHasPrefix; /* tuple should have a prefix? */ + Datum prefixPrefixDatum; /* if so, its value */ + int prefixNNodes; /* number of nodes */ + Datum *prefixNodeLabels; /* their labels (or NULL for no + * labels) */ + int childNodeN; /* which node gets child tuple */ + + /* Info to form new lower-level inner tuple with all old nodes */ + bool postfixHasPrefix; /* tuple should have a prefix? */ + Datum postfixPrefixDatum; /* if so, its value */ + } splitTuple; + } result; +} spgChooseOut; + +/* + * Argument structs for spg_picksplit method + */ +typedef struct spgPickSplitIn +{ + int nTuples; /* number of leaf tuples */ + Datum *datums; /* their datums (array of length nTuples) */ + int level; /* current level (counting from zero) */ +} spgPickSplitIn; + +typedef struct spgPickSplitOut +{ + bool hasPrefix; /* new inner tuple should have a prefix? */ + Datum prefixDatum; /* if so, its value */ + + int nNodes; /* number of nodes for new inner tuple */ + Datum *nodeLabels; /* their labels (or NULL for no labels) */ + + int *mapTuplesToNodes; /* node index for each leaf tuple */ + Datum *leafTupleDatums; /* datum to store in each new leaf tuple */ +} spgPickSplitOut; + +/* + * Argument structs for spg_inner_consistent method + */ +typedef struct spgInnerConsistentIn +{ + ScanKey scankeys; /* array of operators and comparison values */ + ScanKey orderbys; /* array of ordering operators and comparison + * values */ + int nkeys; /* length of scankeys array */ + int norderbys; /* length of orderbys array */ + + Datum reconstructedValue; /* value reconstructed at parent */ + void *traversalValue; /* opclass-specific traverse value */ + MemoryContext traversalMemoryContext; /* put new traverse values here */ + int level; /* current level (counting from zero) */ + bool returnData; /* original data must be returned? */ + + /* Data from current inner tuple */ + bool allTheSame; /* tuple is marked all-the-same? */ + bool hasPrefix; /* tuple has a prefix? 
*/ + Datum prefixDatum; /* if so, the prefix value */ + int nNodes; /* number of nodes in the inner tuple */ + Datum *nodeLabels; /* node label values (NULL if none) */ +} spgInnerConsistentIn; + +typedef struct spgInnerConsistentOut +{ + int nNodes; /* number of child nodes to be visited */ + int *nodeNumbers; /* their indexes in the node array */ + int *levelAdds; /* increment level by this much for each */ + Datum *reconstructedValues; /* associated reconstructed values */ + void **traversalValues; /* opclass-specific traverse values */ + double **distances; /* associated distances */ +} spgInnerConsistentOut; + +/* + * Argument structs for spg_leaf_consistent method + */ +typedef struct spgLeafConsistentIn +{ + ScanKey scankeys; /* array of operators and comparison values */ + ScanKey orderbys; /* array of ordering operators and comparison + * values */ + int nkeys; /* length of scankeys array */ + int norderbys; /* length of orderbys array */ + + Datum reconstructedValue; /* value reconstructed at parent */ + void *traversalValue; /* opclass-specific traverse value */ + int level; /* current level (counting from zero) */ + bool returnData; /* original data must be returned? */ + + Datum leafDatum; /* datum in leaf tuple */ +} spgLeafConsistentIn; + +typedef struct spgLeafConsistentOut +{ + Datum leafValue; /* reconstructed original data, if any */ + bool recheck; /* set true if operator must be rechecked */ + bool recheckDistances; /* set true if distances must be rechecked */ + double *distances; /* associated distances */ +} spgLeafConsistentOut; + + +/* spgutils.c */ +extern bytea *spgoptions(Datum reloptions, bool validate); + +/* spginsert.c */ +extern IndexBuildResult *spgbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void spgbuildempty(Relation index); +extern bool spginsert(Relation index, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); + +/* spgscan.c */ +extern IndexScanDesc spgbeginscan(Relation rel, int keysz, int orderbysz); +extern void spgendscan(IndexScanDesc scan); +extern void spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +extern int64 spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm); +extern bool spggettuple(IndexScanDesc scan, ScanDirection dir); +extern bool spgcanreturn(Relation index, int attno); + +/* spgvacuum.c */ +extern IndexBulkDeleteResult *spgbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *spgvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); + +/* spgvalidate.c */ +extern bool spgvalidate(Oid opclassoid); +extern void spgadjustmembers(Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions); + +#endif /* SPGIST_H */ diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h new file mode 100644 index 0000000..40d3b71 --- /dev/null +++ b/src/include/access/spgist_private.h @@ -0,0 +1,548 @@ +/*------------------------------------------------------------------------- + * + * spgist_private.h + * Private declarations for SP-GiST access method. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/spgist_private.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPGIST_PRIVATE_H +#define SPGIST_PRIVATE_H + +#include "access/itup.h" +#include "access/spgist.h" +#include "catalog/pg_am_d.h" +#include "nodes/tidbitmap.h" +#include "storage/buf.h" +#include "utils/geo_decls.h" +#include "utils/relcache.h" + + +typedef struct SpGistOptions +{ + int32 varlena_header_; /* varlena header (do not touch directly!) */ + int fillfactor; /* page fill factor in percent (0..100) */ +} SpGistOptions; + +#define SpGistGetFillFactor(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == SPGIST_AM_OID), \ + (relation)->rd_options ? \ + ((SpGistOptions *) (relation)->rd_options)->fillfactor : \ + SPGIST_DEFAULT_FILLFACTOR) +#define SpGistGetTargetPageFreeSpace(relation) \ + (BLCKSZ * (100 - SpGistGetFillFactor(relation)) / 100) + + +/* SPGiST leaf tuples have one key column, optionally have included columns */ +#define spgKeyColumn 0 +#define spgFirstIncludeColumn 1 + +/* Page numbers of fixed-location pages */ +#define SPGIST_METAPAGE_BLKNO (0) /* metapage */ +#define SPGIST_ROOT_BLKNO (1) /* root for normal entries */ +#define SPGIST_NULL_BLKNO (2) /* root for null-value entries */ +#define SPGIST_LAST_FIXED_BLKNO SPGIST_NULL_BLKNO + +#define SpGistBlockIsRoot(blkno) \ + ((blkno) == SPGIST_ROOT_BLKNO || (blkno) == SPGIST_NULL_BLKNO) +#define SpGistBlockIsFixed(blkno) \ + ((BlockNumber) (blkno) <= (BlockNumber) SPGIST_LAST_FIXED_BLKNO) + +/* + * Contents of page special space on SPGiST index pages + */ +typedef struct SpGistPageOpaqueData +{ + uint16 flags; /* see bit definitions below */ + uint16 nRedirection; /* number of redirection tuples on page */ + uint16 nPlaceholder; /* number of placeholder tuples on page */ + /* note there's no count of either LIVE or DEAD tuples ... */ + uint16 spgist_page_id; /* for identification of SP-GiST indexes */ +} SpGistPageOpaqueData; + +typedef SpGistPageOpaqueData *SpGistPageOpaque; + +/* Flag bits in page special space */ +#define SPGIST_META (1<<0) +#define SPGIST_DELETED (1<<1) /* never set, but keep for backwards + * compatibility */ +#define SPGIST_LEAF (1<<2) +#define SPGIST_NULLS (1<<3) + +#define SpGistPageGetOpaque(page) ((SpGistPageOpaque) PageGetSpecialPointer(page)) +#define SpGistPageIsMeta(page) (SpGistPageGetOpaque(page)->flags & SPGIST_META) +#define SpGistPageIsDeleted(page) (SpGistPageGetOpaque(page)->flags & SPGIST_DELETED) +#define SpGistPageIsLeaf(page) (SpGistPageGetOpaque(page)->flags & SPGIST_LEAF) +#define SpGistPageStoresNulls(page) (SpGistPageGetOpaque(page)->flags & SPGIST_NULLS) + +/* + * The page ID is for the convenience of pg_filedump and similar utilities, + * which otherwise would have a hard time telling pages of different index + * types apart. It should be the last 2 bytes on the page. This is more or + * less "free" due to alignment considerations. + * + * See comments above GinPageOpaqueData. + */ +#define SPGIST_PAGE_ID 0xFF82 + +/* + * Each backend keeps a cache of last-used page info in its index->rd_amcache + * area. This is initialized from, and occasionally written back to, + * shared storage in the index metapage. 
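/*
 * A minimal sketch showing how the block-number and page-flag macros above
 * are typically combined; the helper name is invented, and bufpage.h is
 * assumed for the Page type.
 */
static inline bool
spg_example_is_plain_leaf_page(Page page, BlockNumber blkno)
{
    /* the metapage and the two root pages are never ordinary leaf pages */
    if (SpGistBlockIsFixed(blkno))
        return false;

    /* want a leaf page in the main (non-null) tree */
    return SpGistPageIsLeaf(page) && !SpGistPageStoresNulls(page);
}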
+ */ +typedef struct SpGistLastUsedPage +{ + BlockNumber blkno; /* block number, or InvalidBlockNumber */ + int freeSpace; /* page's free space (could be obsolete!) */ +} SpGistLastUsedPage; + +/* Note: indexes in cachedPage[] match flag assignments for SpGistGetBuffer */ +#define SPGIST_CACHED_PAGES 8 + +typedef struct SpGistLUPCache +{ + SpGistLastUsedPage cachedPage[SPGIST_CACHED_PAGES]; +} SpGistLUPCache; + +/* + * metapage + */ +typedef struct SpGistMetaPageData +{ + uint32 magicNumber; /* for identity cross-check */ + SpGistLUPCache lastUsedPages; /* shared storage of last-used info */ +} SpGistMetaPageData; + +#define SPGIST_MAGIC_NUMBER (0xBA0BABEE) + +#define SpGistPageGetMeta(p) \ + ((SpGistMetaPageData *) PageGetContents(p)) + +/* + * Private state of index AM. SpGistState is common to both insert and + * search code; SpGistScanOpaque is for searches only. + */ + +typedef struct SpGistLeafTupleData *SpGistLeafTuple; /* forward reference */ + +/* Per-datatype info needed in SpGistState */ +typedef struct SpGistTypeDesc +{ + Oid type; + int16 attlen; + bool attbyval; + char attalign; + char attstorage; +} SpGistTypeDesc; + +typedef struct SpGistState +{ + Relation index; /* index we're working with */ + + spgConfigOut config; /* filled in by opclass config method */ + + SpGistTypeDesc attType; /* type of values to be indexed/restored */ + SpGistTypeDesc attLeafType; /* type of leaf-tuple values */ + SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */ + SpGistTypeDesc attLabelType; /* type of node label values */ + + /* leafTupDesc typically points to index's tupdesc, but not always */ + TupleDesc leafTupDesc; /* descriptor for leaf-level tuples */ + + char *deadTupleStorage; /* workspace for spgFormDeadTuple */ + + TransactionId myXid; /* XID to use when creating a redirect tuple */ + bool isBuild; /* true if doing index build */ +} SpGistState; + +/* Item to be re-examined later during a search */ +typedef struct SpGistSearchItem +{ + pairingheap_node phNode; /* pairing heap node */ + Datum value; /* value reconstructed from parent, or + * leafValue if isLeaf */ + SpGistLeafTuple leafTuple; /* whole leaf tuple, if needed */ + void *traversalValue; /* opclass-specific traverse value */ + int level; /* level of items on this page */ + ItemPointerData heapPtr; /* heap info, if heap tuple */ + bool isNull; /* SearchItem is NULL item */ + bool isLeaf; /* SearchItem is heap item */ + bool recheck; /* qual recheck is needed */ + bool recheckDistances; /* distance recheck is needed */ + + /* array with numberOfOrderBys entries */ + double distances[FLEXIBLE_ARRAY_MEMBER]; +} SpGistSearchItem; + +#define SizeOfSpGistSearchItem(n_distances) \ + (offsetof(SpGistSearchItem, distances) + sizeof(double) * (n_distances)) + +/* + * Private state of an index scan + */ +typedef struct SpGistScanOpaqueData +{ + SpGistState state; /* see above */ + pairingheap *scanQueue; /* queue of to be visited items */ + MemoryContext tempCxt; /* short-lived memory context */ + MemoryContext traversalCxt; /* single scan lifetime memory context */ + + /* Control flags showing whether to search nulls and/or non-nulls */ + bool searchNulls; /* scan matches (all) null entries */ + bool searchNonNulls; /* scan matches (some) non-null entries */ + + /* Index quals to be passed to opclass (null-related quals removed) */ + int numberOfKeys; /* number of index qualifier conditions */ + ScanKey keyData; /* array of index qualifier descriptors */ + int numberOfOrderBys; /* number of ordering operators */ 
+ int numberOfNonNullOrderBys; /* number of ordering operators + * with non-NULL arguments */ + ScanKey orderByData; /* array of ordering op descriptors */ + Oid *orderByTypes; /* array of ordering op return types */ + int *nonNullOrderByOffsets; /* array of offset of non-NULL + * ordering keys in the original array */ + Oid indexCollation; /* collation of index column */ + + /* Opclass defined functions: */ + FmgrInfo innerConsistentFn; + FmgrInfo leafConsistentFn; + + /* Pre-allocated workspace arrays: */ + double *zeroDistances; + double *infDistances; + + /* These fields are only used in amgetbitmap scans: */ + TIDBitmap *tbm; /* bitmap being filled */ + int64 ntids; /* number of TIDs passed to bitmap */ + + /* These fields are only used in amgettuple scans: */ + bool want_itup; /* are we reconstructing tuples? */ + TupleDesc reconTupDesc; /* if so, descriptor for reconstructed tuples */ + int nPtrs; /* number of TIDs found on current page */ + int iPtr; /* index for scanning through same */ + ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */ + bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */ + bool recheckDistances[MaxIndexTuplesPerPage]; /* distance recheck + * flags */ + HeapTuple reconTups[MaxIndexTuplesPerPage]; /* reconstructed tuples */ + + /* distances (for recheck) */ + IndexOrderByDistance *distances[MaxIndexTuplesPerPage]; + + /* + * Note: using MaxIndexTuplesPerPage above is a bit hokey since + * SpGistLeafTuples aren't exactly IndexTuples; however, they are larger, + * so this is safe. + */ +} SpGistScanOpaqueData; + +typedef SpGistScanOpaqueData *SpGistScanOpaque; + +/* + * This struct is what we actually keep in index->rd_amcache. It includes + * static configuration information as well as the lastUsedPages cache. + */ +typedef struct SpGistCache +{ + spgConfigOut config; /* filled in by opclass config method */ + + SpGistTypeDesc attType; /* type of values to be indexed/restored */ + SpGistTypeDesc attLeafType; /* type of leaf-tuple values */ + SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */ + SpGistTypeDesc attLabelType; /* type of node label values */ + + SpGistLUPCache lastUsedPages; /* local storage of last-used info */ +} SpGistCache; + + +/* + * SPGiST tuple types. Note: inner, leaf, and dead tuple structs + * must have the same tupstate field in the same position! Real inner and + * leaf tuples always have tupstate = LIVE; if the state is something else, + * use the SpGistDeadTuple struct to inspect the tuple. + */ + +/* values of tupstate (see README for more info) */ +#define SPGIST_LIVE 0 /* normal live tuple (either inner or leaf) */ +#define SPGIST_REDIRECT 1 /* temporary redirection placeholder */ +#define SPGIST_DEAD 2 /* dead, cannot be removed because of links */ +#define SPGIST_PLACEHOLDER 3 /* placeholder, used to preserve offsets */ + +/* + * SPGiST inner tuple: list of "nodes" that subdivide a set of tuples + * + * Inner tuple layout: + * header/optional prefix/array of nodes, which are SpGistNodeTuples + * + * size and prefixSize must be multiples of MAXALIGN + * + * If the prefix datum is of a pass-by-value type, it is stored in its + * Datum representation, that is its on-disk representation is of length + * sizeof(Datum). This is a fairly unfortunate choice, because in no other + * place does Postgres use Datum as an on-disk representation; it creates + * an unnecessary incompatibility between 32-bit and 64-bit builds. 
But the + * compatibility loss is mostly theoretical since MAXIMUM_ALIGNOF typically + * differs between such builds, too. Anyway we're stuck with it now. + */ +typedef struct SpGistInnerTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + allTheSame:1, /* all nodes in tuple are equivalent */ + nNodes:13, /* number of nodes within inner tuple */ + prefixSize:16; /* size of prefix, or 0 if none */ + uint16 size; /* total size of inner tuple */ + /* On most machines there will be a couple of wasted bytes here */ + /* prefix datum follows, then nodes */ +} SpGistInnerTupleData; + +typedef SpGistInnerTupleData *SpGistInnerTuple; + +/* these must match largest values that fit in bit fields declared above */ +#define SGITMAXNNODES 0x1FFF +#define SGITMAXPREFIXSIZE 0xFFFF +#define SGITMAXSIZE 0xFFFF + +#define SGITHDRSZ MAXALIGN(sizeof(SpGistInnerTupleData)) +#define _SGITDATA(x) (((char *) (x)) + SGITHDRSZ) +#define SGITDATAPTR(x) ((x)->prefixSize ? _SGITDATA(x) : NULL) +#define SGITDATUM(x, s) ((x)->prefixSize ? \ + ((s)->attPrefixType.attbyval ? \ + *(Datum *) _SGITDATA(x) : \ + PointerGetDatum(_SGITDATA(x))) \ + : (Datum) 0) +#define SGITNODEPTR(x) ((SpGistNodeTuple) (_SGITDATA(x) + (x)->prefixSize)) + +/* Macro for iterating through the nodes of an inner tuple */ +#define SGITITERATE(x, i, nt) \ + for ((i) = 0, (nt) = SGITNODEPTR(x); \ + (i) < (x)->nNodes; \ + (i)++, (nt) = (SpGistNodeTuple) (((char *) (nt)) + IndexTupleSize(nt))) + +/* + * SPGiST node tuple: one node within an inner tuple + * + * Node tuples use the same header as ordinary Postgres IndexTuples, but + * we do not use a null bitmap, because we know there is only one column + * so the INDEX_NULL_MASK bit suffices. Also, pass-by-value datums are + * stored in Datum form, the same convention as for inner tuple prefixes. + */ + +typedef IndexTupleData SpGistNodeTupleData; + +typedef SpGistNodeTupleData *SpGistNodeTuple; + +#define SGNTHDRSZ MAXALIGN(sizeof(SpGistNodeTupleData)) +#define SGNTDATAPTR(x) (((char *) (x)) + SGNTHDRSZ) +#define SGNTDATUM(x, s) ((s)->attLabelType.attbyval ? \ + *(Datum *) SGNTDATAPTR(x) : \ + PointerGetDatum(SGNTDATAPTR(x))) + +/* + * SPGiST leaf tuple: carries a leaf datum and a heap tuple TID, + * and optionally some "included" columns. + * + * In the simplest case, the leaf datum is the same as the indexed value; + * but it could also be a suffix or some other sort of delta that permits + * reconstruction given knowledge of the prefix path traversed to get here. + * Any included columns are stored without modification. + * + * A nulls bitmap is present if there are included columns AND any of the + * datums are NULL. We do not need a nulls bitmap for the case of a null + * leaf datum without included columns, as we can infer whether the leaf + * datum is null from whether the tuple is stored on a nulls page. (This + * provision is mostly for backwards compatibility, but it does save space + * on 32-bit machines.) As with other PG index tuple designs, if the nulls + * bitmap exists then it's of size INDEX_MAX_KEYS bits regardless of the + * actual number of attributes. For the usual choice of INDEX_MAX_KEYS, + * this costs nothing because of alignment considerations. + * + * The size field is wider than could possibly be needed for an on-disk leaf + * tuple, but this allows us to form leaf tuples even when the datum is too + * wide to be stored immediately, and it costs nothing because of alignment + * considerations. 
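/*
 * A minimal sketch of walking the nodes of an inner tuple with SGITITERATE
 * and SGNTDATUM, assuming an initialized SpGistState; the function name is
 * invented, and node labels are only meaningful when the opclass defines
 * them.
 */
static void
spg_example_walk_nodes(SpGistState *state, SpGistInnerTuple innerTuple)
{
    SpGistNodeTuple node;
    int         i;

    SGITITERATE(innerTuple, i, node)
    {
        Datum       label = SGNTDATUM(node, state);

        /* ItemPointerIsValid(&node->t_tid) tells whether a downlink is set */
        (void) label;
    }
}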
+ * + * t_info holds the nextOffset field (14 bits wide, enough for supported + * page sizes) plus the has-nulls-bitmap flag bit; another flag bit is free. + * + * Normally, nextOffset links to the next tuple belonging to the same parent + * node (which must be on the same page), or it's 0 if there is no next tuple. + * But when the root page is a leaf page, we don't chain its tuples, + * so nextOffset is always 0 on the root. + * + * size must be a multiple of MAXALIGN; also, it must be at least SGDTSIZE + * so that the tuple can be converted to REDIRECT status later. (This + * restriction only adds bytes for a NULL leaf datum stored on a 32-bit + * machine; otherwise alignment restrictions force it anyway.) + */ +typedef struct SpGistLeafTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + size:30; /* large enough for any palloc'able value */ + uint16 t_info; /* nextOffset, which links to the next tuple + * in chain, plus two flag bits */ + ItemPointerData heapPtr; /* TID of represented heap tuple */ + /* nulls bitmap follows if the flag bit for it is set */ + /* leaf datum, then any included datums, follows on a MAXALIGN boundary */ +} SpGistLeafTupleData; + +/* Macros to access nextOffset and bit fields inside t_info */ +#define SGLT_GET_NEXTOFFSET(spgLeafTuple) \ + ((spgLeafTuple)->t_info & 0x3FFF) +#define SGLT_GET_HASNULLMASK(spgLeafTuple) \ + (((spgLeafTuple)->t_info & 0x8000) ? true : false) +#define SGLT_SET_NEXTOFFSET(spgLeafTuple, offsetNumber) \ + ((spgLeafTuple)->t_info = \ + ((spgLeafTuple)->t_info & 0xC000) | ((offsetNumber) & 0x3FFF)) +#define SGLT_SET_HASNULLMASK(spgLeafTuple, hasnulls) \ + ((spgLeafTuple)->t_info = \ + ((spgLeafTuple)->t_info & 0x7FFF) | ((hasnulls) ? 0x8000 : 0)) + +#define SGLTHDRSZ(hasnulls) \ + ((hasnulls) ? MAXALIGN(sizeof(SpGistLeafTupleData) + \ + sizeof(IndexAttributeBitMapData)) : \ + MAXALIGN(sizeof(SpGistLeafTupleData))) +#define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ(SGLT_GET_HASNULLMASK(x))) +#define SGLTDATUM(x, s) fetch_att(SGLTDATAPTR(x), \ + (s)->attLeafType.attbyval, \ + (s)->attLeafType.attlen) + +/* + * SPGiST dead tuple: declaration for examining non-live tuples + * + * The tupstate field of this struct must match those of regular inner and + * leaf tuples, and its size field must match a leaf tuple's. + * Also, the pointer field must be in the same place as a leaf tuple's heapPtr + * field, to satisfy some Asserts that we make when replacing a leaf tuple + * with a dead tuple. + * We don't use t_info, but it's needed to align the pointer field. + * pointer and xid are only valid when tupstate = REDIRECT. + */ +typedef struct SpGistDeadTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + size:30; + uint16 t_info; /* not used in dead tuples */ + ItemPointerData pointer; /* redirection inside index */ + TransactionId xid; /* ID of xact that inserted this tuple */ +} SpGistDeadTupleData; + +typedef SpGistDeadTupleData *SpGistDeadTuple; + +#define SGDTSIZE MAXALIGN(sizeof(SpGistDeadTupleData)) + +/* + * Macros for doing free-space calculations. Note that when adding up the + * space needed for tuples, we always consider each tuple to need the tuple's + * size plus sizeof(ItemIdData) (for the line pointer). This works correctly + * so long as tuple sizes are always maxaligned. 
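/*
 * A minimal sketch of following a leaf-tuple chain on a non-root leaf page
 * via SGLT_GET_NEXTOFFSET; 'head' is assumed to be the chain's first offset
 * and bufpage.h is assumed for PageGetItem/PageGetItemId.  The function name
 * is invented.
 */
static void
spg_example_walk_chain(Page page, OffsetNumber head)
{
    OffsetNumber offnum = head;

    while (offnum != InvalidOffsetNumber)
    {
        SpGistLeafTuple lt = (SpGistLeafTuple)
            PageGetItem(page, PageGetItemId(page, offnum));

        if (lt->tupstate == SPGIST_LIVE)
        {
            /* lt->heapPtr is the TID of the represented heap tuple */
        }

        offnum = SGLT_GET_NEXTOFFSET(lt);
    }
}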
+ */ + +/* Page capacity after allowing for fixed header and special space */ +#define SPGIST_PAGE_CAPACITY \ + MAXALIGN_DOWN(BLCKSZ - \ + SizeOfPageHeaderData - \ + MAXALIGN(sizeof(SpGistPageOpaqueData))) + +/* + * Compute free space on page, assuming that up to n placeholders can be + * recycled if present (n should be the number of tuples to be inserted) + */ +#define SpGistPageGetFreeSpace(p, n) \ + (PageGetExactFreeSpace(p) + \ + Min(SpGistPageGetOpaque(p)->nPlaceholder, n) * \ + (SGDTSIZE + sizeof(ItemIdData))) + +/* + * XLOG stuff + */ + +#define STORE_STATE(s, d) \ + do { \ + (d).myXid = (s)->myXid; \ + (d).isBuild = (s)->isBuild; \ + } while(0) + +/* + * The "flags" argument for SpGistGetBuffer should be either GBUF_LEAF to + * get a leaf page, or GBUF_INNER_PARITY(blockNumber) to get an inner + * page in the same triple-parity group as the specified block number. + * (Typically, this should be GBUF_INNER_PARITY(parentBlockNumber + 1) + * to follow the rule described in spgist/README.) + * In addition, GBUF_NULLS can be OR'd in to get a page for storage of + * null-valued tuples. + * + * Note: these flag values are used as indexes into lastUsedPages. + */ +#define GBUF_LEAF 0x03 +#define GBUF_INNER_PARITY(x) ((x) % 3) +#define GBUF_NULLS 0x04 + +#define GBUF_PARITY_MASK 0x03 +#define GBUF_REQ_LEAF(flags) (((flags) & GBUF_PARITY_MASK) == GBUF_LEAF) +#define GBUF_REQ_NULLS(flags) ((flags) & GBUF_NULLS) + +/* spgutils.c */ + +/* reloption parameters */ +#define SPGIST_MIN_FILLFACTOR 10 +#define SPGIST_DEFAULT_FILLFACTOR 80 + +extern SpGistCache *spgGetCache(Relation index); +extern TupleDesc getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType); +extern void initSpGistState(SpGistState *state, Relation index); +extern Buffer SpGistNewBuffer(Relation index); +extern void SpGistUpdateMetaPage(Relation index); +extern Buffer SpGistGetBuffer(Relation index, int flags, + int needSpace, bool *isNew); +extern void SpGistSetLastUsedPage(Relation index, Buffer buffer); +extern void SpGistInitPage(Page page, uint16 f); +extern void SpGistInitBuffer(Buffer b, uint16 f); +extern void SpGistInitMetapage(Page page); +extern unsigned int SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum); +extern Size SpGistGetLeafTupleSize(TupleDesc tupleDescriptor, + Datum *datums, bool *isnulls); +extern SpGistLeafTuple spgFormLeafTuple(SpGistState *state, + ItemPointer heapPtr, + Datum *datums, bool *isnulls); +extern SpGistNodeTuple spgFormNodeTuple(SpGistState *state, + Datum label, bool isnull); +extern SpGistInnerTuple spgFormInnerTuple(SpGistState *state, + bool hasPrefix, Datum prefix, + int nNodes, SpGistNodeTuple *nodes); +extern SpGistDeadTuple spgFormDeadTuple(SpGistState *state, int tupstate, + BlockNumber blkno, OffsetNumber offnum); +extern void spgDeformLeafTuple(SpGistLeafTuple tup, TupleDesc tupleDescriptor, + Datum *datums, bool *isnulls, + bool keyColumnIsNull); +extern Datum *spgExtractNodeLabels(SpGistState *state, + SpGistInnerTuple innerTuple); +extern OffsetNumber SpGistPageAddNewItem(SpGistState *state, Page page, + Item item, Size size, + OffsetNumber *startOffset, + bool errorOK); +extern bool spgproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull); + +/* spgdoinsert.c */ +extern void spgUpdateNodeLink(SpGistInnerTuple tup, int nodeN, + BlockNumber blkno, OffsetNumber offset); +extern void spgPageIndexMultiDelete(SpGistState *state, Page page, + OffsetNumber *itemnos, int nitems, + int firststate, int reststate, + 
BlockNumber blkno, OffsetNumber offnum); +extern bool spgdoinsert(Relation index, SpGistState *state, + ItemPointer heapPtr, Datum *datums, bool *isnulls); + +/* spgproc.c */ +extern double *spg_key_orderbys_distances(Datum key, bool isLeaf, + ScanKey orderbys, int norderbys); +extern BOX *box_copy(BOX *orig); + +#endif /* SPGIST_PRIVATE_H */ diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h new file mode 100644 index 0000000..69405b5 --- /dev/null +++ b/src/include/access/spgxlog.h @@ -0,0 +1,257 @@ +/*------------------------------------------------------------------------- + * + * spgxlog.h + * xlog declarations for SP-GiST access method. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/spgxlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPGXLOG_H +#define SPGXLOG_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" +#include "storage/off.h" + +/* XLOG record types for SPGiST */ + /* #define XLOG_SPGIST_CREATE_INDEX 0x00 */ /* not used anymore */ +#define XLOG_SPGIST_ADD_LEAF 0x10 +#define XLOG_SPGIST_MOVE_LEAFS 0x20 +#define XLOG_SPGIST_ADD_NODE 0x30 +#define XLOG_SPGIST_SPLIT_TUPLE 0x40 +#define XLOG_SPGIST_PICKSPLIT 0x50 +#define XLOG_SPGIST_VACUUM_LEAF 0x60 +#define XLOG_SPGIST_VACUUM_ROOT 0x70 +#define XLOG_SPGIST_VACUUM_REDIRECT 0x80 + +/* + * Some redo functions need an SpGistState, although only a few of its fields + * need to be valid. spgxlogState carries the required info in xlog records. + * (See fillFakeState in spgxlog.c for more comments.) + */ +typedef struct spgxlogState +{ + TransactionId myXid; + bool isBuild; +} spgxlogState; + +/* + * Backup Blk 0: destination page for leaf tuple + * Backup Blk 1: parent page (if any) + */ +typedef struct spgxlogAddLeaf +{ + bool newPage; /* init dest page? */ + bool storesNulls; /* page is in the nulls tree? */ + OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */ + OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */ + + OffsetNumber offnumParent; /* where the parent downlink is, if any */ + uint16 nodeI; + + /* new leaf tuple follows (unaligned!) */ +} spgxlogAddLeaf; + +/* + * Backup Blk 0: source leaf page + * Backup Blk 1: destination leaf page + * Backup Blk 2: parent page + */ +typedef struct spgxlogMoveLeafs +{ + uint16 nMoves; /* number of tuples moved from source page */ + bool newPage; /* init dest page? */ + bool replaceDead; /* are we replacing a DEAD source tuple? */ + bool storesNulls; /* pages are in the nulls tree? */ + + /* where the parent downlink is */ + OffsetNumber offnumParent; + uint16 nodeI; + + spgxlogState stateSrc; + + /*---------- + * data follows: + * array of deleted tuple numbers, length nMoves + * array of inserted tuple numbers, length nMoves + 1 or 1 + * list of leaf tuples, length nMoves + 1 or 1 (unaligned!) 
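/*
 * A minimal sketch of assembling an XLOG_SPGIST_ADD_LEAF record from the
 * structs above, assuming xloginsert.h and spgist_private.h are available;
 * critical-section and page-modification steps are omitted, and the function
 * name is invented.
 */
static void
spg_example_log_add_leaf(Buffer leafBuffer, SpGistLeafTuple leafTuple,
                         OffsetNumber offnumLeaf)
{
    spgxlogAddLeaf xlrec;
    XLogRecPtr  recptr;

    xlrec.newPage = false;
    xlrec.storesNulls = false;
    xlrec.offnumLeaf = offnumLeaf;
    xlrec.offnumHeadLeaf = InvalidOffsetNumber; /* not chaining here */
    xlrec.offnumParent = InvalidOffsetNumber;   /* no parent update */
    xlrec.nodeI = 0;

    XLogBeginInsert();
    XLogRegisterData((char *) &xlrec, sizeof(xlrec));
    XLogRegisterData((char *) leafTuple, leafTuple->size);
    XLogRegisterBuffer(0, leafBuffer, REGBUF_STANDARD);

    recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
    PageSetLSN(BufferGetPage(leafBuffer), recptr);
}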
+ * + * Note: if replaceDead is true then there is only one inserted tuple + * number and only one leaf tuple in the data, because we are not copying + * the dead tuple from the source + *---------- + */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; +} spgxlogMoveLeafs; + +#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets) + +/* + * Backup Blk 0: original page + * Backup Blk 1: where new tuple goes, if not same place + * Backup Blk 2: where parent downlink is, if updated and different from + * the old and new + */ +typedef struct spgxlogAddNode +{ + /* + * Offset of the original inner tuple, in the original page (on backup + * block 0). + */ + OffsetNumber offnum; + + /* + * Offset of the new tuple, on the new page (on backup block 1). Invalid, + * if we overwrote the old tuple in the original page). + */ + OffsetNumber offnumNew; + bool newPage; /* init new page? */ + + /*---- + * Where is the parent downlink? parentBlk indicates which page it's on, + * and offnumParent is the offset within the page. The possible values for + * parentBlk are: + * + * 0: parent == original page + * 1: parent == new page + * 2: parent == different page (blk ref 2) + * -1: parent not updated + *---- + */ + int8 parentBlk; + OffsetNumber offnumParent; /* offset within the parent page */ + + uint16 nodeI; + + spgxlogState stateSrc; + + /* + * updated inner tuple follows (unaligned!) + */ +} spgxlogAddNode; + +/* + * Backup Blk 0: where the prefix tuple goes + * Backup Blk 1: where the postfix tuple goes (if different page) + */ +typedef struct spgxlogSplitTuple +{ + /* where the prefix tuple goes */ + OffsetNumber offnumPrefix; + + /* where the postfix tuple goes */ + OffsetNumber offnumPostfix; + bool newPage; /* need to init that page? */ + bool postfixBlkSame; /* was postfix tuple put on same page as + * prefix? */ + + /* + * new prefix inner tuple follows, then new postfix inner tuple (both are + * unaligned!) + */ +} spgxlogSplitTuple; + +/* + * Buffer references in the rdata array are: + * Backup Blk 0: Src page (only if not root) + * Backup Blk 1: Dest page (if used) + * Backup Blk 2: Inner page + * Backup Blk 3: Parent page (if any, and different from Inner) + */ +typedef struct spgxlogPickSplit +{ + bool isRootSplit; + + uint16 nDelete; /* n to delete from Src */ + uint16 nInsert; /* n to insert on Src and/or Dest */ + bool initSrc; /* re-init the Src page? */ + bool initDest; /* re-init the Dest page? */ + + /* where to put new inner tuple */ + OffsetNumber offnumInner; + bool initInner; /* re-init the Inner page? */ + + bool storesNulls; /* pages are in the nulls tree? */ + + /* where the parent downlink is, if any */ + bool innerIsParent; /* is parent the same as inner page? */ + OffsetNumber offnumParent; + uint16 nodeI; + + spgxlogState stateSrc; + + /*---------- + * data follows: + * array of deleted tuple numbers, length nDelete + * array of inserted tuple numbers, length nInsert + * array of page selector bytes for inserted tuples, length nInsert + * new inner tuple (unaligned!) + * list of leaf tuples, length nInsert (unaligned!) 
+ *---------- + */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; +} spgxlogPickSplit; + +#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets) + +typedef struct spgxlogVacuumLeaf +{ + uint16 nDead; /* number of tuples to become DEAD */ + uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */ + uint16 nMove; /* number of tuples to move */ + uint16 nChain; /* number of tuples to re-chain */ + + spgxlogState stateSrc; + + /*---------- + * data follows: + * tuple numbers to become DEAD + * tuple numbers to become PLACEHOLDER + * tuple numbers to move from (and replace with PLACEHOLDER) + * tuple numbers to move to (replacing what is there) + * tuple numbers to update nextOffset links of + * tuple numbers to insert in nextOffset links + *---------- + */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; +} spgxlogVacuumLeaf; + +#define SizeOfSpgxlogVacuumLeaf offsetof(spgxlogVacuumLeaf, offsets) + +typedef struct spgxlogVacuumRoot +{ + /* vacuum a root page when it is also a leaf */ + uint16 nDelete; /* number of tuples to delete */ + + spgxlogState stateSrc; + + /* offsets of tuples to delete follow */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; +} spgxlogVacuumRoot; + +#define SizeOfSpgxlogVacuumRoot offsetof(spgxlogVacuumRoot, offsets) + +typedef struct spgxlogVacuumRedirect +{ + uint16 nToPlaceholder; /* number of redirects to make placeholders */ + OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */ + TransactionId newestRedirectXid; /* newest XID of removed redirects */ + + /* offsets of redirect tuples to make placeholders follow */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; +} spgxlogVacuumRedirect; + +#define SizeOfSpgxlogVacuumRedirect offsetof(spgxlogVacuumRedirect, offsets) + +extern void spg_redo(XLogReaderState *record); +extern void spg_desc(StringInfo buf, XLogReaderState *record); +extern const char *spg_identify(uint8 info); +extern void spg_xlog_startup(void); +extern void spg_xlog_cleanup(void); +extern void spg_mask(char *pagedata, BlockNumber blkno); + +#endif /* SPGXLOG_H */ diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h new file mode 100644 index 0000000..fad4b69 --- /dev/null +++ b/src/include/access/stratnum.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * stratnum.h + * POSTGRES strategy number definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/stratnum.h + * + *------------------------------------------------------------------------- + */ +#ifndef STRATNUM_H +#define STRATNUM_H + +/* + * Strategy numbers identify the semantics that particular operators have + * with respect to particular operator classes. In some cases a strategy + * subtype (an OID) is used as further information. + */ +typedef uint16 StrategyNumber; + +#define InvalidStrategy ((StrategyNumber) 0) + +/* + * Strategy numbers for B-tree indexes. + */ +#define BTLessStrategyNumber 1 +#define BTLessEqualStrategyNumber 2 +#define BTEqualStrategyNumber 3 +#define BTGreaterEqualStrategyNumber 4 +#define BTGreaterStrategyNumber 5 + +#define BTMaxStrategyNumber 5 + +/* + * Strategy numbers for hash indexes. There's only one valid strategy for + * hashing: equality. + */ +#define HTEqualStrategyNumber 1 + +#define HTMaxStrategyNumber 1 + +/* + * Strategy numbers common to (some) GiST, SP-GiST and BRIN opclasses. 
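/*
 * A minimal sketch of how B-tree strategy numbers are consumed: translate a
 * strategy plus a three-way comparison result into a boolean.  The function
 * name is invented.
 */
static bool
example_btree_strategy_satisfied(StrategyNumber strategy, int cmp)
{
    switch (strategy)
    {
        case BTLessStrategyNumber:
            return cmp < 0;
        case BTLessEqualStrategyNumber:
            return cmp <= 0;
        case BTEqualStrategyNumber:
            return cmp == 0;
        case BTGreaterEqualStrategyNumber:
            return cmp >= 0;
        case BTGreaterStrategyNumber:
            return cmp > 0;
        default:
            return false;       /* not a B-tree strategy */
    }
}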
+ * + * The first few of these come from the R-Tree indexing method (hence the + * names); the others have been added over time as they have been needed. + */ +#define RTLeftStrategyNumber 1 /* for << */ +#define RTOverLeftStrategyNumber 2 /* for &< */ +#define RTOverlapStrategyNumber 3 /* for && */ +#define RTOverRightStrategyNumber 4 /* for &> */ +#define RTRightStrategyNumber 5 /* for >> */ +#define RTSameStrategyNumber 6 /* for ~= */ +#define RTContainsStrategyNumber 7 /* for @> */ +#define RTContainedByStrategyNumber 8 /* for <@ */ +#define RTOverBelowStrategyNumber 9 /* for &<| */ +#define RTBelowStrategyNumber 10 /* for <<| */ +#define RTAboveStrategyNumber 11 /* for |>> */ +#define RTOverAboveStrategyNumber 12 /* for |&> */ +#define RTOldContainsStrategyNumber 13 /* for old spelling of @> */ +#define RTOldContainedByStrategyNumber 14 /* for old spelling of <@ */ +#define RTKNNSearchStrategyNumber 15 /* for <-> (distance) */ +#define RTContainsElemStrategyNumber 16 /* for range types @> elem */ +#define RTAdjacentStrategyNumber 17 /* for -|- */ +#define RTEqualStrategyNumber 18 /* for = */ +#define RTNotEqualStrategyNumber 19 /* for != */ +#define RTLessStrategyNumber 20 /* for < */ +#define RTLessEqualStrategyNumber 21 /* for <= */ +#define RTGreaterStrategyNumber 22 /* for > */ +#define RTGreaterEqualStrategyNumber 23 /* for >= */ +#define RTSubStrategyNumber 24 /* for inet >> */ +#define RTSubEqualStrategyNumber 25 /* for inet <<= */ +#define RTSuperStrategyNumber 26 /* for inet << */ +#define RTSuperEqualStrategyNumber 27 /* for inet >>= */ +#define RTPrefixStrategyNumber 28 /* for text ^@ */ +#define RTOldBelowStrategyNumber 29 /* for old spelling of <<| */ +#define RTOldAboveStrategyNumber 30 /* for old spelling of |>> */ + +#define RTMaxStrategyNumber 30 + + +#endif /* STRATNUM_H */ diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h new file mode 100644 index 0000000..d0ab44a --- /dev/null +++ b/src/include/access/subtrans.h @@ -0,0 +1,29 @@ +/* + * subtrans.h + * + * PostgreSQL subtransaction-log manager + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/subtrans.h + */ +#ifndef SUBTRANS_H +#define SUBTRANS_H + +/* Number of SLRU buffers to use for subtrans */ +#define NUM_SUBTRANS_BUFFERS 32 + +extern void SubTransSetParent(TransactionId xid, TransactionId parent); +extern TransactionId SubTransGetParent(TransactionId xid); +extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); + +extern Size SUBTRANSShmemSize(void); +extern void SUBTRANSShmemInit(void); +extern void BootStrapSUBTRANS(void); +extern void StartupSUBTRANS(TransactionId oldestActiveXID); +extern void CheckPointSUBTRANS(void); +extern void ExtendSUBTRANS(TransactionId newestXact); +extern void TruncateSUBTRANS(TransactionId oldestXact); + +#endif /* SUBTRANS_H */ diff --git a/src/include/access/syncscan.h b/src/include/access/syncscan.h new file mode 100644 index 0000000..7947f3c --- /dev/null +++ b/src/include/access/syncscan.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * + * syncscan.h + * POSTGRES synchronous scan support functions. 
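/*
 * A minimal sketch of what SubTransGetTopmostTransaction() does at its core:
 * follow pg_subtrans parent links until an XID with no parent is reached.
 * The function name is invented, transam.h is assumed for
 * TransactionIdIsValid, and the real function additionally stops at
 * TransactionXmin.
 */
static TransactionId
example_topmost_xid(TransactionId xid)
{
    TransactionId parent;

    while (TransactionIdIsValid(parent = SubTransGetParent(xid)))
        xid = parent;

    return xid;
}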
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/syncscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef SYNCSCAN_H +#define SYNCSCAN_H + +#include "storage/block.h" +#include "utils/relcache.h" + +extern void ss_report_location(Relation rel, BlockNumber location); +extern BlockNumber ss_get_location(Relation rel, BlockNumber relnblocks); +extern void SyncScanShmemInit(void); +extern Size SyncScanShmemSize(void); + +#endif diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h new file mode 100644 index 0000000..968257b --- /dev/null +++ b/src/include/access/sysattr.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * sysattr.h + * POSTGRES system attribute definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/sysattr.h + * + *------------------------------------------------------------------------- + */ +#ifndef SYSATTR_H +#define SYSATTR_H + + +/* + * Attribute numbers for the system-defined attributes + */ +#define SelfItemPointerAttributeNumber (-1) +#define MinTransactionIdAttributeNumber (-2) +#define MinCommandIdAttributeNumber (-3) +#define MaxTransactionIdAttributeNumber (-4) +#define MaxCommandIdAttributeNumber (-5) +#define TableOidAttributeNumber (-6) +#define FirstLowInvalidHeapAttributeNumber (-7) + +#endif /* SYSATTR_H */ diff --git a/src/include/access/table.h b/src/include/access/table.h new file mode 100644 index 0000000..5e4d9dd --- /dev/null +++ b/src/include/access/table.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * table.h + * Generic routines for table related code. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/table.h + * + *------------------------------------------------------------------------- + */ +#ifndef TABLE_H +#define TABLE_H + +#include "nodes/primnodes.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" + +extern Relation table_open(Oid relationId, LOCKMODE lockmode); +extern Relation table_openrv(const RangeVar *relation, LOCKMODE lockmode); +extern Relation table_openrv_extended(const RangeVar *relation, + LOCKMODE lockmode, bool missing_ok); +extern Relation try_table_open(Oid relationId, LOCKMODE lockmode); +extern void table_close(Relation relation, LOCKMODE lockmode); + +#endif /* TABLE_H */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h new file mode 100644 index 0000000..9f1e4a1 --- /dev/null +++ b/src/include/access/tableam.h @@ -0,0 +1,2075 @@ +/*------------------------------------------------------------------------- + * + * tableam.h + * POSTGRES table access method definitions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tableam.h + * + * NOTES + * See tableam.sgml for higher level documentation. 
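/*
 * A minimal sketch of the table_open()/table_close() pairing declared above;
 * the function name is invented and lockdefs.h is assumed for
 * AccessShareLock.
 */
static void
example_touch_table(Oid relid)
{
    Relation    rel = table_open(relid, AccessShareLock);

    /* ... read-only use of 'rel' goes here ... */

    table_close(rel, AccessShareLock);
}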
+ * + *------------------------------------------------------------------------- + */ +#ifndef TABLEAM_H +#define TABLEAM_H + +#include "access/relscan.h" +#include "access/sdir.h" +#include "access/xact.h" +#include "utils/guc.h" +#include "utils/rel.h" +#include "utils/snapshot.h" + + +#define DEFAULT_TABLE_ACCESS_METHOD "heap" + +/* GUCs */ +extern char *default_table_access_method; +extern bool synchronize_seqscans; + + +struct BulkInsertStateData; +struct IndexInfo; +struct SampleScanState; +struct TBMIterateResult; +struct VacuumParams; +struct ValidateIndexState; + +/* + * Bitmask values for the flags argument to the scan_begin callback. + */ +typedef enum ScanOptions +{ + /* one of SO_TYPE_* may be specified */ + SO_TYPE_SEQSCAN = 1 << 0, + SO_TYPE_BITMAPSCAN = 1 << 1, + SO_TYPE_SAMPLESCAN = 1 << 2, + SO_TYPE_TIDSCAN = 1 << 3, + SO_TYPE_TIDRANGESCAN = 1 << 4, + SO_TYPE_ANALYZE = 1 << 5, + + /* several of SO_ALLOW_* may be specified */ + /* allow or disallow use of access strategy */ + SO_ALLOW_STRAT = 1 << 6, + /* report location to syncscan logic? */ + SO_ALLOW_SYNC = 1 << 7, + /* verify visibility page-at-a-time? */ + SO_ALLOW_PAGEMODE = 1 << 8, + + /* unregister snapshot at scan end? */ + SO_TEMP_SNAPSHOT = 1 << 9 +} ScanOptions; + +/* + * Result codes for table_{update,delete,lock_tuple}, and for visibility + * routines inside table AMs. + */ +typedef enum TM_Result +{ + /* + * Signals that the action succeeded (i.e. update/delete performed, lock + * was acquired) + */ + TM_Ok, + + /* The affected tuple wasn't visible to the relevant snapshot */ + TM_Invisible, + + /* The affected tuple was already modified by the calling backend */ + TM_SelfModified, + + /* + * The affected tuple was updated by another transaction. This includes + * the case where tuple was moved to another partition. + */ + TM_Updated, + + /* The affected tuple was deleted by another transaction */ + TM_Deleted, + + /* + * The affected tuple is currently being modified by another session. This + * will only be returned if table_(update/delete/lock_tuple) are + * instructed not to wait. + */ + TM_BeingModified, + + /* lock couldn't be acquired, action skipped. Only used by lock_tuple */ + TM_WouldBlock +} TM_Result; + +/* + * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail + * because the target tuple is already outdated, they fill in this struct to + * provide information to the caller about what happened. + * + * ctid is the target's ctid link: it is the same as the target's TID if the + * target was deleted, or the location of the replacement tuple if the target + * was updated. + * + * xmax is the outdating transaction's XID. If the caller wants to visit the + * replacement tuple, it must check that this matches before believing the + * replacement is really a match. + * + * cmax is the outdating command's CID, but only when the failure code is + * TM_SelfModified (i.e., something in the current transaction outdated the + * tuple); otherwise cmax is zero. (We make this restriction because + * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other + * transactions.) + */ +typedef struct TM_FailureData +{ + ItemPointerData ctid; + TransactionId xmax; + CommandId cmax; + bool traversed; +} TM_FailureData; + +/* + * State used when calling table_index_delete_tuples(). + * + * Represents the status of table tuples, referenced by table TID and taken by + * index AM from index tuples. 
State consists of high level parameters of the + * deletion operation, plus two mutable palloc()'d arrays for information + * about the status of individual table tuples. These are conceptually one + * single array. Using two arrays keeps the TM_IndexDelete struct small, + * which makes sorting the first array (the deltids array) fast. + * + * Some index AM callers perform simple index tuple deletion (by specifying + * bottomup = false), and include only known-dead deltids. These known-dead + * entries are all marked knowndeletable = true directly (typically these are + * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required. + * + * Callers that specify bottomup = true are "bottom-up index deletion" + * callers. The considerations for the tableam are more subtle with these + * callers because they ask the tableam to perform highly speculative work, + * and might only expect the tableam to check a small fraction of all entries. + * Caller is not allowed to specify knowndeletable = true for any entry + * because everything is highly speculative. Bottom-up caller provides + * context and hints to tableam -- see comments below for details on how index + * AMs and tableams should coordinate during bottom-up index deletion. + * + * Simple index deletion callers may ask the tableam to perform speculative + * work, too. This is a little like bottom-up deletion, but not too much. + * The tableam will only perform speculative work when it's practically free + * to do so in passing for simple deletion caller (while always performing + * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to + * be deleted within index AM). This is the real reason why it's possible for + * simple index deletion caller to specify knowndeletable = false up front + * (this means "check if it's possible for me to delete corresponding index + * tuple when it's cheap to do so in passing"). The index AM should only + * include "extra" entries for index tuples whose TIDs point to a table block + * that tableam is expected to have to visit anyway (in the event of a block + * orientated tableam). The tableam isn't strictly obligated to check these + * "extra" TIDs, but a block-based AM should always manage to do so in + * practice. + * + * The final contents of the deltids/status arrays are interesting to callers + * that ask tableam to perform speculative work (i.e. when _any_ items have + * knowndeletable set to false up front). These index AM callers will + * naturally need to consult final state to determine which index tuples are + * in fact deletable. + * + * The index AM can keep track of which index tuple relates to which deltid by + * setting idxoffnum (and/or relying on each entry being uniquely identifiable + * using tid), which is important when the final contents of the array will + * need to be interpreted -- the array can shrink from initial size after + * tableam processing and/or have entries in a new order (tableam may sort + * deltids array for its own reasons). Bottom-up callers may find that final + * ndeltids is 0 on return from call to tableam, in which case no index tuple + * deletions are possible. Simple deletion callers can rely on any entries + * they know to be deletable appearing in the final array as deletable. 
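/*
 * A minimal sketch of how callers typically react to the TM_Result codes and
 * TM_FailureData defined above; the function name and messages are invented,
 * and real callers do considerably more (e.g. following tmfd->ctid).
 */
static void
example_handle_tm_result(TM_Result result, TM_FailureData *tmfd)
{
    switch (result)
    {
        case TM_Ok:
            break;              /* operation succeeded */
        case TM_SelfModified:
            /* tmfd->cmax says which command of our own xact hit it first */
            elog(ERROR, "tuple already modified by this transaction");
            break;
        case TM_Updated:
            /* tmfd->ctid and tmfd->xmax locate the replacement version */
            break;
        case TM_Deleted:
            /* concurrently deleted; nothing left to act on */
            break;
        case TM_Invisible:
            elog(ERROR, "attempted to modify an invisible tuple");
            break;
        default:
            /* TM_BeingModified, TM_WouldBlock: handling is caller-specific */
            break;
    }
}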
+ */ +typedef struct TM_IndexDelete +{ + ItemPointerData tid; /* table TID from index tuple */ + int16 id; /* Offset into TM_IndexStatus array */ +} TM_IndexDelete; + +typedef struct TM_IndexStatus +{ + OffsetNumber idxoffnum; /* Index am page offset number */ + bool knowndeletable; /* Currently known to be deletable? */ + + /* Bottom-up index deletion specific fields follow */ + bool promising; /* Promising (duplicate) index tuple? */ + int16 freespace; /* Space freed in index if deleted */ +} TM_IndexStatus; + +/* + * Index AM/tableam coordination is central to the design of bottom-up index + * deletion. The index AM provides hints about where to look to the tableam + * by marking some entries as "promising". Index AM does this with duplicate + * index tuples that are strongly suspected to be old versions left behind by + * UPDATEs that did not logically modify indexed values. Index AM may find it + * helpful to only mark entries as promising when they're thought to have been + * affected by such an UPDATE in the recent past. + * + * Bottom-up index deletion casts a wide net at first, usually by including + * all TIDs on a target index page. It is up to the tableam to worry about + * the cost of checking transaction status information. The tableam is in + * control, but needs careful guidance from the index AM. Index AM requests + * that bottomupfreespace target be met, while tableam measures progress + * towards that goal by tallying the per-entry freespace value for known + * deletable entries. (All !bottomup callers can just set these space related + * fields to zero.) + */ +typedef struct TM_IndexDeleteOp +{ + bool bottomup; /* Bottom-up (not simple) deletion? */ + int bottomupfreespace; /* Bottom-up space target */ + + /* Mutable per-TID information follows (index AM initializes entries) */ + int ndeltids; /* Current # of deltids/status elements */ + TM_IndexDelete *deltids; + TM_IndexStatus *status; +} TM_IndexDeleteOp; + +/* "options" flag bits for table_tuple_insert */ +/* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */ +#define TABLE_INSERT_SKIP_FSM 0x0002 +#define TABLE_INSERT_FROZEN 0x0004 +#define TABLE_INSERT_NO_LOGICAL 0x0008 + +/* flag bits for table_tuple_lock */ +/* Follow tuples whose update is in progress if lock modes don't conflict */ +#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0) +/* Follow update chain and lock latest version of tuple */ +#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1) + + +/* Typedef for callback function for table_index_build_scan */ +typedef void (*IndexBuildCallback) (Relation index, + ItemPointer tid, + Datum *values, + bool *isnull, + bool tupleIsAlive, + void *state); + +/* + * API struct for a table AM. Note this must be allocated in a + * server-lifetime manner, typically as a static const struct, which then gets + * returned by FormData_pg_am.amhandler. + * + * In most cases it's not appropriate to call the callbacks directly, use the + * table_* wrapper functions instead. + * + * GetTableAmRoutine() asserts that required callbacks are filled in, remember + * to update when adding a callback. + */ +typedef struct TableAmRoutine +{ + /* this must be set to T_TableAmRoutine */ + NodeTag type; + + + /* ------------------------------------------------------------------------ + * Slot related callbacks. + * ------------------------------------------------------------------------ + */ + + /* + * Return slot implementation suitable for storing a tuple of this AM. 
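/*
 * A minimal sketch of an index AM queueing one known-dead TID for simple
 * (bottomup = false) deletion; 'delstate' is assumed to have its deltids and
 * status arrays already allocated, and the function name is invented.
 */
static void
example_queue_deltid(TM_IndexDeleteOp *delstate, ItemPointer htid,
                     OffsetNumber idxoffnum)
{
    TM_IndexDelete *del = &delstate->deltids[delstate->ndeltids];
    TM_IndexStatus *stat = &delstate->status[delstate->ndeltids];

    del->tid = *htid;
    del->id = delstate->ndeltids;

    stat->idxoffnum = idxoffnum;
    stat->knowndeletable = true;    /* e.g. from an LP_DEAD index tuple */
    stat->promising = false;        /* bottom-up-only field */
    stat->freespace = 0;            /* bottom-up-only field */

    delstate->ndeltids++;
}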
*/ + const TupleTableSlotOps *(*slot_callbacks) (Relation rel); + + + /* ------------------------------------------------------------------------ + * Table scan callbacks. + * ------------------------------------------------------------------------ + */ + + /* + * Start a scan of `rel`. The callback has to return a TableScanDesc, + * which will typically be embedded in a larger, AM specific, struct. + * + * If nkeys != 0, the results need to be filtered by those scan keys. + * + * pscan, if not NULL, will have already been initialized with + * parallelscan_initialize(), and has to be for the same relation. Will + * only be set coming from table_beginscan_parallel(). + * + * `flags` is a bitmask indicating the type of scan (ScanOptions's + * SO_TYPE_*, currently only one may be specified), options controlling + * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be + * specified, an AM may ignore unsupported ones) and whether the snapshot + * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT). + */ + TableScanDesc (*scan_begin) (Relation rel, + Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + ParallelTableScanDesc pscan, + uint32 flags); + + /* + * Release resources and deallocate scan. If TableScanDesc.temp_snap, + * TableScanDesc.rs_snapshot needs to be unregistered. + */ + void (*scan_end) (TableScanDesc scan); + + /* + * Restart relation scan. If set_params is set to true, allow_{strat, + * sync, pagemode} (see scan_begin) changes should be taken into account. + */ + void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, + bool set_params, bool allow_strat, + bool allow_sync, bool allow_pagemode); + + /* + * Return next tuple from `scan`, store in slot. + */ + bool (*scan_getnextslot) (TableScanDesc scan, + ScanDirection direction, + TupleTableSlot *slot); + + /*----------- + * Optional functions to provide scanning for ranges of ItemPointers. + * Implementations must either provide both of these functions, or neither + * of them. + * + * Implementations of scan_set_tidrange must themselves handle + * ItemPointers of any value. i.e., they must handle each of the following: + * + * 1) mintid or maxtid is beyond the end of the table; and + * 2) mintid is above maxtid; and + * 3) item offset for mintid or maxtid is beyond the maximum offset + * allowed by the AM. + * + * Implementations can assume that scan_set_tidrange is always called + * before scan_getnextslot_tidrange or after scan_rescan and before any + * further calls to scan_getnextslot_tidrange. + */ + void (*scan_set_tidrange) (TableScanDesc scan, + ItemPointer mintid, + ItemPointer maxtid); + + /* + * Return next tuple from `scan` that's in the range of TIDs defined by + * scan_set_tidrange. + */ + bool (*scan_getnextslot_tidrange) (TableScanDesc scan, + ScanDirection direction, + TupleTableSlot *slot); + + /* ------------------------------------------------------------------------ + * Parallel table scan related functions. + * ------------------------------------------------------------------------ + */ + + /* + * Estimate the size of shared memory needed for a parallel scan of this + * relation. The snapshot does not need to be accounted for. + */ + Size (*parallelscan_estimate) (Relation rel); + + /* + * Initialize ParallelTableScanDesc for a parallel scan of this relation. + * `pscan` will be sized according to parallelscan_estimate() for the same + * relation. 
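/*
 * A minimal caller-side sketch of the scan_begin / scan_getnextslot /
 * scan_end life cycle, written against the table_* wrappers that appear
 * later in this header (table_slot_create, table_beginscan,
 * table_scan_getnextslot, table_endscan); the function name is invented and
 * executor/tuptable.h is assumed for ExecDropSingleTupleTableSlot.
 */
static void
example_full_scan(Relation rel, Snapshot snapshot)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* one visible tuple per iteration, available through 'slot' */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}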
+ */ + Size (*parallelscan_initialize) (Relation rel, + ParallelTableScanDesc pscan); + + /* + * Reinitialize `pscan` for a new scan. `rel` will be the same relation as + * when `pscan` was initialized by parallelscan_initialize. + */ + void (*parallelscan_reinitialize) (Relation rel, + ParallelTableScanDesc pscan); + + + /* ------------------------------------------------------------------------ + * Index Scan Callbacks + * ------------------------------------------------------------------------ + */ + + /* + * Prepare to fetch tuples from the relation, as needed when fetching + * tuples for an index scan. The callback has to return an + * IndexFetchTableData, which the AM will typically embed in a larger + * structure with additional information. + * + * Tuples for an index scan can then be fetched via index_fetch_tuple. + */ + struct IndexFetchTableData *(*index_fetch_begin) (Relation rel); + + /* + * Reset index fetch. Typically this will release cross index fetch + * resources held in IndexFetchTableData. + */ + void (*index_fetch_reset) (struct IndexFetchTableData *data); + + /* + * Release resources and deallocate index fetch. + */ + void (*index_fetch_end) (struct IndexFetchTableData *data); + + /* + * Fetch tuple at `tid` into `slot`, after doing a visibility test + * according to `snapshot`. If a tuple was found and passed the visibility + * test, return true, false otherwise. + * + * Note that AMs that do not necessarily update indexes when indexed + * columns do not change, need to return the current/correct version of + * the tuple that is visible to the snapshot, even if the tid points to an + * older version of the tuple. + * + * *call_again is false on the first call to index_fetch_tuple for a tid. + * If there potentially is another tuple matching the tid, *call_again + * needs to be set to true by index_fetch_tuple, signaling to the caller + * that index_fetch_tuple should be called again for the same tid. + * + * *all_dead, if all_dead is not NULL, should be set to true by + * index_fetch_tuple iff it is guaranteed that no backend needs to see + * that tuple. Index AMs can use that to avoid returning that tid in + * future searches. + */ + bool (*index_fetch_tuple) (struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead); + + + /* ------------------------------------------------------------------------ + * Callbacks for non-modifying operations on individual tuples + * ------------------------------------------------------------------------ + */ + + /* + * Fetch tuple at `tid` into `slot`, after doing a visibility test + * according to `snapshot`. If a tuple was found and passed the visibility + * test, returns true, false otherwise. + */ + bool (*tuple_fetch_row_version) (Relation rel, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot); + + /* + * Is tid valid for a scan of this relation. + */ + bool (*tuple_tid_valid) (TableScanDesc scan, + ItemPointer tid); + + /* + * Return the latest version of the tuple at `tid`, by updating `tid` to + * point at the newest version. + */ + void (*tuple_get_latest_tid) (TableScanDesc scan, + ItemPointer tid); + + /* + * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of + * the appropriate type for the AM. 
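/*
 * A minimal caller-side sketch of the index-fetch protocol described above,
 * using the table_index_fetch_tuple() wrapper declared later in this header
 * rather than invoking the callback directly; the function name is invented.
 */
static bool
example_fetch_visible_version(struct IndexFetchTableData *fetch,
                              ItemPointer tid, Snapshot snapshot,
                              TupleTableSlot *slot)
{
    bool        call_again = false;
    bool        all_dead = false;

    do
    {
        /* returns true as soon as a version visible to 'snapshot' is found */
        if (table_index_fetch_tuple(fetch, tid, snapshot, slot,
                                    &call_again, &all_dead))
            return true;
    } while (call_again);       /* same TID may yield further versions */

    return false;
}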
+ */ + bool (*tuple_satisfies_snapshot) (Relation rel, + TupleTableSlot *slot, + Snapshot snapshot); + + /* see table_index_delete_tuples() */ + TransactionId (*index_delete_tuples) (Relation rel, + TM_IndexDeleteOp *delstate); + + + /* ------------------------------------------------------------------------ + * Manipulations of physical tuples. + * ------------------------------------------------------------------------ + */ + + /* see table_tuple_insert() for reference about parameters */ + void (*tuple_insert) (Relation rel, TupleTableSlot *slot, + CommandId cid, int options, + struct BulkInsertStateData *bistate); + + /* see table_tuple_insert_speculative() for reference about parameters */ + void (*tuple_insert_speculative) (Relation rel, + TupleTableSlot *slot, + CommandId cid, + int options, + struct BulkInsertStateData *bistate, + uint32 specToken); + + /* see table_tuple_complete_speculative() for reference about parameters */ + void (*tuple_complete_speculative) (Relation rel, + TupleTableSlot *slot, + uint32 specToken, + bool succeeded); + + /* see table_multi_insert() for reference about parameters */ + void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots, + CommandId cid, int options, struct BulkInsertStateData *bistate); + + /* see table_tuple_delete() for reference about parameters */ + TM_Result (*tuple_delete) (Relation rel, + ItemPointer tid, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + TM_FailureData *tmfd, + bool changingPart); + + /* see table_tuple_update() for reference about parameters */ + TM_Result (*tuple_update) (Relation rel, + ItemPointer otid, + TupleTableSlot *slot, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + TM_FailureData *tmfd, + LockTupleMode *lockmode, + bool *update_indexes); + + /* see table_tuple_lock() for reference about parameters */ + TM_Result (*tuple_lock) (Relation rel, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + CommandId cid, + LockTupleMode mode, + LockWaitPolicy wait_policy, + uint8 flags, + TM_FailureData *tmfd); + + /* + * Perform operations necessary to complete insertions made via + * tuple_insert and multi_insert with a BulkInsertState specified. In-tree + * access methods ceased to use this. + * + * Typically callers of tuple_insert and multi_insert will just pass all + * the flags that apply to them, and each AM has to decide which of them + * make sense for it, and then only take actions in finish_bulk_insert for + * those flags, and ignore others. + * + * Optional callback. + */ + void (*finish_bulk_insert) (Relation rel, int options); + + + /* ------------------------------------------------------------------------ + * DDL related functionality. + * ------------------------------------------------------------------------ + */ + + /* + * This callback needs to create a new relation filenode for `rel`, with + * appropriate durability behaviour for `persistence`. + * + * Note that only the subset of the relcache filled by + * RelationBuildLocalRelation() can be relied upon and that the relation's + * catalog entries will either not yet exist (new relation), or will still + * reference the old relfilenode. + * + * As output *freezeXid, *minmulti must be set to the values appropriate + * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those + * fields to be filled they can be set to InvalidTransactionId and + * InvalidMultiXactId, respectively. + * + * See also table_relation_set_new_filenode(). 
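/*
 * A minimal sketch of inserting one slot through the table_tuple_insert()
 * wrapper (declared later in this header) around the tuple_insert callback
 * above; the function name is invented and xact.h provides
 * GetCurrentCommandId.
 */
static void
example_insert_slot(Relation rel, TupleTableSlot *slot)
{
    /* options = 0: normal insert; TABLE_INSERT_* flags could be OR'd in */
    table_tuple_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
}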
+ */ + void (*relation_set_new_filenode) (Relation rel, + const RelFileNode *newrnode, + char persistence, + TransactionId *freezeXid, + MultiXactId *minmulti); + + /* + * This callback needs to remove all contents from `rel`'s current + * relfilenode. No provisions for transactional behaviour need to be made. + * Often this can be implemented by truncating the underlying storage to + * its minimal size. + * + * See also table_relation_nontransactional_truncate(). + */ + void (*relation_nontransactional_truncate) (Relation rel); + + /* + * See table_relation_copy_data(). + * + * This can typically be implemented by directly copying the underlying + * storage, unless it contains references to the tablespace internally. + */ + void (*relation_copy_data) (Relation rel, + const RelFileNode *newrnode); + + /* See table_relation_copy_for_cluster() */ + void (*relation_copy_for_cluster) (Relation NewTable, + Relation OldTable, + Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, + TransactionId *xid_cutoff, + MultiXactId *multi_cutoff, + double *num_tuples, + double *tups_vacuumed, + double *tups_recently_dead); + + /* + * React to VACUUM command on the relation. The VACUUM can be triggered by + * a user or by autovacuum. The specific actions performed by the AM will + * depend heavily on the individual AM. + * + * On entry a transaction is already established, and the relation is + * locked with a ShareUpdateExclusive lock. + * + * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through + * this routine, even if (for ANALYZE) it is part of the same VACUUM + * command. + * + * There probably, in the future, needs to be a separate callback to + * integrate with autovacuum's scheduling. + */ + void (*relation_vacuum) (Relation rel, + struct VacuumParams *params, + BufferAccessStrategy bstrategy); + + /* + * Prepare to analyze block `blockno` of `scan`. The scan has been started + * with table_beginscan_analyze(). See also + * table_scan_analyze_next_block(). + * + * The callback may acquire resources like locks that are held until + * table_scan_analyze_next_tuple() returns false. It e.g. can make sense + * to hold a lock until all tuples on a block have been analyzed by + * scan_analyze_next_tuple. + * + * The callback can return false if the block is not suitable for + * sampling, e.g. because it's a metapage that could never contain tuples. + * + * XXX: This obviously is primarily suited for block-based AMs. It's not + * clear what a good interface for non block based AMs would be, so there + * isn't one yet. + */ + bool (*scan_analyze_next_block) (TableScanDesc scan, + BlockNumber blockno, + BufferAccessStrategy bstrategy); + + /* + * See table_scan_analyze_next_tuple(). + * + * Not every AM might have a meaningful concept of dead rows, in which + * case it's OK to not increment *deadrows - but note that that may + * influence autovacuum scheduling (see comment for relation_vacuum + * callback). 
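+	 *
+	 * For instance, an AM with no notion of dead rows might implement this
+	 * as little more than the following sketch, where the helper stands in
+	 * for whatever block-local iteration the AM uses internally (it is not a
+	 * real function):
+	 *
+	 *		if (!am_private_next_tuple_on_block(scan, slot))
+	 *			return false;
+	 *		*liverows += 1;
+	 *		return true;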
+ */ + bool (*scan_analyze_next_tuple) (TableScanDesc scan, + TransactionId OldestXmin, + double *liverows, + double *deadrows, + TupleTableSlot *slot); + + /* see table_index_build_range_scan for reference about parameters */ + double (*index_build_range_scan) (Relation table_rel, + Relation index_rel, + struct IndexInfo *index_info, + bool allow_sync, + bool anyvisible, + bool progress, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan); + + /* see table_index_validate_scan for reference about parameters */ + void (*index_validate_scan) (Relation table_rel, + Relation index_rel, + struct IndexInfo *index_info, + Snapshot snapshot, + struct ValidateIndexState *state); + + + /* ------------------------------------------------------------------------ + * Miscellaneous functions. + * ------------------------------------------------------------------------ + */ + + /* + * See table_relation_size(). + * + * Note that currently a few callers use the MAIN_FORKNUM size to figure + * out the range of potentially interesting blocks (brin, analyze). It's + * probable that we'll need to revise the interface for those at some + * point. + */ + uint64 (*relation_size) (Relation rel, ForkNumber forkNumber); + + + /* + * This callback should return true if the relation requires a TOAST table + * and false if it does not. It may wish to examine the relation's tuple + * descriptor before making a decision, but if it uses some other method + * of storing large values (or if it does not support them) it can simply + * return false. + */ + bool (*relation_needs_toast_table) (Relation rel); + + /* + * This callback should return the OID of the table AM that implements + * TOAST tables for this AM. If the relation_needs_toast_table callback + * always returns false, this callback is not required. + */ + Oid (*relation_toast_am) (Relation rel); + + /* + * This callback is invoked when detoasting a value stored in a toast + * table implemented by this AM. See table_relation_fetch_toast_slice() + * for more details. + */ + void (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid, + int32 attrsize, + int32 sliceoffset, + int32 slicelength, + struct varlena *result); + + + /* ------------------------------------------------------------------------ + * Planner related functions. + * ------------------------------------------------------------------------ + */ + + /* + * See table_relation_estimate_size(). + * + * While block oriented, it shouldn't be too hard for an AM that doesn't + * internally use blocks to convert into a usable representation. + * + * This differs from the relation_size callback by returning size + * estimates (both relation size and tuple count) for planning purposes, + * rather than returning a currently correct estimate. + */ + void (*relation_estimate_size) (Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac); + + + /* ------------------------------------------------------------------------ + * Executor related functions. + * ------------------------------------------------------------------------ + */ + + /* + * Prepare to fetch / check / return tuples from `tbmres->blockno` as part + * of a bitmap table scan. `scan` was started via table_beginscan_bm(). + * Return false if there are no tuples to be found on the page, true + * otherwise. 
+ * + * This will typically read and pin the target block, and do the necessary + * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might + * make sense to perform tuple visibility checks at this time). For some + * AMs it will make more sense to do all the work referencing `tbmres` + * contents here, for others it might be better to defer more work to + * scan_bitmap_next_tuple. + * + * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples + * on the page have to be returned, otherwise the tuples at offsets in + * `tbmres->offsets` need to be returned. + * + * XXX: Currently this may only be implemented if the AM uses md.c as its + * storage manager, and uses ItemPointer->ip_blkid in a manner that maps + * blockids directly to the underlying storage. nodeBitmapHeapscan.c + * performs prefetching directly using that interface. This probably + * needs to be rectified at a later point. + * + * XXX: Currently this may only be implemented if the AM uses the + * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to + * perform prefetching. This probably needs to be rectified at a later + * point. + * + * Optional callback, but either both scan_bitmap_next_block and + * scan_bitmap_next_tuple need to exist, or neither. + */ + bool (*scan_bitmap_next_block) (TableScanDesc scan, + struct TBMIterateResult *tbmres); + + /* + * Fetch the next tuple of a bitmap table scan into `slot` and return true + * if a visible tuple was found, false otherwise. + * + * For some AMs it will make more sense to do all the work referencing + * `tbmres` contents in scan_bitmap_next_block, for others it might be + * better to defer more work to this callback. + * + * Optional callback, but either both scan_bitmap_next_block and + * scan_bitmap_next_tuple need to exist, or neither. + */ + bool (*scan_bitmap_next_tuple) (TableScanDesc scan, + struct TBMIterateResult *tbmres, + TupleTableSlot *slot); + + /* + * Prepare to fetch tuples from the next block in a sample scan. Return + * false if the sample scan is finished, true otherwise. `scan` was + * started via table_beginscan_sampling(). + * + * Typically this will first determine the target block by calling the + * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively + * perform a sequential scan over all blocks. The determined block is + * then typically read and pinned. + * + * As the TsmRoutine interface is block based, a block needs to be passed + * to NextSampleBlock(). If that's not appropriate for an AM, it + * internally needs to perform mapping between the internal and a block + * based representation. + * + * Note that it's not acceptable to hold deadlock prone resources such as + * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the + * block - the tuple is likely to be returned to an upper query node, and + * the next call could be off a long while. Holding buffer pins and such + * is obviously OK. + * + * Currently it is required to implement this interface, as there's no + * alternative way (contrary e.g. to bitmap scans) to implement sample + * scans. If infeasible to implement, the AM may raise an error. + */ + bool (*scan_sample_next_block) (TableScanDesc scan, + struct SampleScanState *scanstate); + + /* + * This callback, only called after scan_sample_next_block has returned + * true, should determine the next tuple to be returned from the selected + * block using the TsmRoutine's NextSampleTuple() callback. 
+ * + * The callback needs to perform visibility checks, and only return + * visible tuples. That obviously can mean calling NextSampleTuple() + * multiple times. + * + * The TsmRoutine interface assumes that there's a maximum offset on a + * given page, so if that doesn't apply to an AM, it needs to emulate that + * assumption somehow. + */ + bool (*scan_sample_next_tuple) (TableScanDesc scan, + struct SampleScanState *scanstate, + TupleTableSlot *slot); + +} TableAmRoutine; + + +/* ---------------------------------------------------------------------------- + * Slot functions. + * ---------------------------------------------------------------------------- + */ + +/* + * Returns slot callbacks suitable for holding tuples of the appropriate type + * for the relation. Works for tables, views, foreign tables and partitioned + * tables. + */ +extern const TupleTableSlotOps *table_slot_callbacks(Relation rel); + +/* + * Returns slot using the callbacks returned by table_slot_callbacks(), and + * registers it on *reglist. + */ +extern TupleTableSlot *table_slot_create(Relation rel, List **reglist); + + +/* ---------------------------------------------------------------------------- + * Table scan functions. + * ---------------------------------------------------------------------------- + */ + +/* + * Start a scan of `rel`. Returned tuples pass a visibility test of + * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys. + */ +static inline TableScanDesc +table_beginscan(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key) +{ + uint32 flags = SO_TYPE_SEQSCAN | + SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE; + + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); +} + +/* + * Like table_beginscan(), but for scanning catalog. It'll automatically use a + * snapshot appropriate for scanning catalog relations. + */ +extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys, + struct ScanKeyData *key); + +/* + * Like table_beginscan(), but table_beginscan_strat() offers an extended API + * that lets the caller control whether a nondefault buffer access strategy + * can be used, and whether syncscan can be chosen (possibly resulting in the + * scan not starting from block zero). Both of these default to true with + * plain table_beginscan. + */ +static inline TableScanDesc +table_beginscan_strat(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + bool allow_strat, bool allow_sync) +{ + uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE; + + if (allow_strat) + flags |= SO_ALLOW_STRAT; + if (allow_sync) + flags |= SO_ALLOW_SYNC; + + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); +} + +/* + * table_beginscan_bm is an alternative entry point for setting up a + * TableScanDesc for a bitmap heap scan. Although that scan technology is + * really quite unlike a standard seqscan, there is just enough commonality to + * make it worth using the same data structure. + */ +static inline TableScanDesc +table_beginscan_bm(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key) +{ + uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; + + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); +} + +/* + * table_beginscan_sampling is an alternative entry point for setting up a + * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth + * using the same data structure although the behavior is rather different. 
+ * In addition to the options offered by table_beginscan_strat, this call + * also allows control of whether page-mode visibility checking is used. + */ +static inline TableScanDesc +table_beginscan_sampling(Relation rel, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + bool allow_strat, bool allow_sync, + bool allow_pagemode) +{ + uint32 flags = SO_TYPE_SAMPLESCAN; + + if (allow_strat) + flags |= SO_ALLOW_STRAT; + if (allow_sync) + flags |= SO_ALLOW_SYNC; + if (allow_pagemode) + flags |= SO_ALLOW_PAGEMODE; + + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); +} + +/* + * table_beginscan_tid is an alternative entry point for setting up a + * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using + * the same data structure although the behavior is rather different. + */ +static inline TableScanDesc +table_beginscan_tid(Relation rel, Snapshot snapshot) +{ + uint32 flags = SO_TYPE_TIDSCAN; + + return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); +} + +/* + * table_beginscan_analyze is an alternative entry point for setting up a + * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using + * the same data structure although the behavior is rather different. + */ +static inline TableScanDesc +table_beginscan_analyze(Relation rel) +{ + uint32 flags = SO_TYPE_ANALYZE; + + return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags); +} + +/* + * End relation scan. + */ +static inline void +table_endscan(TableScanDesc scan) +{ + scan->rs_rd->rd_tableam->scan_end(scan); +} + +/* + * Restart a relation scan. + */ +static inline void +table_rescan(TableScanDesc scan, + struct ScanKeyData *key) +{ + scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false); +} + +/* + * Restart a relation scan after changing params. + * + * This call allows changing the buffer strategy, syncscan, and pagemode + * options before starting a fresh scan. Note that although the actual use of + * syncscan might change (effectively, enabling or disabling reporting), the + * previously selected startblock will be kept. + */ +static inline void +table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + scan->rs_rd->rd_tableam->scan_rescan(scan, key, true, + allow_strat, allow_sync, + allow_pagemode); +} + +/* + * Update snapshot used by the scan. + */ +extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot); + +/* + * Return next tuple from `scan`, store in slot. + */ +static inline bool +table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + slot->tts_tableOid = RelationGetRelid(sscan->rs_rd); + + /* + * We don't expect direct calls to table_scan_getnextslot with valid + * CheckXidAlive for catalog or regular tables. See detailed comments in + * xact.c where these variables are declared. + */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding"); + + return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot); +} + +/* ---------------------------------------------------------------------------- + * TID Range scanning related functions. + * ---------------------------------------------------------------------------- + */ + +/* + * table_beginscan_tidrange is the entry point for setting up a TableScanDesc + * for a TID range scan. 
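+ *
+ * A minimal caller-side sketch (illustrative only; assumes mintid/maxtid
+ * were filled in with ItemPointerSet beforehand and that `slot` matches the
+ * relation's AM):
+ *
+ *		scan = table_beginscan_tidrange(rel, snapshot, &mintid, &maxtid);
+ *		while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection,
+ *											   slot))
+ *			... process the tuple in slot ...
+ *		table_endscan(scan);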
+ */ +static inline TableScanDesc +table_beginscan_tidrange(Relation rel, Snapshot snapshot, + ItemPointer mintid, + ItemPointer maxtid) +{ + TableScanDesc sscan; + uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; + + sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); + + /* Set the range of TIDs to scan */ + sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); + + return sscan; +} + +/* + * table_rescan_tidrange resets the scan position and sets the minimum and + * maximum TID range to scan for a TableScanDesc created by + * table_beginscan_tidrange. + */ +static inline void +table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, + ItemPointer maxtid) +{ + /* Ensure table_beginscan_tidrange() was used. */ + Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0); + + sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false); + sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); +} + +/* + * Fetch the next tuple from `sscan` for a TID range scan created by + * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true, + * or returns false if no more tuples exist in the range. + */ +static inline bool +table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, + TupleTableSlot *slot) +{ + /* Ensure table_beginscan_tidrange() was used. */ + Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0); + + return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan, + direction, + slot); +} + + +/* ---------------------------------------------------------------------------- + * Parallel table scan related functions. + * ---------------------------------------------------------------------------- + */ + +/* + * Estimate the size of shared memory needed for a parallel scan of this + * relation. + */ +extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot); + +/* + * Initialize ParallelTableScanDesc for a parallel scan of this + * relation. `pscan` needs to be sized according to parallelscan_estimate() + * for the same relation. Call this just once in the leader process; then, + * individual workers attach via table_beginscan_parallel. + */ +extern void table_parallelscan_initialize(Relation rel, + ParallelTableScanDesc pscan, + Snapshot snapshot); + +/* + * Begin a parallel scan. `pscan` needs to have been initialized with + * table_parallelscan_initialize(), for the same relation. The initialization + * does not need to have happened in this backend. + * + * Caller must hold a suitable lock on the relation. + */ +extern TableScanDesc table_beginscan_parallel(Relation rel, + ParallelTableScanDesc pscan); + +/* + * Restart a parallel scan. Call this in the leader process. Caller is + * responsible for making sure that all workers have finished the scan + * beforehand. + */ +static inline void +table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) +{ + rel->rd_tableam->parallelscan_reinitialize(rel, pscan); +} + + +/* ---------------------------------------------------------------------------- + * Index scan related functions. + * ---------------------------------------------------------------------------- + */ + +/* + * Prepare to fetch tuples from the relation, as needed when fetching tuples + * for an index scan. + * + * Tuples for an index scan can then be fetched via table_index_fetch_tuple(). 
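+ *
+ * Rough usage sketch (not the actual executor code): given a TID returned
+ * by an index AM, a caller resolves it to a visible table tuple like this:
+ *
+ *		fetch = table_index_fetch_begin(rel);
+ *		call_again = false;
+ *		found = table_index_fetch_tuple(fetch, &tid, snapshot, slot,
+ *										&call_again, NULL);
+ *		... if call_again came back true, the same tid may yield further
+ *		versions on a subsequent call ...
+ *		table_index_fetch_end(fetch);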
+ */ +static inline IndexFetchTableData * +table_index_fetch_begin(Relation rel) +{ + return rel->rd_tableam->index_fetch_begin(rel); +} + +/* + * Reset index fetch. Typically this will release cross index fetch resources + * held in IndexFetchTableData. + */ +static inline void +table_index_fetch_reset(struct IndexFetchTableData *scan) +{ + scan->rel->rd_tableam->index_fetch_reset(scan); +} + +/* + * Release resources and deallocate index fetch. + */ +static inline void +table_index_fetch_end(struct IndexFetchTableData *scan) +{ + scan->rel->rd_tableam->index_fetch_end(scan); +} + +/* + * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing + * a visibility test according to `snapshot`. If a tuple was found and passed + * the visibility test, returns true, false otherwise. Note that *tid may be + * modified when we return true (see later remarks on multiple row versions + * reachable via a single index entry). + * + * *call_again needs to be false on the first call to table_index_fetch_tuple() for + * a tid. If there potentially is another tuple matching the tid, *call_again + * will be set to true, signaling that table_index_fetch_tuple() should be called + * again for the same tid. + * + * *all_dead, if all_dead is not NULL, will be set to true by + * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see + * that tuple. Index AMs can use that to avoid returning that tid in future + * searches. + * + * The difference between this function and table_tuple_fetch_row_version() + * is that this function returns the currently visible version of a row if + * the AM supports storing multiple row versions reachable via a single index + * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only + * evaluates the tuple exactly at `tid`. Outside of index entry ->table tuple + * lookups, table_tuple_fetch_row_version() is what's usually needed. + */ +static inline bool +table_index_fetch_tuple(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + /* + * We don't expect direct calls to table_index_fetch_tuple with valid + * CheckXidAlive for catalog or regular tables. See detailed comments in + * xact.c where these variables are declared. + */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding"); + + return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot, + slot, call_again, + all_dead); +} + +/* + * This is a convenience wrapper around table_index_fetch_tuple() which + * returns whether there are table tuple items corresponding to an index + * entry. This likely is only useful to verify if there's a conflict in a + * unique index. + */ +extern bool table_index_fetch_tuple_check(Relation rel, + ItemPointer tid, + Snapshot snapshot, + bool *all_dead); + + +/* ------------------------------------------------------------------------ + * Functions for non-modifying operations on individual tuples + * ------------------------------------------------------------------------ + */ + + +/* + * Fetch tuple at `tid` into `slot`, after doing a visibility test according to + * `snapshot`. If a tuple was found and passed the visibility test, returns + * true, false otherwise. + * + * See table_index_fetch_tuple's comment about what the difference between + * these functions is. It is correct to use this function outside of index + * entry->table tuple lookups. 
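+ *
+ * Minimal illustrative call (assumes `tid` was obtained earlier, e.g. from a
+ * row's ctid, and that `slot` matches the relation's AM):
+ *
+ *		if (table_tuple_fetch_row_version(rel, &tid, GetActiveSnapshot(), slot))
+ *			... slot now holds the row version stored exactly at tid ...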
+ */ +static inline bool +table_tuple_fetch_row_version(Relation rel, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot) +{ + /* + * We don't expect direct calls to table_tuple_fetch_row_version with + * valid CheckXidAlive for catalog or regular tables. See detailed + * comments in xact.c where these variables are declared. + */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding"); + + return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot); +} + +/* + * Verify that `tid` is a potentially valid tuple identifier. That doesn't + * mean that the pointed to row needs to exist or be visible, but that + * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or + * table_tuple_fetch_row_version()) should not error out if called with that + * tid. + * + * `scan` needs to have been started via table_beginscan(). + */ +static inline bool +table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid) +{ + return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid); +} + +/* + * Return the latest version of the tuple at `tid`, by updating `tid` to + * point at the newest version. + */ +extern void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid); + +/* + * Return true iff tuple in slot satisfies the snapshot. + * + * This assumes the slot's tuple is valid, and of the appropriate type for the + * AM. + * + * Some AMs might modify the data underlying the tuple as a side-effect. If so + * they ought to mark the relevant buffer dirty. + */ +static inline bool +table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, + Snapshot snapshot) +{ + return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot); +} + +/* + * Determine which index tuples are safe to delete based on their table TID. + * + * Determines which entries from index AM caller's TM_IndexDeleteOp state + * point to vacuumable table tuples. Entries that are found by tableam to be + * vacuumable are naturally safe for index AM to delete, and so get directly + * marked as deletable. See comments above TM_IndexDelete and comments above + * TM_IndexDeleteOp for full details. + * + * Returns a latestRemovedXid transaction ID that caller generally places in + * its index deletion WAL record. This might be used during subsequent REDO + * of the WAL record when in Hot Standby mode -- a recovery conflict for the + * index deletion operation might be required on the standby. + */ +static inline TransactionId +table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) +{ + return rel->rd_tableam->index_delete_tuples(rel, delstate); +} + + +/* ---------------------------------------------------------------------------- + * Functions for manipulations of physical tuples. + * ---------------------------------------------------------------------------- + */ + +/* + * Insert a tuple from a slot into table AM routine. + * + * The options bitmask allows the caller to specify options that may change the + * behaviour of the AM. The AM will ignore options that it does not support. + * + * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse + * free space in the relation. This can save some cycles when we know the + * relation is new and doesn't contain useful amounts of free space. + * TABLE_INSERT_SKIP_FSM is commonly passed directly to + * RelationGetBufferForTuple. See that method for more information. 
+ * + * TABLE_INSERT_FROZEN should only be specified for inserts into + * relfilenodes created during the current subtransaction and when + * there are no prior snapshots or pre-existing portals open. + * This causes rows to be frozen, which is an MVCC violation and + * requires explicit options chosen by user. + * + * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding + * information for the tuple. This should solely be used during table rewrites + * where RelationIsLogicallyLogged(relation) is not yet accurate for the new + * relation. + * + * Note that most of these options will be applied when inserting into the + * heap's TOAST table, too, if the tuple requires any out-of-line data. + * + * The BulkInsertState object (if any; bistate can be NULL for default + * behavior) is also just passed through to RelationGetBufferForTuple. If + * `bistate` is provided, table_finish_bulk_insert() needs to be called. + * + * On return the slot's tts_tid and tts_tableOid are updated to reflect the + * insertion. But note that any toasting of fields within the slot is NOT + * reflected in the slots contents. + */ +static inline void +table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid, + int options, struct BulkInsertStateData *bistate) +{ + rel->rd_tableam->tuple_insert(rel, slot, cid, options, + bistate); +} + +/* + * Perform a "speculative insertion". These can be backed out afterwards + * without aborting the whole transaction. Other sessions can wait for the + * speculative insertion to be confirmed, turning it into a regular tuple, or + * aborted, as if it never existed. Speculatively inserted tuples behave as + * "value locks" of short duration, used to implement INSERT .. ON CONFLICT. + * + * A transaction having performed a speculative insertion has to either abort, + * or finish the speculative insertion with + * table_tuple_complete_speculative(succeeded = ...). + */ +static inline void +table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot, + CommandId cid, int options, + struct BulkInsertStateData *bistate, + uint32 specToken) +{ + rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options, + bistate, specToken); +} + +/* + * Complete "speculative insertion" started in the same transaction. If + * succeeded is true, the tuple is fully inserted, if false, it's removed. + */ +static inline void +table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot, + uint32 specToken, bool succeeded) +{ + rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken, + succeeded); +} + +/* + * Insert multiple tuples into a table. + * + * This is like table_tuple_insert(), but inserts multiple tuples in one + * operation. That's often faster than calling table_tuple_insert() in a loop, + * because e.g. the AM can reduce WAL logging and page locking overhead. + * + * Except for taking `nslots` tuples as input, and an array of TupleTableSlots + * in `slots`, the parameters for table_multi_insert() are the same as for + * table_tuple_insert(). + * + * Note: this leaks memory into the current memory context. You can create a + * temporary context before calling this, if that's a problem. + */ +static inline void +table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, + CommandId cid, int options, struct BulkInsertStateData *bistate) +{ + rel->rd_tableam->multi_insert(rel, slots, nslots, + cid, options, bistate); +} + +/* + * Delete a tuple. 
+ *
+ * NB: do not call this directly unless prepared to deal with
+ * concurrent-update conditions.  Use simple_table_tuple_delete instead.
+ *
+ * Input parameters:
+ *	relation - table to be modified (caller must hold suitable lock)
+ *	tid - TID of tuple to be deleted
+ *	cid - delete command ID (used for visibility test, and stored into
+ *		cmax if successful)
+ *	crosscheck - if not InvalidSnapshot, also check tuple against this
+ *	wait - true if should wait for any conflicting update to commit/abort
+ * Output parameters:
+ *	tmfd - filled in failure cases (see below)
+ *	changingPart - true iff the tuple is being moved to another partition
+ *		table due to an update of the partition key. Otherwise, false.
+ *
+ * Normal, successful return value is TM_Ok, which means we did actually
+ * delete it.  Failure return codes are TM_SelfModified, TM_Updated, and
+ * TM_BeingModified (the last only possible if wait == false).
+ *
+ * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
+ * t_xmax, and, if possible, t_cmax.  See comments for struct TM_FailureData
+ * for additional info.
+ */
+static inline TM_Result
+table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
+				   Snapshot snapshot, Snapshot crosscheck, bool wait,
+				   TM_FailureData *tmfd, bool changingPart)
+{
+	return rel->rd_tableam->tuple_delete(rel, tid, cid,
+										 snapshot, crosscheck,
+										 wait, tmfd, changingPart);
+}
+
+/*
+ * Update a tuple.
+ *
+ * NB: do not call this directly unless you are prepared to deal with
+ * concurrent-update conditions.  Use simple_table_tuple_update instead.
+ *
+ * Input parameters:
+ *	relation - table to be modified (caller must hold suitable lock)
+ *	otid - TID of old tuple to be replaced
+ *	slot - newly constructed tuple data to store
+ *	cid - update command ID (used for visibility test, and stored into
+ *		cmax/cmin if successful)
+ *	crosscheck - if not InvalidSnapshot, also check old tuple against this
+ *	wait - true if should wait for any conflicting update to commit/abort
+ * Output parameters:
+ *	tmfd - filled in failure cases (see below)
+ *	lockmode - filled with lock mode acquired on tuple
+ *	update_indexes - in success cases this is set to true if new index entries
+ *		are required for this tuple
+ *
+ * Normal, successful return value is TM_Ok, which means we did actually
+ * update it.  Failure return codes are TM_SelfModified, TM_Updated, and
+ * TM_BeingModified (the last only possible if wait == false).
+ *
+ * On success, the slot's tts_tid and tts_tableOid are updated to match the new
+ * stored tuple; in particular, slot->tts_tid is set to the TID where the
+ * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
+ * update was done.  However, any TOAST changes in the new tuple's
+ * data are not reflected into *newtup.
+ *
+ * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
+ * t_xmax, and, if possible, t_cmax.  See comments for struct TM_FailureData
+ * for additional info.
+ */
+static inline TM_Result
+table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
+				   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
+				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
+				   bool *update_indexes)
+{
+	return rel->rd_tableam->tuple_update(rel, otid, slot,
+										 cid, snapshot, crosscheck,
+										 wait, tmfd,
+										 lockmode, update_indexes);
+}
+
+/*
+ * Lock a tuple in the specified mode.
+ * + * Input parameters: + * relation: relation containing tuple (caller must hold suitable lock) + * tid: TID of tuple to lock + * snapshot: snapshot to use for visibility determinations + * cid: current command ID (used for visibility test, and stored into + * tuple's cmax if lock is successful) + * mode: lock mode desired + * wait_policy: what to do if tuple lock is not available + * flags: + * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to + * also lock descendant tuples if lock modes don't conflict. + * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock + * latest version. + * + * Output parameters: + * *slot: contains the target tuple + * *tmfd: filled in failure cases (see below) + * + * Function result may be: + * TM_Ok: lock was successfully acquired + * TM_Invisible: lock failed because tuple was never visible to us + * TM_SelfModified: lock failed because tuple updated by self + * TM_Updated: lock failed because tuple updated by other xact + * TM_Deleted: lock failed because tuple deleted by other xact + * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip + * + * In the failure cases other than TM_Invisible and TM_Deleted, the routine + * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See + * comments for struct TM_FailureData for additional info. + */ +static inline TM_Result +table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd) +{ + return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot, + cid, mode, wait_policy, + flags, tmfd); +} + +/* + * Perform operations necessary to complete insertions made via + * tuple_insert and multi_insert with a BulkInsertState specified. + */ +static inline void +table_finish_bulk_insert(Relation rel, int options) +{ + /* optional callback */ + if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert) + rel->rd_tableam->finish_bulk_insert(rel, options); +} + + +/* ------------------------------------------------------------------------ + * DDL related functionality. + * ------------------------------------------------------------------------ + */ + +/* + * Create storage for `rel` in `newrnode`, with persistence set to + * `persistence`. + * + * This is used both during relation creation and various DDL operations to + * create a new relfilenode that can be filled from scratch. When creating + * new storage for an existing relfilenode, this should be called before the + * relcache entry has been updated. + * + * *freezeXid, *minmulti are set to the xid / multixact horizon for the table + * that pg_class.{relfrozenxid, relminmxid} have to be set to. + */ +static inline void +table_relation_set_new_filenode(Relation rel, + const RelFileNode *newrnode, + char persistence, + TransactionId *freezeXid, + MultiXactId *minmulti) +{ + rel->rd_tableam->relation_set_new_filenode(rel, newrnode, persistence, + freezeXid, minmulti); +} + +/* + * Remove all table contents from `rel`, in a non-transactional manner. + * Non-transactional meaning that there's no need to support rollbacks. This + * commonly only is used to perform truncations for relfilenodes created in the + * current transaction. + */ +static inline void +table_relation_nontransactional_truncate(Relation rel) +{ + rel->rd_tableam->relation_nontransactional_truncate(rel); +} + +/* + * Copy data from `rel` into the new relfilenode `newrnode`. 
The new + * relfilenode may not have storage associated before this function is + * called. This is only supposed to be used for low level operations like + * changing a relation's tablespace. + */ +static inline void +table_relation_copy_data(Relation rel, const RelFileNode *newrnode) +{ + rel->rd_tableam->relation_copy_data(rel, newrnode); +} + +/* + * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM + * FULL. + * + * Additional Input parameters: + * - use_sort - if true, the table contents are sorted appropriate for + * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied + * in that index's order; if false and OldIndex is InvalidOid, no sorting is + * performed + * - OldIndex - see use_sort + * - OldestXmin - computed by vacuum_set_xid_limits(), even when + * not needed for the relation's AM + * - *xid_cutoff - ditto + * - *multi_cutoff - ditto + * + * Output parameters: + * - *xid_cutoff - rel's new relfrozenxid value, may be invalid + * - *multi_cutoff - rel's new relminmxid value, may be invalid + * - *tups_vacuumed - stats, for logging, if appropriate for AM + * - *tups_recently_dead - stats, for logging, if appropriate for AM + */ +static inline void +table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, + Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, + TransactionId *xid_cutoff, + MultiXactId *multi_cutoff, + double *num_tuples, + double *tups_vacuumed, + double *tups_recently_dead) +{ + OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex, + use_sort, OldestXmin, + xid_cutoff, multi_cutoff, + num_tuples, tups_vacuumed, + tups_recently_dead); +} + +/* + * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by + * autovacuum. The specific actions performed by the AM will depend heavily on + * the individual AM. + * + * On entry a transaction needs to already been established, and the + * table is locked with a ShareUpdateExclusive lock. + * + * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through this + * routine, even if (for ANALYZE) it is part of the same VACUUM command. + */ +static inline void +table_relation_vacuum(Relation rel, struct VacuumParams *params, + BufferAccessStrategy bstrategy) +{ + rel->rd_tableam->relation_vacuum(rel, params, bstrategy); +} + +/* + * Prepare to analyze block `blockno` of `scan`. The scan needs to have been + * started with table_beginscan_analyze(). Note that this routine might + * acquire resources like locks that are held until + * table_scan_analyze_next_tuple() returns false. + * + * Returns false if block is unsuitable for sampling, true otherwise. + */ +static inline bool +table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy) +{ + return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno, + bstrategy); +} + +/* + * Iterate over tuples in the block selected with + * table_scan_analyze_next_block() (which needs to have returned true, and + * this routine may not have returned false for the same block before). If a + * tuple that's suitable for sampling is found, true is returned and a tuple + * is stored in `slot`. + * + * *liverows and *deadrows are incremented according to the encountered + * tuples. 
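+ *
+ * For orientation, an ANALYZE driver loops over these two wrappers roughly
+ * as sketched below (simplified; block sampling and row selection omitted):
+ *
+ *		scan = table_beginscan_analyze(rel);
+ *		while (... more sample blocks ...)
+ *		{
+ *			if (!table_scan_analyze_next_block(scan, targblock, bstrategy))
+ *				continue;
+ *			while (table_scan_analyze_next_tuple(scan, OldestXmin,
+ *												 &liverows, &deadrows, slot))
+ *				... possibly keep the tuple as a sample row ...
+ *		}
+ *		table_endscan(scan);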
+ */ +static inline bool +table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, + TupleTableSlot *slot) +{ + return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin, + liverows, deadrows, + slot); +} + +/* + * table_index_build_scan - scan the table to find tuples to be indexed + * + * This is called back from an access-method-specific index build procedure + * after the AM has done whatever setup it needs. The parent table relation + * is scanned to find tuples that should be entered into the index. Each + * such tuple is passed to the AM's callback routine, which does the right + * things to add it to the new index. After we return, the AM's index + * build procedure does whatever cleanup it needs. + * + * The total count of live tuples is returned. This is for updating pg_class + * statistics. (It's annoying not to be able to do that here, but we want to + * merge that update with others; see index_update_stats.) Note that the + * index AM itself must keep track of the number of index tuples; we don't do + * so here because the AM might reject some of the tuples for its own reasons, + * such as being unable to store NULLs. + * + * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when + * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along. + * + * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect + * any potentially broken HOT chains. Currently, we set this if there are any + * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying + * very hard to detect whether they're really incompatible with the chain tip. + * This only really makes sense for heap AM, it might need to be generalized + * for other AMs later. + */ +static inline double +table_index_build_scan(Relation table_rel, + Relation index_rel, + struct IndexInfo *index_info, + bool allow_sync, + bool progress, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan) +{ + return table_rel->rd_tableam->index_build_range_scan(table_rel, + index_rel, + index_info, + allow_sync, + false, + progress, + 0, + InvalidBlockNumber, + callback, + callback_state, + scan); +} + +/* + * As table_index_build_scan(), except that instead of scanning the complete + * table, only the given number of blocks are scanned. Scan to end-of-rel can + * be signaled by passing InvalidBlockNumber as numblocks. Note that + * restricting the range to scan cannot be done when requesting syncscan. + * + * When "anyvisible" mode is requested, all tuples visible to any transaction + * are indexed and counted as live, including those inserted or deleted by + * transactions that are still in progress. + */ +static inline double +table_index_build_range_scan(Relation table_rel, + Relation index_rel, + struct IndexInfo *index_info, + bool allow_sync, + bool anyvisible, + bool progress, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan) +{ + return table_rel->rd_tableam->index_build_range_scan(table_rel, + index_rel, + index_info, + allow_sync, + anyvisible, + progress, + start_blockno, + numblocks, + callback, + callback_state, + scan); +} + +/* + * table_index_validate_scan - second table scan for concurrent index build + * + * See validate_index() for an explanation. 
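+ *
+ * Illustrative call site, loosely following validate_index() (snapshot
+ * registration shown, ValidateIndexState and tuplesort setup elided):
+ *
+ *		snapshot = RegisterSnapshot(GetTransactionSnapshot());
+ *		table_index_validate_scan(heapRelation, indexRelation, indexInfo,
+ *								  snapshot, &state);
+ *		UnregisterSnapshot(snapshot);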
+ */ +static inline void +table_index_validate_scan(Relation table_rel, + Relation index_rel, + struct IndexInfo *index_info, + Snapshot snapshot, + struct ValidateIndexState *state) +{ + table_rel->rd_tableam->index_validate_scan(table_rel, + index_rel, + index_info, + snapshot, + state); +} + + +/* ---------------------------------------------------------------------------- + * Miscellaneous functionality + * ---------------------------------------------------------------------------- + */ + +/* + * Return the current size of `rel` in bytes. If `forkNumber` is + * InvalidForkNumber, return the relation's overall size, otherwise the size + * for the indicated fork. + * + * Note that the overall size might not be the equivalent of the sum of sizes + * for the individual forks for some AMs, e.g. because the AMs storage does + * not neatly map onto the builtin types of forks. + */ +static inline uint64 +table_relation_size(Relation rel, ForkNumber forkNumber) +{ + return rel->rd_tableam->relation_size(rel, forkNumber); +} + +/* + * table_relation_needs_toast_table - does this relation need a toast table? + */ +static inline bool +table_relation_needs_toast_table(Relation rel) +{ + return rel->rd_tableam->relation_needs_toast_table(rel); +} + +/* + * Return the OID of the AM that should be used to implement the TOAST table + * for this relation. + */ +static inline Oid +table_relation_toast_am(Relation rel) +{ + return rel->rd_tableam->relation_toast_am(rel); +} + +/* + * Fetch all or part of a TOAST value from a TOAST table. + * + * If this AM is never used to implement a TOAST table, then this callback + * is not needed. But, if toasted values are ever stored in a table of this + * type, then you will need this callback. + * + * toastrel is the relation in which the toasted value is stored. + * + * valueid identifes which toast value is to be fetched. For the heap, + * this corresponds to the values stored in the chunk_id column. + * + * attrsize is the total size of the toast value to be fetched. + * + * sliceoffset is the offset within the toast value of the first byte that + * should be fetched. + * + * slicelength is the number of bytes from the toast value that should be + * fetched. + * + * result is caller-allocated space into which the fetched bytes should be + * stored. + */ +static inline void +table_relation_fetch_toast_slice(Relation toastrel, Oid valueid, + int32 attrsize, int32 sliceoffset, + int32 slicelength, struct varlena *result) +{ + toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid, + attrsize, + sliceoffset, slicelength, + result); +} + + +/* ---------------------------------------------------------------------------- + * Planner related functionality + * ---------------------------------------------------------------------------- + */ + +/* + * Estimate the current size of the relation, as an AM specific workhorse for + * estimate_rel_size(). Look there for an explanation of the parameters. + */ +static inline void +table_relation_estimate_size(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac) +{ + rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples, + allvisfrac); +} + + +/* ---------------------------------------------------------------------------- + * Executor related functionality + * ---------------------------------------------------------------------------- + */ + +/* + * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of + * a bitmap table scan. 
`scan` needs to have been started via + * table_beginscan_bm(). Returns false if there are no tuples to be found on + * the page, true otherwise. + * + * Note, this is an optionally implemented function, therefore should only be + * used after verifying the presence (at plan time or such). + */ +static inline bool +table_scan_bitmap_next_block(TableScanDesc scan, + struct TBMIterateResult *tbmres) +{ + /* + * We don't expect direct calls to table_scan_bitmap_next_block with valid + * CheckXidAlive for catalog or regular tables. See detailed comments in + * xact.c where these variables are declared. + */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding"); + + return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan, + tbmres); +} + +/* + * Fetch the next tuple of a bitmap table scan into `slot` and return true if + * a visible tuple was found, false otherwise. + * table_scan_bitmap_next_block() needs to previously have selected a + * block (i.e. returned true), and no previous + * table_scan_bitmap_next_tuple() for the same block may have + * returned false. + */ +static inline bool +table_scan_bitmap_next_tuple(TableScanDesc scan, + struct TBMIterateResult *tbmres, + TupleTableSlot *slot) +{ + /* + * We don't expect direct calls to table_scan_bitmap_next_tuple with valid + * CheckXidAlive for catalog or regular tables. See detailed comments in + * xact.c where these variables are declared. + */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding"); + + return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan, + tbmres, + slot); +} + +/* + * Prepare to fetch tuples from the next block in a sample scan. Returns false + * if the sample scan is finished, true otherwise. `scan` needs to have been + * started via table_beginscan_sampling(). + * + * This will call the TsmRoutine's NextSampleBlock() callback if necessary + * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the + * underlying relation. + */ +static inline bool +table_scan_sample_next_block(TableScanDesc scan, + struct SampleScanState *scanstate) +{ + /* + * We don't expect direct calls to table_scan_sample_next_block with valid + * CheckXidAlive for catalog or regular tables. See detailed comments in + * xact.c where these variables are declared. + */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding"); + return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate); +} + +/* + * Fetch the next sample tuple into `slot` and return true if a visible tuple + * was found, false otherwise. table_scan_sample_next_block() needs to + * previously have selected a block (i.e. returned true), and no previous + * table_scan_sample_next_tuple() for the same block may have returned false. + * + * This will call the TsmRoutine's NextSampleTuple() callback. + */ +static inline bool +table_scan_sample_next_tuple(TableScanDesc scan, + struct SampleScanState *scanstate, + TupleTableSlot *slot) +{ + /* + * We don't expect direct calls to table_scan_sample_next_tuple with valid + * CheckXidAlive for catalog or regular tables. See detailed comments in + * xact.c where these variables are declared. 
+ */ + if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) + elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding"); + return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate, + slot); +} + + +/* ---------------------------------------------------------------------------- + * Functions to make modifications a bit simpler. + * ---------------------------------------------------------------------------- + */ + +extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot); +extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, + Snapshot snapshot); +extern void simple_table_tuple_update(Relation rel, ItemPointer otid, + TupleTableSlot *slot, Snapshot snapshot, + bool *update_indexes); + + +/* ---------------------------------------------------------------------------- + * Helper functions to implement parallel scans for block oriented AMs. + * ---------------------------------------------------------------------------- + */ + +extern Size table_block_parallelscan_estimate(Relation rel); +extern Size table_block_parallelscan_initialize(Relation rel, + ParallelTableScanDesc pscan); +extern void table_block_parallelscan_reinitialize(Relation rel, + ParallelTableScanDesc pscan); +extern BlockNumber table_block_parallelscan_nextpage(Relation rel, + ParallelBlockTableScanWorker pbscanwork, + ParallelBlockTableScanDesc pbscan); +extern void table_block_parallelscan_startblock_init(Relation rel, + ParallelBlockTableScanWorker pbscanwork, + ParallelBlockTableScanDesc pbscan); + + +/* ---------------------------------------------------------------------------- + * Helper functions to implement relation sizing for block oriented AMs. + * ---------------------------------------------------------------------------- + */ + +extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber); +extern void table_block_relation_estimate_size(Relation rel, + int32 *attr_widths, + BlockNumber *pages, + double *tuples, + double *allvisfrac, + Size overhead_bytes_per_tuple, + Size usable_bytes_per_page); + +/* ---------------------------------------------------------------------------- + * Functions in tableamapi.c + * ---------------------------------------------------------------------------- + */ + +extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler); +extern const TableAmRoutine *GetHeapamTableAmRoutine(void); +extern bool check_default_table_access_method(char **newval, void **extra, + GucSource source); + +#endif /* TABLEAM_H */ diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h new file mode 100644 index 0000000..ce3586c --- /dev/null +++ b/src/include/access/timeline.h @@ -0,0 +1,44 @@ +/* + * timeline.h + * + * Functions for reading and writing timeline history files. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/timeline.h + */ +#ifndef TIMELINE_H +#define TIMELINE_H + +#include "access/xlogdefs.h" +#include "nodes/pg_list.h" + +/* + * A list of these structs describes the timeline history of the server. Each + * TimeLineHistoryEntry represents a piece of WAL belonging to the history, + * from newest to oldest. All WAL locations between 'begin' and 'end' belong to + * the timeline represented by the entry. Together the 'begin' and 'end' + * pointers of all the entries form a contiguous line from beginning of time + * to infinity. 
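+ *
+ * For example (a sketch using the functions declared below), a caller that
+ * wants to know which timeline a given WAL position belongs to can do:
+ *
+ *		List	   *history = readTimeLineHistory(targetTLI);
+ *		TimeLineID	tli = tliOfPointInHistory(ptr, history);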
+ */ +typedef struct +{ + TimeLineID tli; + XLogRecPtr begin; /* inclusive */ + XLogRecPtr end; /* exclusive, InvalidXLogRecPtr means infinity */ +} TimeLineHistoryEntry; + +extern List *readTimeLineHistory(TimeLineID targetTLI); +extern bool existsTimeLineHistory(TimeLineID probeTLI); +extern TimeLineID findNewestTimeLine(TimeLineID startTLI); +extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + XLogRecPtr switchpoint, char *reason); +extern void writeTimeLineHistoryFile(TimeLineID tli, char *content, int size); +extern void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end); +extern bool tliInHistory(TimeLineID tli, List *expectedTLEs); +extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history); +extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history, + TimeLineID *nextTLI); + +#endif /* TIMELINE_H */ diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h new file mode 100644 index 0000000..c992ece --- /dev/null +++ b/src/include/access/toast_compression.h @@ -0,0 +1,73 @@ +/*------------------------------------------------------------------------- + * + * toast_compression.h + * Functions for toast compression. + * + * Copyright (c) 2021, PostgreSQL Global Development Group + * + * src/include/access/toast_compression.h + * + *------------------------------------------------------------------------- + */ + +#ifndef TOAST_COMPRESSION_H +#define TOAST_COMPRESSION_H + +/* + * GUC support. + * + * default_toast_compression is an integer for purposes of the GUC machinery, + * but the value is one of the char values defined below, as they appear in + * pg_attribute.attcompression, e.g. TOAST_PGLZ_COMPRESSION. + */ +extern int default_toast_compression; + +/* + * Built-in compression method ID. The toast compression header will store + * this in the first 2 bits of the raw length. These built-in compression + * method IDs are directly mapped to the built-in compression methods. + * + * Don't use these values for anything other than understanding the meaning + * of the raw bits from a varlena; in particular, if the goal is to identify + * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc. + * below. We might someday support more than 4 compression methods, but + * we can never have more than 4 values in this enum, because there are + * only 2 bits available in the places where this is stored. + */ +typedef enum ToastCompressionId +{ + TOAST_PGLZ_COMPRESSION_ID = 0, + TOAST_LZ4_COMPRESSION_ID = 1, + TOAST_INVALID_COMPRESSION_ID = 2 +} ToastCompressionId; + +/* + * Built-in compression methods. pg_attribute will store these in the + * attcompression column. In attcompression, InvalidCompressionMethod + * denotes the default behavior. 
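+ *
+ * As a sketch of how the default is typically resolved (att is assumed to
+ * be a Form_pg_attribute; this is not a definitive recipe):
+ *
+ *		char	cmethod = att->attcompression;
+ *
+ *		if (!CompressionMethodIsValid(cmethod))
+ *			cmethod = (char) default_toast_compression;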
+ */ +#define TOAST_PGLZ_COMPRESSION 'p' +#define TOAST_LZ4_COMPRESSION 'l' +#define InvalidCompressionMethod '\0' + +#define CompressionMethodIsValid(cm) ((cm) != InvalidCompressionMethod) + + +/* pglz compression/decompression routines */ +extern struct varlena *pglz_compress_datum(const struct varlena *value); +extern struct varlena *pglz_decompress_datum(const struct varlena *value); +extern struct varlena *pglz_decompress_datum_slice(const struct varlena *value, + int32 slicelength); + +/* lz4 compression/decompression routines */ +extern struct varlena *lz4_compress_datum(const struct varlena *value); +extern struct varlena *lz4_decompress_datum(const struct varlena *value); +extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value, + int32 slicelength); + +/* other stuff */ +extern ToastCompressionId toast_get_compression_id(struct varlena *attr); +extern char CompressionNameToMethod(const char *compression); +extern const char *GetCompressionMethodName(char method); + +#endif /* TOAST_COMPRESSION_H */ diff --git a/src/include/access/toast_helper.h b/src/include/access/toast_helper.h new file mode 100644 index 0000000..05104ce --- /dev/null +++ b/src/include/access/toast_helper.h @@ -0,0 +1,116 @@ +/*------------------------------------------------------------------------- + * + * toast_helper.h + * Helper functions for table AMs implementing compressed or + * out-of-line storage of varlena attributes. + * + * Copyright (c) 2000-2021, PostgreSQL Global Development Group + * + * src/include/access/toast_helper.h + * + *------------------------------------------------------------------------- + */ + +#ifndef TOAST_HELPER_H +#define TOAST_HELPER_H + +#include "utils/rel.h" + +/* + * Information about one column of a tuple being toasted. + * + * NOTE: toast_action[i] can have these values: + * ' ' default handling + * TYPSTORAGE_PLAIN already processed --- don't touch it + * TYPSTORAGE_EXTENDED incompressible, but OK to move off + * + * NOTE: toast_attr[i].tai_size is only made valid for varlena attributes with + * toast_action[i] different from TYPSTORAGE_PLAIN. + */ +typedef struct +{ + struct varlena *tai_oldexternal; + int32 tai_size; + uint8 tai_colflags; + char tai_compression; +} ToastAttrInfo; + +/* + * Information about one tuple being toasted. + */ +typedef struct +{ + /* + * Before calling toast_tuple_init, the caller must initialize the + * following fields. Each array must have a length equal to + * ttc_rel->rd_att->natts. The tts_oldvalues and tts_oldisnull fields + * should be NULL in the case of an insert. + */ + Relation ttc_rel; /* the relation that contains the tuple */ + Datum *ttc_values; /* values from the tuple columns */ + bool *ttc_isnull; /* null flags for the tuple columns */ + Datum *ttc_oldvalues; /* values from previous tuple */ + bool *ttc_oldisnull; /* null flags from previous tuple */ + + /* + * Before calling toast_tuple_init, the caller should set tts_attr to + * point to an array of ToastAttrInfo structures of a length equal to + * tts_rel->rd_att->natts. The contents of the array need not be + * initialized. ttc_flags also does not need to be initialized. + */ + uint8 ttc_flags; + ToastAttrInfo *ttc_attr; +} ToastTupleContext; + +/* + * Flags indicating the overall state of a TOAST operation. + * + * TOAST_NEEDS_DELETE_OLD indicates that one or more old TOAST datums need + * to be deleted. + * + * TOAST_NEEDS_FREE indicates that one or more TOAST values need to be freed. 
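Taken together, the per-tuple flags here and the per-column flags further down are bookkeeping for a fairly fixed calling sequence. Purely as a sketch of that sequence, using the toast_tuple_* helpers declared later in this header: the real driver is the table AM's toasting routine (the heap AM's, for instance), which also checks tuple size between passes and distinguishes MAIN from EXTENDED storage; the function name toast_all_attributes is invented and the size checks are deliberately omitted.

#include "postgres.h"
#include "access/toast_helper.h"

/* Illustrative only: compress what can be compressed, move the rest out of
 * line, then let toast_tuple_cleanup() act on the flags accumulated in
 * ttc->ttc_flags.  'options' is the caller's insert/update options word. */
static void
toast_all_attributes(ToastTupleContext *ttc, int options)
{
    int         attnum;

    toast_tuple_init(ttc);

    while ((attnum = toast_tuple_find_biggest_attribute(ttc, true, false)) >= 0)
        toast_tuple_try_compression(ttc, attnum);

    while ((attnum = toast_tuple_find_biggest_attribute(ttc, false, false)) >= 0)
        toast_tuple_externalize(ttc, attnum, options);

    toast_tuple_cleanup(ttc);
}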
+ * + * TOAST_HAS_NULLS indicates that nulls were found in the tuple being toasted. + * + * TOAST_NEEDS_CHANGE indicates that a new tuple needs to built; in other + * words, the toaster did something. + */ +#define TOAST_NEEDS_DELETE_OLD 0x0001 +#define TOAST_NEEDS_FREE 0x0002 +#define TOAST_HAS_NULLS 0x0004 +#define TOAST_NEEDS_CHANGE 0x0008 + +/* + * Flags indicating the status of a TOAST operation with respect to a + * particular column. + * + * TOASTCOL_NEEDS_DELETE_OLD indicates that the old TOAST datums for this + * column need to be deleted. + * + * TOASTCOL_NEEDS_FREE indicates that the value for this column needs to + * be freed. + * + * TOASTCOL_IGNORE indicates that the toaster should not further process + * this column. + * + * TOASTCOL_INCOMPRESSIBLE indicates that this column has been found to + * be incompressible, but could be moved out-of-line. + */ +#define TOASTCOL_NEEDS_DELETE_OLD TOAST_NEEDS_DELETE_OLD +#define TOASTCOL_NEEDS_FREE TOAST_NEEDS_FREE +#define TOASTCOL_IGNORE 0x0010 +#define TOASTCOL_INCOMPRESSIBLE 0x0020 + +extern void toast_tuple_init(ToastTupleContext *ttc); +extern int toast_tuple_find_biggest_attribute(ToastTupleContext *ttc, + bool for_compression, + bool check_main); +extern void toast_tuple_try_compression(ToastTupleContext *ttc, int attribute); +extern void toast_tuple_externalize(ToastTupleContext *ttc, int attribute, + int options); +extern void toast_tuple_cleanup(ToastTupleContext *ttc); + +extern void toast_delete_external(Relation rel, Datum *values, bool *isnull, + bool is_speculative); + +#endif diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h new file mode 100644 index 0000000..1c28b07 --- /dev/null +++ b/src/include/access/toast_internals.h @@ -0,0 +1,63 @@ +/*------------------------------------------------------------------------- + * + * toast_internals.h + * Internal definitions for the TOAST system. + * + * Copyright (c) 2000-2021, PostgreSQL Global Development Group + * + * src/include/access/toast_internals.h + * + *------------------------------------------------------------------------- + */ +#ifndef TOAST_INTERNALS_H +#define TOAST_INTERNALS_H + +#include "access/toast_compression.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" + +/* + * The information at the start of the compressed toast data. + */ +typedef struct toast_compress_header +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + uint32 tcinfo; /* 2 bits for compression method and 30 bits + * external size; see va_extinfo */ +} toast_compress_header; + +/* + * Utilities for manipulation of header information for compressed + * toast entries. 
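Since tcinfo packs two fields into one word, a tiny worked example may help. It uses the macros defined just below; the function name is invented and the example is illustrative only.

#include "postgres.h"
#include "access/toast_internals.h"

/* Illustrative only: 1024 fits in the low VARLENA_EXTSIZE_BITS bits, the
 * method ID lands in the top two bits, and both round-trip unchanged. */
static void
compress_header_example(toast_compress_header *hdr)
{
    TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(hdr, 1024,
                                                TOAST_LZ4_COMPRESSION_ID);
    Assert(TOAST_COMPRESS_EXTSIZE(hdr) == 1024);
    Assert(TOAST_COMPRESS_METHOD(hdr) == TOAST_LZ4_COMPRESSION_ID);
}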
+ */ +#define TOAST_COMPRESS_EXTSIZE(ptr) \ + (((toast_compress_header *) (ptr))->tcinfo & VARLENA_EXTSIZE_MASK) +#define TOAST_COMPRESS_METHOD(ptr) \ + (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS) + +#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ + do { \ + Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ + Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ + ((toast_compress_header *) (ptr))->tcinfo = \ + (len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \ + } while (0) + +extern Datum toast_compress_datum(Datum value, char cmethod); +extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock); + +extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative); +extern Datum toast_save_datum(Relation rel, Datum value, + struct varlena *oldexternal, int options); + +extern int toast_open_indexes(Relation toastrel, + LOCKMODE lock, + Relation **toastidxs, + int *num_indexes); +extern void toast_close_indexes(Relation *toastidxs, int num_indexes, + LOCKMODE lock); +extern void init_toast_snapshot(Snapshot toast_snapshot); + +#endif /* TOAST_INTERNALS_H */ diff --git a/src/include/access/transam.h b/src/include/access/transam.h new file mode 100644 index 0000000..2fe8a59 --- /dev/null +++ b/src/include/access/transam.h @@ -0,0 +1,370 @@ +/*------------------------------------------------------------------------- + * + * transam.h + * postgres transaction access method support code + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/transam.h + * + *------------------------------------------------------------------------- + */ +#ifndef TRANSAM_H +#define TRANSAM_H + +#include "access/xlogdefs.h" + + +/* ---------------- + * Special transaction ID values + * + * BootstrapTransactionId is the XID for "bootstrap" operations, and + * FrozenTransactionId is used for very old tuples. Both should + * always be considered valid. + * + * FirstNormalTransactionId is the first "normal" transaction id. + * Note: if you need to change it, you must change pg_class.h as well. 
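A quick illustration of how the special values defined just below interact with the validity and normality tests declared further down in this header; the function name is invented.

#include "postgres.h"
#include "access/transam.h"

/* Illustrative only: Bootstrap and Frozen XIDs are always valid, but they
 * are not "normal", so wraparound comparisons never apply to them. */
static void
special_xid_example(void)
{
    Assert(TransactionIdIsValid(BootstrapTransactionId));
    Assert(TransactionIdIsValid(FrozenTransactionId));
    Assert(!TransactionIdIsNormal(FrozenTransactionId));
    Assert(TransactionIdIsNormal(FirstNormalTransactionId));
}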
+ * ---------------- + */ +#define InvalidTransactionId ((TransactionId) 0) +#define BootstrapTransactionId ((TransactionId) 1) +#define FrozenTransactionId ((TransactionId) 2) +#define FirstNormalTransactionId ((TransactionId) 3) +#define MaxTransactionId ((TransactionId) 0xFFFFFFFF) + +/* ---------------- + * transaction ID manipulation macros + * ---------------- + */ +#define TransactionIdIsValid(xid) ((xid) != InvalidTransactionId) +#define TransactionIdIsNormal(xid) ((xid) >= FirstNormalTransactionId) +#define TransactionIdEquals(id1, id2) ((id1) == (id2)) +#define TransactionIdStore(xid, dest) (*(dest) = (xid)) +#define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) + +#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32)) +#define XidFromFullTransactionId(x) ((uint32) (x).value) +#define U64FromFullTransactionId(x) ((x).value) +#define FullTransactionIdEquals(a, b) ((a).value == (b).value) +#define FullTransactionIdPrecedes(a, b) ((a).value < (b).value) +#define FullTransactionIdPrecedesOrEquals(a, b) ((a).value <= (b).value) +#define FullTransactionIdFollows(a, b) ((a).value > (b).value) +#define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value) +#define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x)) +#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId) +#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId) +#define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId) + +/* + * A 64 bit value that contains an epoch and a TransactionId. This is + * wrapped in a struct to prevent implicit conversion to/from TransactionId. + * Not all values represent valid normal XIDs. + */ +typedef struct FullTransactionId +{ + uint64 value; +} FullTransactionId; + +static inline FullTransactionId +FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid) +{ + FullTransactionId result; + + result.value = ((uint64) epoch) << 32 | xid; + + return result; +} + +static inline FullTransactionId +FullTransactionIdFromU64(uint64 value) +{ + FullTransactionId result; + + result.value = value; + + return result; +} + +/* advance a transaction ID variable, handling wraparound correctly */ +#define TransactionIdAdvance(dest) \ + do { \ + (dest)++; \ + if ((dest) < FirstNormalTransactionId) \ + (dest) = FirstNormalTransactionId; \ + } while(0) + +/* + * Retreat a FullTransactionId variable, stepping over xids that would appear + * to be special only when viewed as 32bit XIDs. + */ +static inline void +FullTransactionIdRetreat(FullTransactionId *dest) +{ + dest->value--; + + /* + * In contrast to 32bit XIDs don't step over the "actual" special xids. + * For 64bit xids these can't be reached as part of a wraparound as they + * can in the 32bit case. + */ + if (FullTransactionIdPrecedes(*dest, FirstNormalFullTransactionId)) + return; + + /* + * But we do need to step over XIDs that'd appear special only for 32bit + * XIDs. + */ + while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId) + dest->value--; +} + +/* + * Advance a FullTransactionId variable, stepping over xids that would appear + * to be special only when viewed as 32bit XIDs. 
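The advance operation that follows is easiest to see with concrete numbers. A worked example, assuming Assert() is available as usual in backend code; the starting value and the function name are arbitrary.

#include "postgres.h"
#include "access/transam.h"

/* Illustrative only: crossing a 32-bit boundary skips over the values whose
 * low 32 bits would read as Invalid, Bootstrap or Frozen. */
static void
full_xid_advance_example(void)
{
    /* epoch 1, xid 0xFFFFFFFF */
    FullTransactionId fxid = FullTransactionIdFromU64(UINT64CONST(0x1FFFFFFFF));

    FullTransactionIdAdvance(&fxid);

    /* lands on epoch 2, xid 3: xids 0, 1 and 2 were stepped over */
    Assert(EpochFromFullTransactionId(fxid) == 2);
    Assert(XidFromFullTransactionId(fxid) == FirstNormalTransactionId);
}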
+ */ +static inline void +FullTransactionIdAdvance(FullTransactionId *dest) +{ + dest->value++; + + /* see FullTransactionIdAdvance() */ + if (FullTransactionIdPrecedes(*dest, FirstNormalFullTransactionId)) + return; + + while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId) + dest->value++; +} + +/* back up a transaction ID variable, handling wraparound correctly */ +#define TransactionIdRetreat(dest) \ + do { \ + (dest)--; \ + } while ((dest) < FirstNormalTransactionId) + +/* compare two XIDs already known to be normal; this is a macro for speed */ +#define NormalTransactionIdPrecedes(id1, id2) \ + (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ + (int32) ((id1) - (id2)) < 0) + +/* compare two XIDs already known to be normal; this is a macro for speed */ +#define NormalTransactionIdFollows(id1, id2) \ + (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ + (int32) ((id1) - (id2)) > 0) + +/* ---------- + * Object ID (OID) zero is InvalidOid. + * + * OIDs 1-9999 are reserved for manual assignment (see .dat files in + * src/include/catalog/). Of these, 8000-9999 are reserved for + * development purposes (such as in-progress patches and forks); + * they should not appear in released versions. + * + * OIDs 10000-11999 are reserved for assignment by genbki.pl, for use + * when the .dat files in src/include/catalog/ do not specify an OID + * for a catalog entry that requires one. Note that genbki.pl assigns + * these OIDs independently in each catalog, so they're not guaranteed + * to be globally unique. + * + * OIDS 12000-16383 are reserved for assignment during initdb + * using the OID generator. (We start the generator at 12000.) + * + * OIDs beginning at 16384 are assigned from the OID generator + * during normal multiuser operation. (We force the generator up to + * 16384 as soon as we are in normal operation.) + * + * The choices of 8000, 10000 and 12000 are completely arbitrary, and can be + * moved if we run low on OIDs in any category. Changing the macros below, + * and updating relevant documentation (see bki.sgml and RELEASE_CHANGES), + * should be sufficient to do this. Moving the 16384 boundary between + * initdb-assigned OIDs and user-defined objects would be substantially + * more painful, however, since some user-defined OIDs will appear in + * on-disk data; such a change would probably break pg_upgrade. + * + * NOTE: if the OID generator wraps around, we skip over OIDs 0-16383 + * and resume with 16384. This minimizes the odds of OID conflict, by not + * reassigning OIDs that might have been assigned during initdb. + * ---------- + */ +#define FirstGenbkiObjectId 10000 +#define FirstBootstrapObjectId 12000 +#define FirstNormalObjectId 16384 + +/* + * VariableCache is a data structure in shared memory that is used to track + * OID and XID assignment state. For largely historical reasons, there is + * just one struct with different fields that are protected by different + * LWLocks. + * + * Note: xidWrapLimit and oldestXidDB are not "active" values, but are + * used just to generate useful messages when xidWarnLimit or xidStopLimit + * are exceeded. + */ +typedef struct VariableCacheData +{ + /* + * These fields are protected by OidGenLock. + */ + Oid nextOid; /* next OID to assign */ + uint32 oidCount; /* OIDs available before must do XLOG work */ + + /* + * These fields are protected by XidGenLock. 
+ */ + FullTransactionId nextXid; /* next XID to assign */ + + TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ + TransactionId xidVacLimit; /* start forcing autovacuums here */ + TransactionId xidWarnLimit; /* start complaining here */ + TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ + TransactionId xidWrapLimit; /* where the world ends */ + Oid oldestXidDB; /* database with minimum datfrozenxid */ + + /* + * These fields are protected by CommitTsLock + */ + TransactionId oldestCommitTsXid; + TransactionId newestCommitTsXid; + + /* + * These fields are protected by ProcArrayLock. + */ + FullTransactionId latestCompletedXid; /* newest full XID that has + * committed or aborted */ + + /* + * Number of top-level transactions with xids (i.e. which may have + * modified the database) that completed in some form since the start of + * the server. This currently is solely used to check whether + * GetSnapshotData() needs to recompute the contents of the snapshot, or + * not. There are likely other users of this. Always above 1. + */ + uint64 xactCompletionCount; + + /* + * These fields are protected by XactTruncationLock + */ + TransactionId oldestClogXid; /* oldest it's safe to look up in clog */ + +} VariableCacheData; + +typedef VariableCacheData *VariableCache; + + +/* ---------------- + * extern declarations + * ---------------- + */ + +/* in transam/xact.c */ +extern bool TransactionStartedDuringRecovery(void); + +/* in transam/varsup.c */ +extern PGDLLIMPORT VariableCache ShmemVariableCache; + +/* + * prototypes for functions in transam/transam.c + */ +extern bool TransactionIdDidCommit(TransactionId transactionId); +extern bool TransactionIdDidAbort(TransactionId transactionId); +extern bool TransactionIdIsKnownCompleted(TransactionId transactionId); +extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); +extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); +extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); +extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); +extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); +extern TransactionId TransactionIdLatest(TransactionId mainxid, + int nxids, const TransactionId *xids); +extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); + +/* in transam/varsup.c */ +extern FullTransactionId GetNewTransactionId(bool isSubXact); +extern void AdvanceNextFullTransactionIdPastXid(TransactionId xid); +extern FullTransactionId ReadNextFullTransactionId(void); +extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, + Oid oldest_datoid); +extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); +extern bool ForceTransactionIdLimitUpdate(void); +extern Oid GetNewObjectId(void); + +#ifdef USE_ASSERT_CHECKING +extern void AssertTransactionIdInAllowableRange(TransactionId xid); +#else +#define AssertTransactionIdInAllowableRange(xid) ((void)true) +#endif + +/* + * Some frontend programs include this header. For compilers that emit static + * inline functions even when they're unused, that leads to unsatisfied + * external references; hence hide them with #ifndef FRONTEND. 
+ */ +#ifndef FRONTEND + +/* + * For callers that just need the XID part of the next transaction ID. + */ +static inline TransactionId +ReadNextTransactionId(void) +{ + return XidFromFullTransactionId(ReadNextFullTransactionId()); +} + +/* return transaction ID backed up by amount, handling wraparound correctly */ +static inline TransactionId +TransactionIdRetreatedBy(TransactionId xid, uint32 amount) +{ + xid -= amount; + + while (xid < FirstNormalTransactionId) + xid--; + + return xid; +} + +/* return the older of the two IDs */ +static inline TransactionId +TransactionIdOlder(TransactionId a, TransactionId b) +{ + if (!TransactionIdIsValid(a)) + return b; + + if (!TransactionIdIsValid(b)) + return a; + + if (TransactionIdPrecedes(a, b)) + return a; + return b; +} + +/* return the older of the two IDs, assuming they're both normal */ +static inline TransactionId +NormalTransactionIdOlder(TransactionId a, TransactionId b) +{ + Assert(TransactionIdIsNormal(a)); + Assert(TransactionIdIsNormal(b)); + if (NormalTransactionIdPrecedes(a, b)) + return a; + return b; +} + +/* return the newer of the two IDs */ +static inline FullTransactionId +FullTransactionIdNewer(FullTransactionId a, FullTransactionId b) +{ + if (!FullTransactionIdIsValid(a)) + return b; + + if (!FullTransactionIdIsValid(b)) + return a; + + if (FullTransactionIdFollows(a, b)) + return a; + return b; +} + +#endif /* FRONTEND */ + +#endif /* TRANSAM_H */ diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h new file mode 100644 index 0000000..2dc848c --- /dev/null +++ b/src/include/access/tsmapi.h @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * tsmapi.h + * API for tablesample methods + * + * Copyright (c) 2015-2021, PostgreSQL Global Development Group + * + * src/include/access/tsmapi.h + * + *------------------------------------------------------------------------- + */ +#ifndef TSMAPI_H +#define TSMAPI_H + +#include "nodes/execnodes.h" +#include "nodes/pathnodes.h" + + +/* + * Callback function signatures --- see tablesample-method.sgml for more info. + */ + +typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root, + RelOptInfo *baserel, + List *paramexprs, + BlockNumber *pages, + double *tuples); + +typedef void (*InitSampleScan_function) (SampleScanState *node, + int eflags); + +typedef void (*BeginSampleScan_function) (SampleScanState *node, + Datum *params, + int nparams, + uint32 seed); + +typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, + BlockNumber nblocks); + +typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node, + BlockNumber blockno, + OffsetNumber maxoffset); + +typedef void (*EndSampleScan_function) (SampleScanState *node); + +/* + * TsmRoutine is the struct returned by a tablesample method's handler + * function. It provides pointers to the callback functions needed by the + * planner and executor, as well as additional information about the method. + * + * More function pointers are likely to be added in the future. + * Therefore it's recommended that the handler initialize the struct with + * makeNode(TsmRoutine) so that all fields are set to NULL. This will + * ensure that no fields are accidentally left undefined. + */ +typedef struct TsmRoutine +{ + NodeTag type; + + /* List of datatype OIDs for the arguments of the TABLESAMPLE clause */ + List *parameterTypes; + + /* Can method produce repeatable samples across, or even within, queries? 
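As the comment above suggests, a handler normally builds the node with makeNode() and fills in only what it supports. A hedged sketch of such a handler, loosely modeled on the built-in tablesample methods; every my_* name is hypothetical, and the callbacks are assumed to be defined elsewhere in the module.

#include "postgres.h"
#include "access/tsmapi.h"
#include "catalog/pg_type_d.h"
#include "fmgr.h"

/* Hypothetical callbacks, assumed to exist elsewhere in the module. */
extern void my_samplescangetsamplesize(PlannerInfo *root, RelOptInfo *baserel,
                                       List *paramexprs, BlockNumber *pages,
                                       double *tuples);
extern void my_beginsamplescan(SampleScanState *node, Datum *params,
                               int nparams, uint32 seed);
extern OffsetNumber my_nextsampletuple(SampleScanState *node,
                                       BlockNumber blockno,
                                       OffsetNumber maxoffset);

PG_FUNCTION_INFO_V1(my_tsm_handler);

Datum
my_tsm_handler(PG_FUNCTION_ARGS)
{
    TsmRoutine *tsm = makeNode(TsmRoutine);

    /* one int4 argument to the sampling method, purely for illustration */
    tsm->parameterTypes = list_make1_oid(INT4OID);
    tsm->repeatable_across_queries = true;
    tsm->repeatable_across_scans = true;

    tsm->SampleScanGetSampleSize = my_samplescangetsamplesize;
    tsm->InitSampleScan = NULL;     /* optional */
    tsm->BeginSampleScan = my_beginsamplescan;
    tsm->NextSampleBlock = NULL;    /* optional: scan every block */
    tsm->NextSampleTuple = my_nextsampletuple;
    tsm->EndSampleScan = NULL;      /* optional */

    PG_RETURN_POINTER(tsm);
}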
*/ + bool repeatable_across_queries; + bool repeatable_across_scans; + + /* Functions for planning a SampleScan on a physical table */ + SampleScanGetSampleSize_function SampleScanGetSampleSize; + + /* Functions for executing a SampleScan on a physical table */ + InitSampleScan_function InitSampleScan; /* can be NULL */ + BeginSampleScan_function BeginSampleScan; + NextSampleBlock_function NextSampleBlock; /* can be NULL */ + NextSampleTuple_function NextSampleTuple; + EndSampleScan_function EndSampleScan; /* can be NULL */ +} TsmRoutine; + + +/* Functions in access/tablesample/tablesample.c */ +extern TsmRoutine *GetTsmRoutine(Oid tsmhandler); + +#endif /* TSMAPI_H */ diff --git a/src/include/access/tupconvert.h b/src/include/access/tupconvert.h new file mode 100644 index 0000000..a2cc4b3 --- /dev/null +++ b/src/include/access/tupconvert.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * tupconvert.h + * Tuple conversion support. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tupconvert.h + * + *------------------------------------------------------------------------- + */ +#ifndef TUPCONVERT_H +#define TUPCONVERT_H + +#include "access/attmap.h" +#include "access/htup.h" +#include "access/tupdesc.h" +#include "executor/tuptable.h" +#include "nodes/bitmapset.h" + + +typedef struct TupleConversionMap +{ + TupleDesc indesc; /* tupdesc for source rowtype */ + TupleDesc outdesc; /* tupdesc for result rowtype */ + AttrMap *attrMap; /* indexes of input fields, or 0 for null */ + Datum *invalues; /* workspace for deconstructing source */ + bool *inisnull; + Datum *outvalues; /* workspace for constructing result */ + bool *outisnull; +} TupleConversionMap; + + +extern TupleConversionMap *convert_tuples_by_position(TupleDesc indesc, + TupleDesc outdesc, + const char *msg); + +extern TupleConversionMap *convert_tuples_by_name(TupleDesc indesc, + TupleDesc outdesc); + +extern HeapTuple execute_attr_map_tuple(HeapTuple tuple, TupleConversionMap *map); +extern TupleTableSlot *execute_attr_map_slot(AttrMap *attrMap, + TupleTableSlot *in_slot, + TupleTableSlot *out_slot); +extern Bitmapset *execute_attr_map_cols(AttrMap *attrMap, Bitmapset *inbitmap); + +extern void free_conversion_map(TupleConversionMap *map); + +#endif /* TUPCONVERT_H */ diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h new file mode 100644 index 0000000..f45d47a --- /dev/null +++ b/src/include/access/tupdesc.h @@ -0,0 +1,154 @@ +/*------------------------------------------------------------------------- + * + * tupdesc.h + * POSTGRES tuple descriptor definitions. 
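A brief sketch of how the tupconvert.h conversion-map API above is typically used: build a map by column name, apply it, free it. The function name and the assumption that the two slots were created from the matching descriptors are illustrative, not part of the header.

#include "postgres.h"
#include "access/tupconvert.h"

/* Illustrative only: returns a slot holding 'in_slot' reshaped to 'outdesc';
 * if no conversion is needed, the input slot is returned unchanged. */
static TupleTableSlot *
convert_by_name(TupleDesc indesc, TupleDesc outdesc,
                TupleTableSlot *in_slot, TupleTableSlot *out_slot)
{
    TupleConversionMap *map = convert_tuples_by_name(indesc, outdesc);

    if (map == NULL)
        return in_slot;         /* column names and types already line up */

    out_slot = execute_attr_map_slot(map->attrMap, in_slot, out_slot);
    free_conversion_map(map);

    return out_slot;
}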
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tupdesc.h + * + *------------------------------------------------------------------------- + */ +#ifndef TUPDESC_H +#define TUPDESC_H + +#include "access/attnum.h" +#include "catalog/pg_attribute.h" +#include "nodes/pg_list.h" + + +typedef struct AttrDefault +{ + AttrNumber adnum; + char *adbin; /* nodeToString representation of expr */ +} AttrDefault; + +typedef struct ConstrCheck +{ + char *ccname; + char *ccbin; /* nodeToString representation of expr */ + bool ccvalid; + bool ccnoinherit; /* this is a non-inheritable constraint */ +} ConstrCheck; + +/* This structure contains constraints of a tuple */ +typedef struct TupleConstr +{ + AttrDefault *defval; /* array */ + ConstrCheck *check; /* array */ + struct AttrMissing *missing; /* missing attributes values, NULL if none */ + uint16 num_defval; + uint16 num_check; + bool has_not_null; + bool has_generated_stored; +} TupleConstr; + +/* + * This struct is passed around within the backend to describe the structure + * of tuples. For tuples coming from on-disk relations, the information is + * collected from the pg_attribute, pg_attrdef, and pg_constraint catalogs. + * Transient row types (such as the result of a join query) have anonymous + * TupleDesc structs that generally omit any constraint info; therefore the + * structure is designed to let the constraints be omitted efficiently. + * + * Note that only user attributes, not system attributes, are mentioned in + * TupleDesc. + * + * If the tupdesc is known to correspond to a named rowtype (such as a table's + * rowtype) then tdtypeid identifies that type and tdtypmod is -1. Otherwise + * tdtypeid is RECORDOID, and tdtypmod can be either -1 for a fully anonymous + * row type, or a value >= 0 to allow the rowtype to be looked up in the + * typcache.c type cache. + * + * Note that tdtypeid is never the OID of a domain over composite, even if + * we are dealing with values that are known (at some higher level) to be of + * a domain-over-composite type. This is because tdtypeid/tdtypmod need to + * match up with the type labeling of composite Datums, and those are never + * explicitly marked as being of a domain type, either. + * + * Tuple descriptors that live in caches (relcache or typcache, at present) + * are reference-counted: they can be deleted when their reference count goes + * to zero. Tuple descriptors created by the executor need no reference + * counting, however: they are simply created in the appropriate memory + * context and go away when the context is freed. We set the tdrefcount + * field of such a descriptor to -1, while reference-counted descriptors + * always have tdrefcount >= 0. + */ +typedef struct TupleDescData +{ + int natts; /* number of attributes in the tuple */ + Oid tdtypeid; /* composite type ID for tuple type */ + int32 tdtypmod; /* typmod for tuple type */ + int tdrefcount; /* reference count, or -1 if not counting */ + TupleConstr *constr; /* constraints, or NULL if none */ + /* attrs[N] is the description of Attribute Number N+1 */ + FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER]; +} TupleDescData; +typedef struct TupleDescData *TupleDesc; + +/* Accessor for the i'th attribute of tupdesc. 
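To make the accessor and the constructors below concrete, here is a small hedged sketch that builds a transient (non-refcounted) descriptor and walks it; the function name, column names and use of elog() are illustrative only.

#include "postgres.h"
#include "access/tupdesc.h"
#include "catalog/pg_type_d.h"

/* Illustrative only: a two-column anonymous row type, walked with
 * TupleDescAttr().  It is not reference-counted; it simply lives and dies
 * with the memory context it was created in. */
static TupleDesc
make_example_tupdesc(void)
{
    TupleDesc   tupdesc = CreateTemplateTupleDesc(2);

    TupleDescInitEntry(tupdesc, (AttrNumber) 1, "id", INT4OID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 2, "name", TEXTOID, -1, 0);

    for (int i = 0; i < tupdesc->natts; i++)
    {
        Form_pg_attribute att = TupleDescAttr(tupdesc, i);

        elog(DEBUG1, "column %d: %s", i + 1, NameStr(att->attname));
    }

    return tupdesc;
}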
*/ +#define TupleDescAttr(tupdesc, i) (&(tupdesc)->attrs[(i)]) + +extern TupleDesc CreateTemplateTupleDesc(int natts); + +extern TupleDesc CreateTupleDesc(int natts, Form_pg_attribute *attrs); + +extern TupleDesc CreateTupleDescCopy(TupleDesc tupdesc); + +extern TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc); + +#define TupleDescSize(src) \ + (offsetof(struct TupleDescData, attrs) + \ + (src)->natts * sizeof(FormData_pg_attribute)) + +extern void TupleDescCopy(TupleDesc dst, TupleDesc src); + +extern void TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno, + TupleDesc src, AttrNumber srcAttno); + +extern void FreeTupleDesc(TupleDesc tupdesc); + +extern void IncrTupleDescRefCount(TupleDesc tupdesc); +extern void DecrTupleDescRefCount(TupleDesc tupdesc); + +#define PinTupleDesc(tupdesc) \ + do { \ + if ((tupdesc)->tdrefcount >= 0) \ + IncrTupleDescRefCount(tupdesc); \ + } while (0) + +#define ReleaseTupleDesc(tupdesc) \ + do { \ + if ((tupdesc)->tdrefcount >= 0) \ + DecrTupleDescRefCount(tupdesc); \ + } while (0) + +extern bool equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2); + +extern uint32 hashTupleDesc(TupleDesc tupdesc); + +extern void TupleDescInitEntry(TupleDesc desc, + AttrNumber attributeNumber, + const char *attributeName, + Oid oidtypeid, + int32 typmod, + int attdim); + +extern void TupleDescInitBuiltinEntry(TupleDesc desc, + AttrNumber attributeNumber, + const char *attributeName, + Oid oidtypeid, + int32 typmod, + int attdim); + +extern void TupleDescInitEntryCollation(TupleDesc desc, + AttrNumber attributeNumber, + Oid collationid); + +extern TupleDesc BuildDescForRelation(List *schema); + +extern TupleDesc BuildDescFromLists(List *names, List *types, List *typmods, List *collations); + +#endif /* TUPDESC_H */ diff --git a/src/include/access/tupdesc_details.h b/src/include/access/tupdesc_details.h new file mode 100644 index 0000000..d0d2c99 --- /dev/null +++ b/src/include/access/tupdesc_details.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * tupdesc_details.h + * POSTGRES tuple descriptor definitions we can't include everywhere + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tupdesc_details.h + * + *------------------------------------------------------------------------- + */ + +#ifndef TUPDESC_DETAILS_H +#define TUPDESC_DETAILS_H + +/* + * Structure used to represent value to be used when the attribute is not + * present at all in a tuple, i.e. when the column was created after the tuple + */ +typedef struct AttrMissing +{ + bool am_present; /* true if non-NULL missing value exists */ + Datum am_value; /* value when attribute is missing */ +} AttrMissing; + +#endif /* TUPDESC_DETAILS_H */ diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h new file mode 100644 index 0000000..65ac1ef --- /dev/null +++ b/src/include/access/tupmacs.h @@ -0,0 +1,247 @@ +/*------------------------------------------------------------------------- + * + * tupmacs.h + * Tuple macros used by both index tuples and heap tuples. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tupmacs.h + * + *------------------------------------------------------------------------- + */ +#ifndef TUPMACS_H +#define TUPMACS_H + +#include "catalog/pg_type_d.h" /* for TYPALIGN macros */ + + +/* + * Check a tuple's null bitmap to determine whether the attribute is null. + * Note that a 0 in the null bitmap indicates a null, while 1 indicates + * non-null. + */ +#define att_isnull(ATT, BITS) (!((BITS)[(ATT) >> 3] & (1 << ((ATT) & 0x07)))) + +/* + * Given a Form_pg_attribute and a pointer into a tuple's data area, + * return the correct value or pointer. + * + * We return a Datum value in all cases. If the attribute has "byval" false, + * we return the same pointer into the tuple data area that we're passed. + * Otherwise, we return the correct number of bytes fetched from the data + * area and extended to Datum form. + * + * On machines where Datum is 8 bytes, we support fetching 8-byte byval + * attributes; otherwise, only 1, 2, and 4-byte values are supported. + * + * Note that T must already be properly aligned for this to work correctly. + */ +#define fetchatt(A,T) fetch_att(T, (A)->attbyval, (A)->attlen) + +/* + * Same, but work from byval/len parameters rather than Form_pg_attribute. + */ +#if SIZEOF_DATUM == 8 + +#define fetch_att(T,attbyval,attlen) \ +( \ + (attbyval) ? \ + ( \ + (attlen) == (int) sizeof(Datum) ? \ + *((Datum *)(T)) \ + : \ + ( \ + (attlen) == (int) sizeof(int32) ? \ + Int32GetDatum(*((int32 *)(T))) \ + : \ + ( \ + (attlen) == (int) sizeof(int16) ? \ + Int16GetDatum(*((int16 *)(T))) \ + : \ + ( \ + AssertMacro((attlen) == 1), \ + CharGetDatum(*((char *)(T))) \ + ) \ + ) \ + ) \ + ) \ + : \ + PointerGetDatum((char *) (T)) \ +) +#else /* SIZEOF_DATUM != 8 */ + +#define fetch_att(T,attbyval,attlen) \ +( \ + (attbyval) ? \ + ( \ + (attlen) == (int) sizeof(int32) ? \ + Int32GetDatum(*((int32 *)(T))) \ + : \ + ( \ + (attlen) == (int) sizeof(int16) ? \ + Int16GetDatum(*((int16 *)(T))) \ + : \ + ( \ + AssertMacro((attlen) == 1), \ + CharGetDatum(*((char *)(T))) \ + ) \ + ) \ + ) \ + : \ + PointerGetDatum((char *) (T)) \ +) +#endif /* SIZEOF_DATUM == 8 */ + +/* + * att_align_datum aligns the given offset as needed for a datum of alignment + * requirement attalign and typlen attlen. attdatum is the Datum variable + * we intend to pack into a tuple (it's only accessed if we are dealing with + * a varlena type). Note that this assumes the Datum will be stored as-is; + * callers that are intending to convert non-short varlena datums to short + * format have to account for that themselves. + */ +#define att_align_datum(cur_offset, attalign, attlen, attdatum) \ +( \ + ((attlen) == -1 && VARATT_IS_SHORT(DatumGetPointer(attdatum))) ? \ + (uintptr_t) (cur_offset) : \ + att_align_nominal(cur_offset, attalign) \ +) + +/* + * att_align_pointer performs the same calculation as att_align_datum, + * but is used when walking a tuple. attptr is the current actual data + * pointer; when accessing a varlena field we have to "peek" to see if we + * are looking at a pad byte or the first byte of a 1-byte-header datum. + * (A zero byte must be either a pad byte, or the first byte of a correctly + * aligned 4-byte length word; in either case we can align safely. 
A non-zero + * byte must be either a 1-byte length word, or the first byte of a correctly + * aligned 4-byte length word; in either case we need not align.) + * + * Note: some callers pass a "char *" pointer for cur_offset. This is + * a bit of a hack but should work all right as long as uintptr_t is the + * correct width. + */ +#define att_align_pointer(cur_offset, attalign, attlen, attptr) \ +( \ + ((attlen) == -1 && VARATT_NOT_PAD_BYTE(attptr)) ? \ + (uintptr_t) (cur_offset) : \ + att_align_nominal(cur_offset, attalign) \ +) + +/* + * att_align_nominal aligns the given offset as needed for a datum of alignment + * requirement attalign, ignoring any consideration of packed varlena datums. + * There are three main use cases for using this macro directly: + * * we know that the att in question is not varlena (attlen != -1); + * in this case it is cheaper than the above macros and just as good. + * * we need to estimate alignment padding cost abstractly, ie without + * reference to a real tuple. We must assume the worst case that + * all varlenas are aligned. + * * within arrays and multiranges, we unconditionally align varlenas (XXX this + * should be revisited, probably). + * + * The attalign cases are tested in what is hopefully something like their + * frequency of occurrence. + */ +#define att_align_nominal(cur_offset, attalign) \ +( \ + ((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \ + (((attalign) == TYPALIGN_CHAR) ? (uintptr_t) (cur_offset) : \ + (((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \ + ( \ + AssertMacro((attalign) == TYPALIGN_SHORT), \ + SHORTALIGN(cur_offset) \ + ))) \ +) + +/* + * att_addlength_datum increments the given offset by the space needed for + * the given Datum variable. attdatum is only accessed if we are dealing + * with a variable-length attribute. + */ +#define att_addlength_datum(cur_offset, attlen, attdatum) \ + att_addlength_pointer(cur_offset, attlen, DatumGetPointer(attdatum)) + +/* + * att_addlength_pointer performs the same calculation as att_addlength_datum, + * but is used when walking a tuple --- attptr is the pointer to the field + * within the tuple. + * + * Note: some callers pass a "char *" pointer for cur_offset. This is + * actually perfectly OK, but probably should be cleaned up along with + * the same practice for att_align_pointer. + */ +#define att_addlength_pointer(cur_offset, attlen, attptr) \ +( \ + ((attlen) > 0) ? \ + ( \ + (cur_offset) + (attlen) \ + ) \ + : (((attlen) == -1) ? \ + ( \ + (cur_offset) + VARSIZE_ANY(attptr) \ + ) \ + : \ + ( \ + AssertMacro((attlen) == -2), \ + (cur_offset) + (strlen((char *) (attptr)) + 1) \ + )) \ +) + +/* + * store_att_byval is a partial inverse of fetch_att: store a given Datum + * value into a tuple data area at the specified address. However, it only + * handles the byval case, because in typical usage the caller needs to + * distinguish by-val and by-ref cases anyway, and so a do-it-all macro + * wouldn't be convenient. 
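The macro families above (att_align_*, fetch_att/fetchatt, att_addlength_*) are almost always used together in one walking loop. A hedged sketch of that idiom, ignoring nulls and assuming 'tp' points at the start of the tuple's data area; the function name is invented, and the real deforming code lives in heap_deform_tuple() and friends.

#include "postgres.h"
#include "access/tupdesc.h"
#include "access/tupmacs.h"

/* Illustrative only: step through every attribute of a null-free tuple. */
static void
walk_attributes(TupleDesc tupdesc, char *tp)
{
    uint32      off = 0;

    for (int i = 0; i < tupdesc->natts; i++)
    {
        Form_pg_attribute att = TupleDescAttr(tupdesc, i);
        Datum       value;

        /* align (peeking at the byte in case of a packed varlena) ... */
        off = att_align_pointer(off, att->attalign, att->attlen, tp + off);
        /* ... fetch the Datum ... */
        value = fetchatt(att, tp + off);
        /* ... and advance past it */
        off = att_addlength_pointer(off, att->attlen, tp + off);

        (void) value;           /* a real caller would store or use it */
    }
}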
+ */ +#if SIZEOF_DATUM == 8 + +#define store_att_byval(T,newdatum,attlen) \ + do { \ + switch (attlen) \ + { \ + case sizeof(char): \ + *(char *) (T) = DatumGetChar(newdatum); \ + break; \ + case sizeof(int16): \ + *(int16 *) (T) = DatumGetInt16(newdatum); \ + break; \ + case sizeof(int32): \ + *(int32 *) (T) = DatumGetInt32(newdatum); \ + break; \ + case sizeof(Datum): \ + *(Datum *) (T) = (newdatum); \ + break; \ + default: \ + elog(ERROR, "unsupported byval length: %d", \ + (int) (attlen)); \ + break; \ + } \ + } while (0) +#else /* SIZEOF_DATUM != 8 */ + +#define store_att_byval(T,newdatum,attlen) \ + do { \ + switch (attlen) \ + { \ + case sizeof(char): \ + *(char *) (T) = DatumGetChar(newdatum); \ + break; \ + case sizeof(int16): \ + *(int16 *) (T) = DatumGetInt16(newdatum); \ + break; \ + case sizeof(int32): \ + *(int32 *) (T) = DatumGetInt32(newdatum); \ + break; \ + default: \ + elog(ERROR, "unsupported byval length: %d", \ + (int) (attlen)); \ + break; \ + } \ + } while (0) +#endif /* SIZEOF_DATUM == 8 */ + +#endif diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h new file mode 100644 index 0000000..edb797b --- /dev/null +++ b/src/include/access/twophase.h @@ -0,0 +1,63 @@ +/*------------------------------------------------------------------------- + * + * twophase.h + * Two-phase-commit related declarations. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/twophase.h + * + *------------------------------------------------------------------------- + */ +#ifndef TWOPHASE_H +#define TWOPHASE_H + +#include "access/xact.h" +#include "access/xlogdefs.h" +#include "datatype/timestamp.h" +#include "storage/lock.h" + +/* + * GlobalTransactionData is defined in twophase.c; other places have no + * business knowing the internal definition. 
+ */ +typedef struct GlobalTransactionData *GlobalTransaction; + +/* GUC variable */ +extern PGDLLIMPORT int max_prepared_xacts; + +extern Size TwoPhaseShmemSize(void); +extern void TwoPhaseShmemInit(void); + +extern void AtAbort_Twophase(void); +extern void PostPrepare_Twophase(void); + +extern TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, + bool *have_more); +extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid, bool lock_held); +extern BackendId TwoPhaseGetDummyBackendId(TransactionId xid, bool lock_held); + +extern GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, + TimestampTz prepared_at, + Oid owner, Oid databaseid); + +extern void StartPrepare(GlobalTransaction gxact); +extern void EndPrepare(GlobalTransaction gxact); +extern bool StandbyTransactionIdIsPrepared(TransactionId xid); + +extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p, + int *nxids_p); +extern void StandbyRecoverPreparedTransactions(void); +extern void RecoverPreparedTransactions(void); + +extern void CheckPointTwoPhase(XLogRecPtr redo_horizon); + +extern void FinishPreparedTransaction(const char *gid, bool isCommit); + +extern void PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, + XLogRecPtr end_lsn, RepOriginId origin_id); +extern void PrepareRedoRemove(TransactionId xid, bool giveWarning); +extern void restoreTwoPhaseData(void); +#endif /* TWOPHASE_H */ diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h new file mode 100644 index 0000000..2709d72 --- /dev/null +++ b/src/include/access/twophase_rmgr.h @@ -0,0 +1,40 @@ +/*------------------------------------------------------------------------- + * + * twophase_rmgr.h + * Two-phase-commit resource managers definition + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/twophase_rmgr.h + * + *------------------------------------------------------------------------- + */ +#ifndef TWOPHASE_RMGR_H +#define TWOPHASE_RMGR_H + +typedef void (*TwoPhaseCallback) (TransactionId xid, uint16 info, + void *recdata, uint32 len); +typedef uint8 TwoPhaseRmgrId; + +/* + * Built-in resource managers + */ +#define TWOPHASE_RM_END_ID 0 +#define TWOPHASE_RM_LOCK_ID 1 +#define TWOPHASE_RM_PGSTAT_ID 2 +#define TWOPHASE_RM_MULTIXACT_ID 3 +#define TWOPHASE_RM_PREDICATELOCK_ID 4 +#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_PREDICATELOCK_ID + +extern const TwoPhaseCallback twophase_recover_callbacks[]; +extern const TwoPhaseCallback twophase_postcommit_callbacks[]; +extern const TwoPhaseCallback twophase_postabort_callbacks[]; +extern const TwoPhaseCallback twophase_standby_recover_callbacks[]; + + +extern void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, + const void *data, uint32 len); + +#endif /* TWOPHASE_RMGR_H */ diff --git a/src/include/access/valid.h b/src/include/access/valid.h new file mode 100644 index 0000000..a462113 --- /dev/null +++ b/src/include/access/valid.h @@ -0,0 +1,69 @@ +/*------------------------------------------------------------------------- + * + * valid.h + * POSTGRES tuple qualification validity definitions. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/valid.h + * + *------------------------------------------------------------------------- + */ +#ifndef VALID_H +#define VALID_H + +/* + * HeapKeyTest + * + * Test a heap tuple to see if it satisfies a scan key. + */ +#define HeapKeyTest(tuple, \ + tupdesc, \ + nkeys, \ + keys, \ + result) \ +do \ +{ \ + /* Use underscores to protect the variables passed in as parameters */ \ + int __cur_nkeys = (nkeys); \ + ScanKey __cur_keys = (keys); \ + \ + (result) = true; /* may change */ \ + for (; __cur_nkeys--; __cur_keys++) \ + { \ + Datum __atp; \ + bool __isnull; \ + Datum __test; \ + \ + if (__cur_keys->sk_flags & SK_ISNULL) \ + { \ + (result) = false; \ + break; \ + } \ + \ + __atp = heap_getattr((tuple), \ + __cur_keys->sk_attno, \ + (tupdesc), \ + &__isnull); \ + \ + if (__isnull) \ + { \ + (result) = false; \ + break; \ + } \ + \ + __test = FunctionCall2Coll(&__cur_keys->sk_func, \ + __cur_keys->sk_collation, \ + __atp, __cur_keys->sk_argument); \ + \ + if (!DatumGetBool(__test)) \ + { \ + (result) = false; \ + break; \ + } \ + } \ +} while (0) + +#endif /* VALID_H */ diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h new file mode 100644 index 0000000..0981b21 --- /dev/null +++ b/src/include/access/visibilitymap.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------- + * + * visibilitymap.h + * visibility map interface + * + * + * Portions Copyright (c) 2007-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/visibilitymap.h + * + *------------------------------------------------------------------------- + */ +#ifndef VISIBILITYMAP_H +#define VISIBILITYMAP_H + +#include "access/visibilitymapdefs.h" +#include "access/xlogdefs.h" +#include "storage/block.h" +#include "storage/buf.h" +#include "utils/relcache.h" + +/* Macros for visibilitymap test */ +#define VM_ALL_VISIBLE(r, b, v) \ + ((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_VISIBLE) != 0) +#define VM_ALL_FROZEN(r, b, v) \ + ((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0) + +extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, + Buffer vmbuf, uint8 flags); +extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk, + Buffer *vmbuf); +extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf); +extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, + XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, + uint8 flags); +extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); +extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen); +extern BlockNumber visibilitymap_prepare_truncate(Relation rel, + BlockNumber nheapblocks); + +#endif /* VISIBILITYMAP_H */ diff --git a/src/include/access/visibilitymapdefs.h b/src/include/access/visibilitymapdefs.h new file mode 100644 index 0000000..58be5a4 --- /dev/null +++ b/src/include/access/visibilitymapdefs.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * + * visibilitymapdefs.h + * macros for accessing contents of visibility map pages + * + * + * Copyright (c) 2021, PostgreSQL Global Development Group + * + * 
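A short hedged sketch of how the two visibility-map macros and the pin/get_status functions above are meant to be combined; the function name is invented, buffer handling is reduced to the minimum, and storage/bufmgr.h is assumed to supply ReleaseBuffer and BufferIsValid as usual.

#include "postgres.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"

/* Illustrative only: check whether all tuples on 'blkno' are known visible
 * to everyone, releasing the visibility-map page pin afterwards. */
static bool
page_is_all_visible(Relation rel, BlockNumber blkno)
{
    Buffer      vmbuffer = InvalidBuffer;
    bool        all_visible;

    visibilitymap_pin(rel, blkno, &vmbuffer);
    all_visible = VM_ALL_VISIBLE(rel, blkno, &vmbuffer);

    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);

    return all_visible;
}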
src/include/access/visibilitymapdefs.h + * + *------------------------------------------------------------------------- + */ +#ifndef VISIBILITYMAPDEFS_H +#define VISIBILITYMAPDEFS_H + +/* Number of bits for one heap page */ +#define BITS_PER_HEAPBLOCK 2 + +/* Flags for bit map */ +#define VISIBILITYMAP_ALL_VISIBLE 0x01 +#define VISIBILITYMAP_ALL_FROZEN 0x02 +#define VISIBILITYMAP_VALID_BITS 0x03 /* OR of all valid visibilitymap + * flags bits */ + +#endif /* VISIBILITYMAPDEFS_H */ diff --git a/src/include/access/xact.h b/src/include/access/xact.h new file mode 100644 index 0000000..c538758 --- /dev/null +++ b/src/include/access/xact.h @@ -0,0 +1,476 @@ +/*------------------------------------------------------------------------- + * + * xact.h + * postgres transaction system definitions + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xact.h + * + *------------------------------------------------------------------------- + */ +#ifndef XACT_H +#define XACT_H + +#include "access/transam.h" +#include "access/xlogreader.h" +#include "datatype/timestamp.h" +#include "lib/stringinfo.h" +#include "nodes/pg_list.h" +#include "storage/relfilenode.h" +#include "storage/sinval.h" + +/* + * Maximum size of Global Transaction ID (including '\0'). + * + * Note that the max value of GIDSIZE must fit in the uint16 gidlen, + * specified in TwoPhaseFileHeader. + */ +#define GIDSIZE 200 + +/* + * Xact isolation levels + */ +#define XACT_READ_UNCOMMITTED 0 +#define XACT_READ_COMMITTED 1 +#define XACT_REPEATABLE_READ 2 +#define XACT_SERIALIZABLE 3 + +extern int DefaultXactIsoLevel; +extern PGDLLIMPORT int XactIsoLevel; + +/* + * We implement three isolation levels internally. + * The two stronger ones use one snapshot per database transaction; + * the others use one snapshot per statement. + * Serializable uses predicate locks in addition to snapshots. + * These macros should be used to check which isolation level is selected. + */ +#define IsolationUsesXactSnapshot() (XactIsoLevel >= XACT_REPEATABLE_READ) +#define IsolationIsSerializable() (XactIsoLevel == XACT_SERIALIZABLE) + +/* Xact read-only state */ +extern bool DefaultXactReadOnly; +extern bool XactReadOnly; + +/* flag for logging statements in this transaction */ +extern bool xact_is_sampled; + +/* + * Xact is deferrable -- only meaningful (currently) for read only + * SERIALIZABLE transactions + */ +extern bool DefaultXactDeferrable; +extern bool XactDeferrable; + +typedef enum +{ + SYNCHRONOUS_COMMIT_OFF, /* asynchronous commit */ + SYNCHRONOUS_COMMIT_LOCAL_FLUSH, /* wait for local flush only */ + SYNCHRONOUS_COMMIT_REMOTE_WRITE, /* wait for local flush and remote + * write */ + SYNCHRONOUS_COMMIT_REMOTE_FLUSH, /* wait for local and remote flush */ + SYNCHRONOUS_COMMIT_REMOTE_APPLY /* wait for local and remote flush and + * remote apply */ +} SyncCommitLevel; + +/* Define the default setting for synchronous_commit */ +#define SYNCHRONOUS_COMMIT_ON SYNCHRONOUS_COMMIT_REMOTE_FLUSH + +/* Synchronous commit level */ +extern int synchronous_commit; + +/* used during logical streaming of a transaction */ +extern PGDLLIMPORT TransactionId CheckXidAlive; +extern PGDLLIMPORT bool bsysscan; + +/* + * Miscellaneous flag bits to record events which occur on the top level + * transaction. These flags are only persisted in MyXactFlags and are intended + * so we remember to do certain things later in the transaction. 
This is + * globally accessible, so can be set from anywhere in the code which requires + * recording flags. + */ +extern int MyXactFlags; + +/* + * XACT_FLAGS_ACCESSEDTEMPNAMESPACE - set when a temporary object is accessed. + * We don't allow PREPARE TRANSACTION in that case. + */ +#define XACT_FLAGS_ACCESSEDTEMPNAMESPACE (1U << 0) + +/* + * XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK - records whether the top level xact + * logged any Access Exclusive Locks. + */ +#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK (1U << 1) + +/* + * XACT_FLAGS_NEEDIMMEDIATECOMMIT - records whether the top level statement + * is one that requires immediate commit, such as CREATE DATABASE. + */ +#define XACT_FLAGS_NEEDIMMEDIATECOMMIT (1U << 2) + +/* + * start- and end-of-transaction callbacks for dynamically loaded modules + */ +typedef enum +{ + XACT_EVENT_COMMIT, + XACT_EVENT_PARALLEL_COMMIT, + XACT_EVENT_ABORT, + XACT_EVENT_PARALLEL_ABORT, + XACT_EVENT_PREPARE, + XACT_EVENT_PRE_COMMIT, + XACT_EVENT_PARALLEL_PRE_COMMIT, + XACT_EVENT_PRE_PREPARE +} XactEvent; + +typedef void (*XactCallback) (XactEvent event, void *arg); + +typedef enum +{ + SUBXACT_EVENT_START_SUB, + SUBXACT_EVENT_COMMIT_SUB, + SUBXACT_EVENT_ABORT_SUB, + SUBXACT_EVENT_PRE_COMMIT_SUB +} SubXactEvent; + +typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid, + SubTransactionId parentSubid, void *arg); + + +/* ---------------- + * transaction-related XLOG entries + * ---------------- + */ + +/* + * XLOG allows to store some information in high 4 bits of log record xl_info + * field. We use 3 for the opcode, and one about an optional flag variable. + */ +#define XLOG_XACT_COMMIT 0x00 +#define XLOG_XACT_PREPARE 0x10 +#define XLOG_XACT_ABORT 0x20 +#define XLOG_XACT_COMMIT_PREPARED 0x30 +#define XLOG_XACT_ABORT_PREPARED 0x40 +#define XLOG_XACT_ASSIGNMENT 0x50 +#define XLOG_XACT_INVALIDATIONS 0x60 +/* free opcode 0x70 */ + +/* mask for filtering opcodes out of xl_info */ +#define XLOG_XACT_OPMASK 0x70 + +/* does this record have a 'xinfo' field or not */ +#define XLOG_XACT_HAS_INFO 0x80 + +/* + * The following flags, stored in xinfo, determine which information is + * contained in commit/abort records. + */ +#define XACT_XINFO_HAS_DBINFO (1U << 0) +#define XACT_XINFO_HAS_SUBXACTS (1U << 1) +#define XACT_XINFO_HAS_RELFILENODES (1U << 2) +#define XACT_XINFO_HAS_INVALS (1U << 3) +#define XACT_XINFO_HAS_TWOPHASE (1U << 4) +#define XACT_XINFO_HAS_ORIGIN (1U << 5) +#define XACT_XINFO_HAS_AE_LOCKS (1U << 6) +#define XACT_XINFO_HAS_GID (1U << 7) + +/* + * Also stored in xinfo, these indicating a variety of additional actions that + * need to occur when emulating transaction effects during recovery. + * + * They are named XactCompletion... to differentiate them from + * EOXact... routines which run at the end of the original transaction + * completion. 
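The callback types above are the usual way for loadable modules to watch transaction completion. A minimal hedged sketch, assuming the standard extension entry point _PG_init() and using RegisterXactCallback() as declared later in this header; the callback body and its name are only an example.

#include "postgres.h"
#include "access/xact.h"

/* Illustrative only: log top-level commit/abort from an extension. */
static void
my_xact_callback(XactEvent event, void *arg)
{
    if (event == XACT_EVENT_COMMIT || event == XACT_EVENT_ABORT)
        elog(DEBUG1, "top-level transaction ended with event %d", (int) event);
}

void
_PG_init(void)
{
    RegisterXactCallback(my_xact_callback, NULL);
}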
+ */ +#define XACT_COMPLETION_APPLY_FEEDBACK (1U << 29) +#define XACT_COMPLETION_UPDATE_RELCACHE_FILE (1U << 30) +#define XACT_COMPLETION_FORCE_SYNC_COMMIT (1U << 31) + +/* Access macros for above flags */ +#define XactCompletionApplyFeedback(xinfo) \ + ((xinfo & XACT_COMPLETION_APPLY_FEEDBACK) != 0) +#define XactCompletionRelcacheInitFileInval(xinfo) \ + ((xinfo & XACT_COMPLETION_UPDATE_RELCACHE_FILE) != 0) +#define XactCompletionForceSyncCommit(xinfo) \ + ((xinfo & XACT_COMPLETION_FORCE_SYNC_COMMIT) != 0) + +typedef struct xl_xact_assignment +{ + TransactionId xtop; /* assigned XID's top-level XID */ + int nsubxacts; /* number of subtransaction XIDs */ + TransactionId xsub[FLEXIBLE_ARRAY_MEMBER]; /* assigned subxids */ +} xl_xact_assignment; + +#define MinSizeOfXactAssignment offsetof(xl_xact_assignment, xsub) + +/* + * Commit and abort records can contain a lot of information. But a large + * portion of the records won't need all possible pieces of information. So we + * only include what's needed. + * + * A minimal commit/abort record only consists of a xl_xact_commit/abort + * struct. The presence of additional information is indicated by bits set in + * 'xl_xact_xinfo->xinfo'. The presence of the xinfo field itself is signaled + * by a set XLOG_XACT_HAS_INFO bit in the xl_info field. + * + * NB: All the individual data chunks should be sized to multiples of + * sizeof(int) and only require int32 alignment. If they require bigger + * alignment, they need to be copied upon reading. + */ + +/* sub-records for commit/abort */ + +typedef struct xl_xact_xinfo +{ + /* + * Even though we right now only require 1 byte of space in xinfo we use + * four so following records don't have to care about alignment. Commit + * records can be large, so copying large portions isn't attractive. + */ + uint32 xinfo; +} xl_xact_xinfo; + +typedef struct xl_xact_dbinfo +{ + Oid dbId; /* MyDatabaseId */ + Oid tsId; /* MyDatabaseTableSpace */ +} xl_xact_dbinfo; + +typedef struct xl_xact_subxacts +{ + int nsubxacts; /* number of subtransaction XIDs */ + TransactionId subxacts[FLEXIBLE_ARRAY_MEMBER]; +} xl_xact_subxacts; +#define MinSizeOfXactSubxacts offsetof(xl_xact_subxacts, subxacts) + +typedef struct xl_xact_relfilenodes +{ + int nrels; /* number of relations */ + RelFileNode xnodes[FLEXIBLE_ARRAY_MEMBER]; +} xl_xact_relfilenodes; +#define MinSizeOfXactRelfilenodes offsetof(xl_xact_relfilenodes, xnodes) + +typedef struct xl_xact_invals +{ + int nmsgs; /* number of shared inval msgs */ + SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]; +} xl_xact_invals; +#define MinSizeOfXactInvals offsetof(xl_xact_invals, msgs) + +typedef struct xl_xact_twophase +{ + TransactionId xid; +} xl_xact_twophase; + +typedef struct xl_xact_origin +{ + XLogRecPtr origin_lsn; + TimestampTz origin_timestamp; +} xl_xact_origin; + +typedef struct xl_xact_commit +{ + TimestampTz xact_time; /* time of commit */ + + /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ + /* xl_xact_dbinfo follows if XINFO_HAS_DBINFO */ + /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ + /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ + /* xl_xact_invals follows if XINFO_HAS_INVALS */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ + /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! 
*/ +} xl_xact_commit; +#define MinSizeOfXactCommit (offsetof(xl_xact_commit, xact_time) + sizeof(TimestampTz)) + +typedef struct xl_xact_abort +{ + TimestampTz xact_time; /* time of abort */ + + /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ + /* xl_xact_dbinfo follows if XINFO_HAS_DBINFO */ + /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ + /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ + /* No invalidation messages needed. */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ + /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ +} xl_xact_abort; +#define MinSizeOfXactAbort sizeof(xl_xact_abort) + +typedef struct xl_xact_prepare +{ + uint32 magic; /* format identifier */ + uint32 total_len; /* actual file length */ + TransactionId xid; /* original transaction XID */ + Oid database; /* OID of database it was in */ + TimestampTz prepared_at; /* time of preparation */ + Oid owner; /* user running the transaction */ + int32 nsubxacts; /* number of following subxact XIDs */ + int32 ncommitrels; /* number of delete-on-commit rels */ + int32 nabortrels; /* number of delete-on-abort rels */ + int32 ninvalmsgs; /* number of cache invalidation messages */ + bool initfileinval; /* does relcache init file need invalidation? */ + uint16 gidlen; /* length of the GID - GID follows the header */ + XLogRecPtr origin_lsn; /* lsn of this record at origin node */ + TimestampTz origin_timestamp; /* time of prepare at origin node */ +} xl_xact_prepare; + +/* + * Commit/Abort records in the above form are a bit verbose to parse, so + * there's a deconstructed versions generated by ParseCommit/AbortRecord() for + * easier consumption. + */ +typedef struct xl_xact_parsed_commit +{ + TimestampTz xact_time; + uint32 xinfo; + + Oid dbId; /* MyDatabaseId */ + Oid tsId; /* MyDatabaseTableSpace */ + + int nsubxacts; + TransactionId *subxacts; + + int nrels; + RelFileNode *xnodes; + + int nmsgs; + SharedInvalidationMessage *msgs; + + TransactionId twophase_xid; /* only for 2PC */ + char twophase_gid[GIDSIZE]; /* only for 2PC */ + int nabortrels; /* only for 2PC */ + RelFileNode *abortnodes; /* only for 2PC */ + + XLogRecPtr origin_lsn; + TimestampTz origin_timestamp; +} xl_xact_parsed_commit; + +typedef xl_xact_parsed_commit xl_xact_parsed_prepare; + +typedef struct xl_xact_parsed_abort +{ + TimestampTz xact_time; + uint32 xinfo; + + Oid dbId; /* MyDatabaseId */ + Oid tsId; /* MyDatabaseTableSpace */ + + int nsubxacts; + TransactionId *subxacts; + + int nrels; + RelFileNode *xnodes; + + TransactionId twophase_xid; /* only for 2PC */ + char twophase_gid[GIDSIZE]; /* only for 2PC */ + + XLogRecPtr origin_lsn; + TimestampTz origin_timestamp; +} xl_xact_parsed_abort; + + +/* ---------------- + * extern definitions + * ---------------- + */ +extern bool IsTransactionState(void); +extern bool IsAbortedTransactionBlockState(void); +extern TransactionId GetTopTransactionId(void); +extern TransactionId GetTopTransactionIdIfAny(void); +extern TransactionId GetCurrentTransactionId(void); +extern TransactionId GetCurrentTransactionIdIfAny(void); +extern TransactionId GetStableLatestTransactionId(void); +extern SubTransactionId GetCurrentSubTransactionId(void); +extern FullTransactionId GetTopFullTransactionId(void); +extern FullTransactionId GetTopFullTransactionIdIfAny(void); +extern FullTransactionId GetCurrentFullTransactionId(void); +extern FullTransactionId GetCurrentFullTransactionIdIfAny(void); +extern void 
MarkCurrentTransactionIdLoggedIfAny(void); +extern bool SubTransactionIsActive(SubTransactionId subxid); +extern CommandId GetCurrentCommandId(bool used); +extern void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts); +extern TimestampTz GetCurrentTransactionStartTimestamp(void); +extern TimestampTz GetCurrentStatementStartTimestamp(void); +extern TimestampTz GetCurrentTransactionStopTimestamp(void); +extern void SetCurrentStatementStartTimestamp(void); +extern int GetCurrentTransactionNestLevel(void); +extern bool TransactionIdIsCurrentTransactionId(TransactionId xid); +extern void CommandCounterIncrement(void); +extern void ForceSyncCommit(void); +extern void StartTransactionCommand(void); +extern void SaveTransactionCharacteristics(void); +extern void RestoreTransactionCharacteristics(void); +extern void CommitTransactionCommand(void); +extern void AbortCurrentTransaction(void); +extern void BeginTransactionBlock(void); +extern bool EndTransactionBlock(bool chain); +extern bool PrepareTransactionBlock(const char *gid); +extern void UserAbortTransactionBlock(bool chain); +extern void BeginImplicitTransactionBlock(void); +extern void EndImplicitTransactionBlock(void); +extern void ReleaseSavepoint(const char *name); +extern void DefineSavepoint(const char *name); +extern void RollbackToSavepoint(const char *name); +extern void BeginInternalSubTransaction(const char *name); +extern void ReleaseCurrentSubTransaction(void); +extern void RollbackAndReleaseCurrentSubTransaction(void); +extern bool IsSubTransaction(void); +extern Size EstimateTransactionStateSpace(void); +extern void SerializeTransactionState(Size maxsize, char *start_address); +extern void StartParallelWorkerTransaction(char *tstatespace); +extern void EndParallelWorkerTransaction(void); +extern bool IsTransactionBlock(void); +extern bool IsTransactionOrTransactionBlock(void); +extern char TransactionBlockStatusCode(void); +extern void AbortOutOfAnyTransaction(void); +extern void PreventInTransactionBlock(bool isTopLevel, const char *stmtType); +extern void RequireTransactionBlock(bool isTopLevel, const char *stmtType); +extern void WarnNoTransactionBlock(bool isTopLevel, const char *stmtType); +extern bool IsInTransactionBlock(bool isTopLevel); +extern void RegisterXactCallback(XactCallback callback, void *arg); +extern void UnregisterXactCallback(XactCallback callback, void *arg); +extern void RegisterSubXactCallback(SubXactCallback callback, void *arg); +extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg); + +extern bool IsSubTransactionAssignmentPending(void); +extern void MarkSubTransactionAssigned(void); + +extern int xactGetCommittedChildren(TransactionId **ptr); + +extern XLogRecPtr XactLogCommitRecord(TimestampTz commit_time, + int nsubxacts, TransactionId *subxacts, + int nrels, RelFileNode *rels, + int nmsgs, SharedInvalidationMessage *msgs, + bool relcacheInval, + int xactflags, + TransactionId twophase_xid, + const char *twophase_gid); + +extern XLogRecPtr XactLogAbortRecord(TimestampTz abort_time, + int nsubxacts, TransactionId *subxacts, + int nrels, RelFileNode *rels, + int xactflags, TransactionId twophase_xid, + const char *twophase_gid); +extern void xact_redo(XLogReaderState *record); + +/* xactdesc.c */ +extern void xact_desc(StringInfo buf, XLogReaderState *record); +extern const char *xact_identify(uint8 info); + +/* also in xactdesc.c, so they can be shared between front/backend code */ +extern void ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, 
xl_xact_parsed_commit *parsed); +extern void ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed); +extern void ParsePrepareRecord(uint8 info, xl_xact_prepare *xlrec, xl_xact_parsed_prepare *parsed); + +extern void EnterParallelMode(void); +extern void ExitParallelMode(void); +extern bool IsInParallelMode(void); + +#endif /* XACT_H */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h new file mode 100644 index 0000000..ee3e369 --- /dev/null +++ b/src/include/access/xlog.h @@ -0,0 +1,406 @@ +/* + * xlog.h + * + * PostgreSQL write-ahead log manager + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xlog.h + */ +#ifndef XLOG_H +#define XLOG_H + +#include "access/rmgr.h" +#include "access/xlogdefs.h" +#include "access/xloginsert.h" +#include "access/xlogreader.h" +#include "datatype/timestamp.h" +#include "lib/stringinfo.h" +#include "nodes/pg_list.h" +#include "storage/fd.h" + + +/* Sync methods */ +#define SYNC_METHOD_FSYNC 0 +#define SYNC_METHOD_FDATASYNC 1 +#define SYNC_METHOD_OPEN 2 /* for O_SYNC */ +#define SYNC_METHOD_FSYNC_WRITETHROUGH 3 +#define SYNC_METHOD_OPEN_DSYNC 4 /* for O_DSYNC */ +extern int sync_method; + +extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */ + +/* + * Prior to 8.4, all activity during recovery was carried out by the startup + * process. This local variable continues to be used in many parts of the + * code to indicate actions taken by RecoveryManagers. Other processes that + * potentially perform work during recovery should check RecoveryInProgress(). + * See XLogCtl notes in xlog.c. + */ +extern bool InRecovery; + +/* + * Like InRecovery, standbyState is only valid in the startup process. + * In all other processes it will have the value STANDBY_DISABLED (so + * InHotStandby will read as false). + * + * In DISABLED state, we're performing crash recovery or hot standby was + * disabled in postgresql.conf. + * + * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but + * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record + * to initialize our primary-transaction tracking system. + * + * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING + * state. The tracked information might still be incomplete, so we can't allow + * connections yet, but redo functions must update the in-memory state when + * appropriate. + * + * In SNAPSHOT_READY mode, we have full knowledge of transactions that are + * (or were) running on the primary at the current WAL location. Snapshots + * can be taken, and read-only queries can be run. + */ +typedef enum +{ + STANDBY_DISABLED, + STANDBY_INITIALIZED, + STANDBY_SNAPSHOT_PENDING, + STANDBY_SNAPSHOT_READY +} HotStandbyState; + +extern HotStandbyState standbyState; + +#define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING) + +/* + * Recovery target type. + * Only set during a Point in Time recovery, not when in standby mode. 
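
[Editorial aside] The recovery-state declarations above (InRecovery, standbyState, InHotStandby) are startup-process locals; any other backend that needs to know whether recovery is still running should use RecoveryInProgress(), whose prototype appears further down in this header. A minimal, hypothetical sketch of that guard (the function name is invented for illustration):

#include "postgres.h"
#include "access/xlog.h"        /* RecoveryInProgress() */

/* Hypothetical helper: skip write work while recovery is still running. */
static void
maybe_do_write_work(void)
{
    /*
     * InRecovery and standbyState are only meaningful in the startup
     * process, as the comments above note; RecoveryInProgress() is the
     * shared-memory check that any backend may use.
     */
    if (RecoveryInProgress())
        return;                 /* no WAL insertion allowed yet */

    /* Normal primary operation: WAL-logged changes are permitted here. */
}
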
+ */ +typedef enum +{ + RECOVERY_TARGET_UNSET, + RECOVERY_TARGET_XID, + RECOVERY_TARGET_TIME, + RECOVERY_TARGET_NAME, + RECOVERY_TARGET_LSN, + RECOVERY_TARGET_IMMEDIATE +} RecoveryTargetType; + +/* + * Recovery target TimeLine goal + */ +typedef enum +{ + RECOVERY_TARGET_TIMELINE_CONTROLFILE, + RECOVERY_TARGET_TIMELINE_LATEST, + RECOVERY_TARGET_TIMELINE_NUMERIC +} RecoveryTargetTimeLineGoal; + +extern XLogRecPtr ProcLastRecPtr; +extern XLogRecPtr XactLastRecEnd; +extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd; + +extern bool reachedConsistency; + +/* these variables are GUC parameters related to XLOG */ +extern int wal_segment_size; +extern int min_wal_size_mb; +extern int max_wal_size_mb; +extern int wal_keep_size_mb; +extern int max_slot_wal_keep_size_mb; +extern int XLOGbuffers; +extern int XLogArchiveTimeout; +extern int wal_retrieve_retry_interval; +extern char *XLogArchiveCommand; +extern bool EnableHotStandby; +extern bool fullPageWrites; +extern bool wal_log_hints; +extern bool wal_compression; +extern bool wal_init_zero; +extern bool wal_recycle; +extern bool *wal_consistency_checking; +extern char *wal_consistency_checking_string; +extern bool log_checkpoints; +extern char *recoveryRestoreCommand; +extern char *recoveryEndCommand; +extern char *archiveCleanupCommand; +extern bool recoveryTargetInclusive; +extern int recoveryTargetAction; +extern int recovery_min_apply_delay; +extern char *PrimaryConnInfo; +extern char *PrimarySlotName; +extern bool wal_receiver_create_temp_slot; +extern bool track_wal_io_timing; + +/* indirectly set via GUC system */ +extern TransactionId recoveryTargetXid; +extern char *recovery_target_time_string; +extern const char *recoveryTargetName; +extern XLogRecPtr recoveryTargetLSN; +extern RecoveryTargetType recoveryTarget; +extern char *PromoteTriggerFile; +extern RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal; +extern TimeLineID recoveryTargetTLIRequested; +extern TimeLineID recoveryTargetTLI; + +extern int CheckPointSegments; + +/* option set locally in startup process only when signal files exist */ +extern bool StandbyModeRequested; +extern bool StandbyMode; + +/* Archive modes */ +typedef enum ArchiveMode +{ + ARCHIVE_MODE_OFF = 0, /* disabled */ + ARCHIVE_MODE_ON, /* enabled while server is running normally */ + ARCHIVE_MODE_ALWAYS /* enabled always (even during recovery) */ +} ArchiveMode; +extern int XLogArchiveMode; + +/* WAL levels */ +typedef enum WalLevel +{ + WAL_LEVEL_MINIMAL = 0, + WAL_LEVEL_REPLICA, + WAL_LEVEL_LOGICAL +} WalLevel; + +/* Recovery states */ +typedef enum RecoveryState +{ + RECOVERY_STATE_CRASH = 0, /* crash recovery */ + RECOVERY_STATE_ARCHIVE, /* archive recovery */ + RECOVERY_STATE_DONE /* currently in production */ +} RecoveryState; + +/* Recovery pause states */ +typedef enum RecoveryPauseState +{ + RECOVERY_NOT_PAUSED, /* pause not requested */ + RECOVERY_PAUSE_REQUESTED, /* pause requested, but not yet paused */ + RECOVERY_PAUSED /* recovery is paused */ +} RecoveryPauseState; + +extern PGDLLIMPORT int wal_level; + +/* Is WAL archiving enabled (always or only while server is running normally)? */ +#define XLogArchivingActive() \ + (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF) +/* Is WAL archiving enabled always (even during recovery)? 
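
[Editorial aside] A quick hypothetical illustration of the XLogArchivingActive() macro defined above: callers typically skip archive notification entirely when archiving is off. XLogArchiveNotifySeg() is declared in xlogarchive.h later in this patch; the surrounding helper is made up.

#include "postgres.h"
#include "access/xlog.h"            /* XLogArchivingActive() */
#include "access/xlogarchive.h"     /* XLogArchiveNotifySeg() */

/* Hypothetical: mark a finished segment for archival only if archiving is on. */
static void
segment_finished(XLogSegNo segno)
{
    if (XLogArchivingActive())
        XLogArchiveNotifySeg(segno);    /* creates the .ready status file */
}
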
*/ +#define XLogArchivingAlways() \ + (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS) +#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0') + +/* + * Is WAL-logging necessary for archival or log-shipping, or can we skip + * WAL-logging if we fsync() the data before committing instead? + */ +#define XLogIsNeeded() (wal_level >= WAL_LEVEL_REPLICA) + +/* + * Is a full-page image needed for hint bit updates? + * + * Normally, we don't WAL-log hint bit updates, but if checksums are enabled, + * we have to protect them against torn page writes. When you only set + * individual bits on a page, it's still consistent no matter what combination + * of the bits make it to disk, but the checksum wouldn't match. Also WAL-log + * them if forced by wal_log_hints=on. + */ +#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints) + +/* Do we need to WAL-log information required only for Hot Standby and logical replication? */ +#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) + +/* Do we need to WAL-log information required only for logical replication? */ +#define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL) + +#ifdef WAL_DEBUG +extern bool XLOG_DEBUG; +#endif + +/* + * OR-able request flag bits for checkpoints. The "cause" bits are used only + * for logging purposes. Note: the flags must be defined so that it's + * sensible to OR together request flags arising from different requestors. + */ + +/* These directly affect the behavior of CreateCheckPoint and subsidiaries */ +#define CHECKPOINT_IS_SHUTDOWN 0x0001 /* Checkpoint is for shutdown */ +#define CHECKPOINT_END_OF_RECOVERY 0x0002 /* Like shutdown checkpoint, but + * issued at end of WAL recovery */ +#define CHECKPOINT_IMMEDIATE 0x0004 /* Do it without delays */ +#define CHECKPOINT_FORCE 0x0008 /* Force even if no activity */ +#define CHECKPOINT_FLUSH_ALL 0x0010 /* Flush all pages, including those + * belonging to unlogged tables */ +/* These are important to RequestCheckpoint */ +#define CHECKPOINT_WAIT 0x0020 /* Wait for completion */ +#define CHECKPOINT_REQUESTED 0x0040 /* Checkpoint request has been made */ +/* These indicate the cause of a checkpoint request */ +#define CHECKPOINT_CAUSE_XLOG 0x0080 /* XLOG consumption */ +#define CHECKPOINT_CAUSE_TIME 0x0100 /* Elapsed time */ + +/* + * Flag bits for the record being inserted, set using XLogSetRecordFlags(). 
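
[Editorial aside] The checkpoint flag bits listed above are designed to be OR-ed together by requestors. A hedged sketch of a typical request, assuming the caller goes through RequestCheckpoint(), which lives in postmaster/bgwriter.h rather than this header; the helper function is invented:

#include "postgres.h"
#include "access/xlog.h"            /* CHECKPOINT_* flag bits */
#include "postmaster/bgwriter.h"    /* RequestCheckpoint() */

/* Hypothetical: ask the checkpointer for an immediate checkpoint and wait. */
static void
force_checkpoint_now(void)
{
    RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
}
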
+ */ +#define XLOG_INCLUDE_ORIGIN 0x01 /* include the replication origin */ +#define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */ +#define XLOG_INCLUDE_XID 0x04 /* WAL-internal message-passing hack */ + + +/* Checkpoint statistics */ +typedef struct CheckpointStatsData +{ + TimestampTz ckpt_start_t; /* start of checkpoint */ + TimestampTz ckpt_write_t; /* start of flushing buffers */ + TimestampTz ckpt_sync_t; /* start of fsyncs */ + TimestampTz ckpt_sync_end_t; /* end of fsyncs */ + TimestampTz ckpt_end_t; /* end of checkpoint */ + + int ckpt_bufs_written; /* # of buffers written */ + + int ckpt_segs_added; /* # of new xlog segments created */ + int ckpt_segs_removed; /* # of xlog segments deleted */ + int ckpt_segs_recycled; /* # of xlog segments recycled */ + + int ckpt_sync_rels; /* # of relations synced */ + uint64 ckpt_longest_sync; /* Longest sync for one relation */ + uint64 ckpt_agg_sync_time; /* The sum of all the individual sync + * times, which is not necessarily the + * same as the total elapsed time for the + * entire sync phase. */ +} CheckpointStatsData; + +extern CheckpointStatsData CheckpointStats; + +/* + * GetWALAvailability return codes + */ +typedef enum WALAvailability +{ + WALAVAIL_INVALID_LSN, /* parameter error */ + WALAVAIL_RESERVED, /* WAL segment is within max_wal_size */ + WALAVAIL_EXTENDED, /* WAL segment is reserved by a slot or + * wal_keep_size */ + WALAVAIL_UNRESERVED, /* no longer reserved, but not removed yet */ + WALAVAIL_REMOVED /* WAL segment has been removed */ +} WALAvailability; + +struct XLogRecData; + +extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags, + int num_fpi); +extern void XLogFlush(XLogRecPtr RecPtr); +extern bool XLogBackgroundFlush(void); +extern bool XLogNeedsFlush(XLogRecPtr RecPtr); +extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock); +extern int XLogFileOpen(XLogSegNo segno); + +extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli); +extern XLogSegNo XLogGetLastRemovedSegno(void); +extern void XLogSetAsyncXactLSN(XLogRecPtr record); +extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn); + +extern void xlog_redo(XLogReaderState *record); +extern void xlog_desc(StringInfo buf, XLogReaderState *record); +extern const char *xlog_identify(uint8 info); + +extern void issue_xlog_fsync(int fd, XLogSegNo segno); + +extern bool RecoveryInProgress(void); +extern RecoveryState GetRecoveryState(void); +extern bool HotStandbyActive(void); +extern bool HotStandbyActiveInReplay(void); +extern bool XLogInsertAllowed(void); +extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); +extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); +extern XLogRecPtr GetXLogInsertRecPtr(void); +extern XLogRecPtr GetXLogWriteRecPtr(void); +extern RecoveryPauseState GetRecoveryPauseState(void); +extern void SetRecoveryPause(bool recoveryPause); +extern TimestampTz GetLatestXTime(void); +extern TimestampTz GetCurrentChunkReplayStartTime(void); + +extern void UpdateControlFile(void); +extern uint64 GetSystemIdentifier(void); +extern char *GetMockAuthenticationNonce(void); +extern bool DataChecksumsEnabled(void); +extern XLogRecPtr GetFakeLSNForUnloggedRel(void); +extern Size XLOGShmemSize(void); +extern void XLOGShmemInit(void); +extern void BootStrapXLOG(void); +extern void LocalProcessControlFile(bool reset); +extern void StartupXLOG(void); +extern void ShutdownXLOG(int code, Datum arg); +extern void InitXLOGAccess(void); +extern 
void CreateCheckPoint(int flags); +extern bool CreateRestartPoint(int flags); +extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); +extern XLogRecPtr CalculateMaxmumSafeLSN(void); +extern void XLogPutNextOid(Oid nextOid); +extern XLogRecPtr XLogRestorePoint(const char *rpName); +extern void UpdateFullPageWrites(void); +extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); +extern XLogRecPtr GetRedoRecPtr(void); +extern XLogRecPtr GetInsertRecPtr(void); +extern XLogRecPtr GetFlushRecPtr(void); +extern XLogRecPtr GetLastImportantRecPtr(void); +extern void RemovePromoteSignalFiles(void); + +extern bool PromoteIsTriggered(void); +extern bool CheckPromoteSignal(void); +extern void WakeupRecovery(void); +extern void SetWalWriterSleeping(bool sleeping); + +extern void StartupRequestWalReceiverRestart(void); +extern void XLogRequestWalReceiverReply(void); + +extern void assign_max_wal_size(int newval, void *extra); +extern void assign_checkpoint_completion_target(double newval, void *extra); + +/* + * Routines to start, stop, and get status of a base backup. + */ + +/* + * Session-level status of base backups + * + * This is used in parallel with the shared memory status to control parallel + * execution of base backup functions for a given session, be it a backend + * dedicated to replication or a normal backend connected to a database. The + * update of the session-level status happens at the same time as the shared + * memory counters to keep a consistent global and local state of the backups + * running. + */ +typedef enum SessionBackupState +{ + SESSION_BACKUP_NONE, + SESSION_BACKUP_EXCLUSIVE, + SESSION_BACKUP_NON_EXCLUSIVE +} SessionBackupState; + +extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, + TimeLineID *starttli_p, StringInfo labelfile, + List **tablespaces, StringInfo tblspcmapfile); +extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, + TimeLineID *stoptli_p); +extern void do_pg_abort_backup(int code, Datum arg); +extern void register_persistent_abort_backup_handler(void); +extern SessionBackupState get_backup_status(void); + +/* File path names (all relative to $PGDATA) */ +#define RECOVERY_SIGNAL_FILE "recovery.signal" +#define STANDBY_SIGNAL_FILE "standby.signal" +#define BACKUP_LABEL_FILE "backup_label" +#define BACKUP_LABEL_OLD "backup_label.old" + +#define TABLESPACE_MAP "tablespace_map" +#define TABLESPACE_MAP_OLD "tablespace_map.old" + +/* files to signal promotion to primary */ +#define PROMOTE_SIGNAL_FILE "promote" + +#endif /* XLOG_H */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h new file mode 100644 index 0000000..dcf41e9 --- /dev/null +++ b/src/include/access/xlog_internal.h @@ -0,0 +1,336 @@ +/* + * xlog_internal.h + * + * PostgreSQL write-ahead log internal declarations + * + * NOTE: this file is intended to contain declarations useful for + * manipulating the XLOG files directly, but it is not supposed to be + * needed by rmgr routines (redo support for individual record types). + * So the XLogRecord typedef and associated stuff appear in xlogrecord.h. + * + * Note: This file must be includable in both frontend and backend contexts, + * to allow stand-alone tools like pg_receivewal to deal with WAL files. 
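
[Editorial aside] Before moving on with xlog_internal.h: a small, hypothetical illustration of the flush API declared in xlog.h above. XactLastRecEnd tracks the end LSN of the last WAL record written by the current backend, so flushing up to it is the usual way to make that record durable (synchronous commit does something similar; this is a sketch, not a quotation of that code).

#include "postgres.h"
#include "access/xlog.h"        /* XactLastRecEnd, XLogFlush() */

/* Hypothetical: make everything this backend has WAL-logged so far durable. */
static void
flush_my_wal(void)
{
    if (!XLogRecPtrIsInvalid(XactLastRecEnd))
        XLogFlush(XactLastRecEnd);
}
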
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xlog_internal.h + */ +#ifndef XLOG_INTERNAL_H +#define XLOG_INTERNAL_H + +#include "access/xlogdefs.h" +#include "access/xlogreader.h" +#include "datatype/timestamp.h" +#include "lib/stringinfo.h" +#include "pgtime.h" +#include "storage/block.h" +#include "storage/relfilenode.h" + + +/* + * Each page of XLOG file has a header like this: + */ +#define XLOG_PAGE_MAGIC 0xD10D /* can be used as WAL version indicator */ + +typedef struct XLogPageHeaderData +{ + uint16 xlp_magic; /* magic value for correctness checks */ + uint16 xlp_info; /* flag bits, see below */ + TimeLineID xlp_tli; /* TimeLineID of first record on page */ + XLogRecPtr xlp_pageaddr; /* XLOG address of this page */ + + /* + * When there is not enough space on current page for whole record, we + * continue on the next page. xlp_rem_len is the number of bytes + * remaining from a previous page; it tracks xl_tot_len in the initial + * header. Note that the continuation data isn't necessarily aligned. + */ + uint32 xlp_rem_len; /* total len of remaining data for record */ +} XLogPageHeaderData; + +#define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData)) + +typedef XLogPageHeaderData *XLogPageHeader; + +/* + * When the XLP_LONG_HEADER flag is set, we store additional fields in the + * page header. (This is ordinarily done just in the first page of an + * XLOG file.) The additional fields serve to identify the file accurately. + */ +typedef struct XLogLongPageHeaderData +{ + XLogPageHeaderData std; /* standard header fields */ + uint64 xlp_sysid; /* system identifier from pg_control */ + uint32 xlp_seg_size; /* just as a cross-check */ + uint32 xlp_xlog_blcksz; /* just as a cross-check */ +} XLogLongPageHeaderData; + +#define SizeOfXLogLongPHD MAXALIGN(sizeof(XLogLongPageHeaderData)) + +typedef XLogLongPageHeaderData *XLogLongPageHeader; + +/* When record crosses page boundary, set this flag in new page's header */ +#define XLP_FIRST_IS_CONTRECORD 0x0001 +/* This flag indicates a "long" page header */ +#define XLP_LONG_HEADER 0x0002 +/* This flag indicates backup blocks starting in this page are optional */ +#define XLP_BKP_REMOVABLE 0x0004 +/* Replaces a missing contrecord; see CreateOverwriteContrecordRecord */ +#define XLP_FIRST_IS_OVERWRITE_CONTRECORD 0x0008 +/* All defined flag bits in xlp_info (used for validity checking of header) */ +#define XLP_ALL_FLAGS 0x000F + +#define XLogPageHeaderSize(hdr) \ + (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) + +/* wal_segment_size can range from 1MB to 1GB */ +#define WalSegMinSize 1024 * 1024 +#define WalSegMaxSize 1024 * 1024 * 1024 +/* default number of min and max wal segments */ +#define DEFAULT_MIN_WAL_SEGS 5 +#define DEFAULT_MAX_WAL_SEGS 64 + +/* check that the given size is a valid wal_segment_size */ +#define IsPowerOf2(x) (x > 0 && ((x) & ((x)-1)) == 0) +#define IsValidWalSegSize(size) \ + (IsPowerOf2(size) && \ + ((size) >= WalSegMinSize && (size) <= WalSegMaxSize)) + +#define XLogSegmentsPerXLogId(wal_segsz_bytes) \ + (UINT64CONST(0x100000000) / (wal_segsz_bytes)) + +#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest) \ + (dest) = (segno) * (wal_segsz_bytes) + (offset) + +#define XLogSegmentOffset(xlogptr, wal_segsz_bytes) \ + ((xlogptr) & ((wal_segsz_bytes) - 1)) + +/* + * Compute a segment number from an XLogRecPtr. 
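
[Editorial aside] As an aside on the page-header definitions above: a hedged sketch of sanity-checking a raw WAL page buffer using XLOG_PAGE_MAGIC, XLP_ALL_FLAGS and the header structs. The real validation lives in XLogReaderValidatePageHeader() (xlogreader.h, later in this patch); this fragment only illustrates the layout.

#include "postgres.h"
#include "access/xlog_internal.h"

/* Hypothetical: does this buffer look like the start of a valid WAL page? */
static bool
looks_like_wal_page(const char *buf)
{
    const XLogPageHeaderData *hdr = (const XLogPageHeaderData *) buf;

    /* The magic number doubles as a WAL format version check. */
    if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
        return false;

    /* Reject flag bits that are not defined for this WAL version. */
    if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
        return false;

    return true;
}
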
+ * + * For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg, + * a boundary byte is taken to be in the previous segment. This is suitable + * for deciding which segment to write given a pointer to a record end, + * for example. + */ +#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes) \ + logSegNo = (xlrp) / (wal_segsz_bytes) + +#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \ + logSegNo = ((xlrp) - 1) / (wal_segsz_bytes) + +/* + * Convert values of GUCs measured in megabytes to equiv. segment count. + * Rounds down. + */ +#define XLogMBVarToSegs(mbvar, wal_segsz_bytes) \ + ((mbvar) / ((wal_segsz_bytes) / (1024 * 1024))) + +/* + * Is an XLogRecPtr within a particular XLOG segment? + * + * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg, + * a boundary byte is taken to be in the previous segment. + */ +#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes) \ + (((xlrp) / (wal_segsz_bytes)) == (logSegNo)) + +#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \ + ((((xlrp) - 1) / (wal_segsz_bytes)) == (logSegNo)) + +/* Check if an XLogRecPtr value is in a plausible range */ +#define XRecOffIsValid(xlrp) \ + ((xlrp) % XLOG_BLCKSZ >= SizeOfXLogShortPHD) + +/* + * The XLog directory and control file (relative to $PGDATA) + */ +#define XLOGDIR "pg_wal" +#define XLOG_CONTROL_FILE "global/pg_control" + +/* + * These macros encapsulate knowledge about the exact layout of XLog file + * names, timeline history file names, and archive-status file names. + */ +#define MAXFNAMELEN 64 + +/* Length of XLog file name */ +#define XLOG_FNAME_LEN 24 + +/* + * Generate a WAL segment file name. Do not use this macro in a helper + * function allocating the result generated. + */ +#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, \ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) + +#define XLogFileNameById(fname, tli, log, seg) \ + snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, log, seg) + +#define IsXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN) + +/* + * XLOG segment with .partial suffix. Used by pg_receivewal and at end of + * archive recovery, when we want to archive a WAL segment but it might not + * be complete yet. 
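
[Editorial aside] A hedged example of the segment-math and file-name macros above, converting an arbitrary LSN into the WAL segment file name that contains it. The helper is invented; wal_segment_size is the variable declared in xlog.h earlier in this patch.

#include "postgres.h"
#include "access/xlog.h"            /* wal_segment_size */
#include "access/xlog_internal.h"   /* XLByteToSeg(), XLogFileName() */

/* Hypothetical: which WAL segment file holds this LSN on timeline 'tli'? */
static void
lsn_to_walfile(XLogRecPtr lsn, TimeLineID tli, char *fname /* MAXFNAMELEN */ )
{
    XLogSegNo   segno;

    XLByteToSeg(lsn, segno, wal_segment_size);
    XLogFileName(fname, tli, segno, wal_segment_size);
}
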
+ */ +#define IsPartialXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".partial") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".partial") == 0) + +#define XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + do { \ + uint32 log; \ + uint32 seg; \ + sscanf(fname, "%08X%08X%08X", tli, &log, &seg); \ + *logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg; \ + } while (0) + +#define XLogFilePath(path, tli, logSegNo, wal_segsz_bytes) \ + snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X", tli, \ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) + +#define TLHistoryFileName(fname, tli) \ + snprintf(fname, MAXFNAMELEN, "%08X.history", tli) + +#define IsTLHistoryFileName(fname) \ + (strlen(fname) == 8 + strlen(".history") && \ + strspn(fname, "0123456789ABCDEF") == 8 && \ + strcmp((fname) + 8, ".history") == 0) + +#define TLHistoryFilePath(path, tli) \ + snprintf(path, MAXPGPATH, XLOGDIR "/%08X.history", tli) + +#define StatusFilePath(path, xlog, suffix) \ + snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix) + +#define BackupHistoryFileName(fname, tli, logSegNo, startpoint, wal_segsz_bytes) \ + snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, \ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) (XLogSegmentOffset(startpoint, wal_segsz_bytes))) + +#define IsBackupHistoryFileName(fname) \ + (strlen(fname) > XLOG_FNAME_LEN && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + strlen(fname) - strlen(".backup"), ".backup") == 0) + +#define BackupHistoryFilePath(path, tli, logSegNo, startpoint, wal_segsz_bytes) \ + snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X.%08X.backup", tli, \ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) (XLogSegmentOffset((startpoint), wal_segsz_bytes))) + +/* + * Information logged when we detect a change in one of the parameters + * important for Hot Standby. + */ +typedef struct xl_parameter_change +{ + int MaxConnections; + int max_worker_processes; + int max_wal_senders; + int max_prepared_xacts; + int max_locks_per_xact; + int wal_level; + bool wal_log_hints; + bool track_commit_timestamp; +} xl_parameter_change; + +/* logs restore point */ +typedef struct xl_restore_point +{ + TimestampTz rp_time; + char rp_name[MAXFNAMELEN]; +} xl_restore_point; + +/* Overwrite of prior contrecord */ +typedef struct xl_overwrite_contrecord +{ + XLogRecPtr overwritten_lsn; + TimestampTz overwrite_time; +} xl_overwrite_contrecord; + +/* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */ +typedef struct xl_end_of_recovery +{ + TimestampTz end_time; + TimeLineID ThisTimeLineID; /* new TLI */ + TimeLineID PrevTimeLineID; /* previous TLI we forked off from */ +} xl_end_of_recovery; + +/* + * The functions in xloginsert.c construct a chain of XLogRecData structs + * to represent the final WAL record. + */ +typedef struct XLogRecData +{ + struct XLogRecData *next; /* next struct in chain, or NULL */ + char *data; /* start of rmgr data to include */ + uint32 len; /* length of rmgr data to include */ +} XLogRecData; + +/* + * Recovery target action. 
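
[Editorial aside] The name-matching macros above are what tools use when scanning pg_wal or an archive directory. A hypothetical classifier, purely for illustration:

#include "postgres.h"
#include "access/xlog_internal.h"

/* Hypothetical classifier for a file name found in pg_wal or an archive. */
static const char *
classify_wal_entry(const char *fname)
{
    if (IsXLogFileName(fname))
        return "complete WAL segment";
    if (IsPartialXLogFileName(fname))
        return "partial WAL segment (.partial)";
    if (IsTLHistoryFileName(fname))
        return "timeline history file";
    if (IsBackupHistoryFileName(fname))
        return "backup history file";
    return "unrelated file";
}
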
+ */ +typedef enum +{ + RECOVERY_TARGET_ACTION_PAUSE, + RECOVERY_TARGET_ACTION_PROMOTE, + RECOVERY_TARGET_ACTION_SHUTDOWN +} RecoveryTargetAction; + +/* + * Method table for resource managers. + * + * This struct must be kept in sync with the PG_RMGR definition in + * rmgr.c. + * + * rm_identify must return a name for the record based on xl_info (without + * reference to the rmid). For example, XLOG_BTREE_VACUUM would be named + * "VACUUM". rm_desc can then be called to obtain additional detail for the + * record, if available (e.g. the last block). + * + * rm_mask takes as input a page modified by the resource manager and masks + * out bits that shouldn't be flagged by wal_consistency_checking. + * + * RmgrTable[] is indexed by RmgrId values (see rmgrlist.h). + */ +typedef struct RmgrData +{ + const char *rm_name; + void (*rm_redo) (XLogReaderState *record); + void (*rm_desc) (StringInfo buf, XLogReaderState *record); + const char *(*rm_identify) (uint8 info); + void (*rm_startup) (void); + void (*rm_cleanup) (void); + void (*rm_mask) (char *pagedata, BlockNumber blkno); +} RmgrData; + +extern const RmgrData RmgrTable[]; + +/* + * Exported to support xlog switching from checkpointer + */ +extern pg_time_t GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN); +extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant); + +extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); + +/* + * Exported for the functions in timeline.c and xlogarchive.c. Only valid + * in the startup process. + */ +extern bool ArchiveRecoveryRequested; +extern bool InArchiveRecovery; +extern bool StandbyMode; +extern char *recoveryRestoreCommand; + +#endif /* XLOG_INTERNAL_H */ diff --git a/src/include/access/xlogarchive.h b/src/include/access/xlogarchive.h new file mode 100644 index 0000000..3edd1a9 --- /dev/null +++ b/src/include/access/xlogarchive.h @@ -0,0 +1,35 @@ +/*------------------------------------------------------------------------ + * + * xlogarchive.h + * Prototypes for WAL archives in the backend + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/access/xlogarchive.h + * + *------------------------------------------------------------------------ + */ + +#ifndef XLOG_ARCHIVE_H +#define XLOG_ARCHIVE_H + +#include "access/xlogdefs.h" + +extern bool RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername, off_t expectedSize, + bool cleanupEnabled); +extern void ExecuteRecoveryCommand(const char *command, const char *commandName, + bool failOnSignal); +extern void KeepFileRestoredFromArchive(const char *path, const char *xlogfname); +extern void XLogArchiveNotify(const char *xlog); +extern void XLogArchiveNotifySeg(XLogSegNo segno); +extern void XLogArchiveForceDone(const char *xlog); +extern bool XLogArchiveCheckDone(const char *xlog); +extern bool XLogArchiveIsBusy(const char *xlog); +extern bool XLogArchiveIsReady(const char *xlog); +extern bool XLogArchiveIsReadyOrDone(const char *xlog); +extern void XLogArchiveCleanup(const char *xlog); + +#endif /* XLOG_ARCHIVE_H */ diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h new file mode 100644 index 0000000..0940b64 --- /dev/null +++ b/src/include/access/xlogdefs.h @@ -0,0 +1,116 @@ +/* + * xlogdefs.h + * + * Postgres write-ahead log manager record pointer and + * timeline number definitions + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global 
Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xlogdefs.h + */ +#ifndef XLOG_DEFS_H +#define XLOG_DEFS_H + +#include <fcntl.h> /* need open() flags */ + +/* + * Pointer to a location in the XLOG. These pointers are 64 bits wide, + * because we don't want them ever to overflow. + */ +typedef uint64 XLogRecPtr; + +/* + * Zero is used indicate an invalid pointer. Bootstrap skips the first possible + * WAL segment, initializing the first WAL page at WAL segment size, so no XLOG + * record can begin at zero. + */ +#define InvalidXLogRecPtr 0 +#define XLogRecPtrIsInvalid(r) ((r) == InvalidXLogRecPtr) + +/* + * First LSN to use for "fake" LSNs. + * + * Values smaller than this can be used for special per-AM purposes. + */ +#define FirstNormalUnloggedLSN ((XLogRecPtr) 1000) + +/* + * Handy macro for printing XLogRecPtr in conventional format, e.g., + * + * printf("%X/%X", LSN_FORMAT_ARGS(lsn)); + */ +#define LSN_FORMAT_ARGS(lsn) (AssertVariableIsOfTypeMacro((lsn), XLogRecPtr), (uint32) ((lsn) >> 32)), ((uint32) (lsn)) + +/* + * XLogSegNo - physical log file sequence number. + */ +typedef uint64 XLogSegNo; + +/* + * TimeLineID (TLI) - identifies different database histories to prevent + * confusion after restoring a prior state of a database installation. + * TLI does not change in a normal stop/restart of the database (including + * crash-and-recover cases); but we must assign a new TLI after doing + * a recovery to a prior state, a/k/a point-in-time recovery. This makes + * the new WAL logfile sequence we generate distinguishable from the + * sequence that was generated in the previous incarnation. + */ +typedef uint32 TimeLineID; + +/* + * Replication origin id - this is located in this file to avoid having to + * include origin.h in a bunch of xlog related places. + */ +typedef uint16 RepOriginId; + +/* + * Because O_DIRECT bypasses the kernel buffers, and because we never + * read those buffers except during crash recovery or if wal_level != minimal, + * it is a win to use it in all cases where we sync on each write(). We could + * allow O_DIRECT with fsync(), but it is unclear if fsync() could process + * writes not buffered in the kernel. Also, O_DIRECT is never enough to force + * data to the drives, it merely tries to bypass the kernel cache, so we still + * need O_SYNC/O_DSYNC. + */ +#ifdef O_DIRECT +#define PG_O_DIRECT O_DIRECT +#else +#define PG_O_DIRECT 0 +#endif + +/* + * This chunk of hackery attempts to determine which file sync methods + * are available on the current platform, and to choose an appropriate + * default method. We assume that fsync() is always available, and that + * configure determined whether fdatasync() is. + */ +#if defined(O_SYNC) +#define OPEN_SYNC_FLAG O_SYNC +#elif defined(O_FSYNC) +#define OPEN_SYNC_FLAG O_FSYNC +#endif + +#if defined(O_DSYNC) +#if defined(OPEN_SYNC_FLAG) +/* O_DSYNC is distinct? 
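
[Editorial aside] LSN_FORMAT_ARGS above pairs with a "%X/%X" format string, which keeps LSN logging consistent across the tree. A small hypothetical use:

#include "postgres.h"
#include "access/xlogdefs.h"

/* Hypothetical: report how far WAL has been flushed. */
static void
report_flush_position(XLogRecPtr flushed)
{
    elog(LOG, "WAL flushed up to %X/%X", LSN_FORMAT_ARGS(flushed));
}
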
*/ +#if O_DSYNC != OPEN_SYNC_FLAG +#define OPEN_DATASYNC_FLAG O_DSYNC +#endif +#else /* !defined(OPEN_SYNC_FLAG) */ +/* Win32 only has O_DSYNC */ +#define OPEN_DATASYNC_FLAG O_DSYNC +#endif +#endif + +#if defined(PLATFORM_DEFAULT_SYNC_METHOD) +#define DEFAULT_SYNC_METHOD PLATFORM_DEFAULT_SYNC_METHOD +#elif defined(OPEN_DATASYNC_FLAG) +#define DEFAULT_SYNC_METHOD SYNC_METHOD_OPEN_DSYNC +#elif defined(HAVE_FDATASYNC) +#define DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC +#else +#define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC +#endif + +#endif /* XLOG_DEFS_H */ diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h new file mode 100644 index 0000000..f1d8c39 --- /dev/null +++ b/src/include/access/xloginsert.h @@ -0,0 +1,66 @@ +/* + * xloginsert.h + * + * Functions for generating WAL records + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xloginsert.h + */ +#ifndef XLOGINSERT_H +#define XLOGINSERT_H + +#include "access/rmgr.h" +#include "access/xlogdefs.h" +#include "storage/block.h" +#include "storage/buf.h" +#include "storage/relfilenode.h" +#include "utils/relcache.h" + +/* + * The minimum size of the WAL construction working area. If you need to + * register more than XLR_NORMAL_MAX_BLOCK_ID block references or have more + * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call + * XLogEnsureRecordSpace() first to allocate more working memory. + */ +#define XLR_NORMAL_MAX_BLOCK_ID 4 +#define XLR_NORMAL_RDATAS 20 + +/* flags for XLogRegisterBuffer */ +#define REGBUF_FORCE_IMAGE 0x01 /* force a full-page image */ +#define REGBUF_NO_IMAGE 0x02 /* don't take a full-page image */ +#define REGBUF_WILL_INIT (0x04 | 0x02) /* page will be re-initialized at + * replay (implies NO_IMAGE) */ +#define REGBUF_STANDARD 0x08 /* page follows "standard" page layout, + * (data between pd_lower and pd_upper + * will be skipped) */ +#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image + * is taken */ + +/* prototypes for public functions in xloginsert.c: */ +extern void XLogBeginInsert(void); +extern void XLogSetRecordFlags(uint8 flags); +extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); +extern void XLogEnsureRecordSpace(int max_block_id, int ndatas); +extern void XLogRegisterData(char *data, int len); +extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags); +extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, + ForkNumber forknum, BlockNumber blknum, char *page, + uint8 flags); +extern void XLogRegisterBufData(uint8 block_id, char *data, int len); +extern void XLogResetInsertion(void); +extern bool XLogCheckBufferNeedsBackup(Buffer buffer); + +extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, + BlockNumber blk, char *page, bool page_std); +extern void log_newpages(RelFileNode *rnode, ForkNumber forkNum, int num_pages, + BlockNumber *blknos, char **pages, bool page_std); +extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std); +extern void log_newpage_range(Relation rel, ForkNumber forkNum, + BlockNumber startblk, BlockNumber endblk, bool page_std); +extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); + +extern void InitXLogInsert(void); + +#endif /* XLOGINSERT_H */ diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h new file mode 100644 index 0000000..10458c2 --- /dev/null +++ 
b/src/include/access/xlogreader.h @@ -0,0 +1,340 @@ +/*------------------------------------------------------------------------- + * + * xlogreader.h + * Definitions for the generic XLog reading facility + * + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/access/xlogreader.h + * + * NOTES + * See the definition of the XLogReaderState struct for instructions on + * how to use the XLogReader infrastructure. + * + * The basic idea is to allocate an XLogReaderState via + * XLogReaderAllocate(), position the reader to the first record with + * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord() + * until it returns NULL. + * + * Callers supply a page_read callback if they want to call + * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL + * otherwise. The WALRead function can be used as a helper to write + * page_read callbacks, but it is not mandatory; callers that use it, + * must supply segment_open callbacks. The segment_close callback + * must always be supplied. + * + * After reading a record with XLogReadRecord(), it's decomposed into + * the per-block and main data parts, and the parts can be accessed + * with the XLogRec* macros and functions. You can also decode a + * record that's already constructed in memory, without reading from + * disk, by calling the DecodeXLogRecord() function. + *------------------------------------------------------------------------- + */ +#ifndef XLOGREADER_H +#define XLOGREADER_H + +#ifndef FRONTEND +#include "access/transam.h" +#endif + +#include "access/xlogrecord.h" + +/* WALOpenSegment represents a WAL segment being read. */ +typedef struct WALOpenSegment +{ + int ws_file; /* segment file descriptor */ + XLogSegNo ws_segno; /* segment number */ + TimeLineID ws_tli; /* timeline ID of the currently open file */ +} WALOpenSegment; + +/* WALSegmentContext carries context information about WAL segments to read */ +typedef struct WALSegmentContext +{ + char ws_dir[MAXPGPATH]; + int ws_segsize; +} WALSegmentContext; + +typedef struct XLogReaderState XLogReaderState; + +/* Function type definitions for various xlogreader interactions */ +typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader, + XLogRecPtr targetPagePtr, + int reqLen, + XLogRecPtr targetRecPtr, + char *readBuf); +typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader, + XLogSegNo nextSegNo, + TimeLineID *tli_p); +typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader); + +typedef struct XLogReaderRoutine +{ + /* + * Data input callback + * + * This callback shall read at least reqLen valid bytes of the xlog page + * starting at targetPagePtr, and store them in readBuf. The callback + * shall return the number of bytes read (never more than XLOG_BLCKSZ), or + * -1 on failure. The callback shall sleep, if necessary, to wait for the + * requested bytes to become available. The callback will not be invoked + * again for the same page unless more than the returned number of bytes + * are needed. + * + * targetRecPtr is the position of the WAL record we're reading. Usually + * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs + * to read and verify the page or segment header, before it reads the + * actual WAL record it's interested in. In that case, targetRecPtr can + * be used to determine which timeline to read the page from. + * + * The callback shall set ->seg.ws_tli to the TLI of the file the page was + * read from. 
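
[Editorial aside] Stepping back to the xloginsert.h API shown just before xlogreader.h: the usual WAL-record construction sequence is begin, register, insert. A hedged sketch for an imaginary resource manager; the rmgr id is passed in and the info code and payload struct are made up for illustration. Real callers also hold an exclusive lock on the buffer and run inside a critical section, which is omitted here.

#include "postgres.h"
#include "access/xloginsert.h"

#define XLOG_MYRMGR_OP  0x10    /* made-up info code; high nibble is the
                                 * rmgr-specific part of xl_info */

/* Hypothetical payload for the made-up record type. */
typedef struct xl_myrmgr_op
{
    uint32      value;
} xl_myrmgr_op;

/* Hypothetical: emit one WAL record describing a change to 'buf'. */
static XLogRecPtr
log_myrmgr_op(RmgrId rmid, Buffer buf, uint32 value)
{
    xl_myrmgr_op xlrec;

    xlrec.value = value;

    XLogBeginInsert();
    XLogRegisterData((char *) &xlrec, sizeof(xl_myrmgr_op));
    /* Standard page layout lets the hole between pd_lower/pd_upper be skipped. */
    XLogRegisterBuffer(0, buf, REGBUF_STANDARD);

    return XLogInsert(rmid, XLOG_MYRMGR_OP);
}
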
+ */ + XLogPageReadCB page_read; + + /* + * Callback to open the specified WAL segment for reading. ->seg.ws_file + * shall be set to the file descriptor of the opened segment. In case of + * failure, an error shall be raised by the callback and it shall not + * return. + * + * "nextSegNo" is the number of the segment to be opened. + * + * "tli_p" is an input/output argument. WALRead() uses it to pass the + * timeline in which the new segment should be found, but the callback can + * use it to return the TLI that it actually opened. + */ + WALSegmentOpenCB segment_open; + + /* + * WAL segment close callback. ->seg.ws_file shall be set to a negative + * number. + */ + WALSegmentCloseCB segment_close; +} XLogReaderRoutine; + +#define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__} + +typedef struct +{ + /* Is this block ref in use? */ + bool in_use; + + /* Identify the block this refers to */ + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + + /* copy of the fork_flags field from the XLogRecordBlockHeader */ + uint8 flags; + + /* Information on full-page image, if any */ + bool has_image; /* has image, even for consistency checking */ + bool apply_image; /* has image that should be restored */ + char *bkp_image; + uint16 hole_offset; + uint16 hole_length; + uint16 bimg_len; + uint8 bimg_info; + + /* Buffer holding the rmgr-specific data associated with this block */ + bool has_data; + char *data; + uint16 data_len; + uint16 data_bufsz; +} DecodedBkpBlock; + +struct XLogReaderState +{ + /* + * Operational callbacks + */ + XLogReaderRoutine routine; + + /* ---------------------------------------- + * Public parameters + * ---------------------------------------- + */ + + /* + * System identifier of the xlog files we're about to read. Set to zero + * (the default value) if unknown or unimportant. + */ + uint64 system_identifier; + + /* + * Opaque data for callbacks to use. Not used by XLogReader. + */ + void *private_data; + + /* + * Start and end point of last record read. EndRecPtr is also used as the + * position to read next. Calling XLogBeginRead() sets EndRecPtr to the + * starting position and ReadRecPtr to invalid. + */ + XLogRecPtr ReadRecPtr; /* start of last record read */ + XLogRecPtr EndRecPtr; /* end+1 of last record read */ + + + /* ---------------------------------------- + * Decoded representation of current record + * + * Use XLogRecGet* functions to investigate the record; these fields + * should not be accessed directly. + * ---------------------------------------- + */ + XLogRecord *decoded_record; /* currently decoded record */ + + char *main_data; /* record's main data portion */ + uint32 main_data_len; /* main data portion's length */ + uint32 main_data_bufsz; /* allocated size of the buffer */ + + RepOriginId record_origin; + + TransactionId toplevel_xid; /* XID of top-level transaction */ + + /* information about blocks referenced by the record. */ + DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1]; + + int max_block_id; /* highest block_id in use (-1 if none) */ + + /* ---------------------------------------- + * private/internal state + * ---------------------------------------- + */ + + /* + * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least + * readLen bytes) + */ + char *readBuf; + uint32 readLen; + + /* last read XLOG position for data currently in readBuf */ + WALSegmentContext segcxt; + WALOpenSegment seg; + uint32 segoff; + + /* + * beginning of prior page read, and its TLI. 
Doesn't necessarily + * correspond to what's in readBuf; used for timeline sanity checks. + */ + XLogRecPtr latestPagePtr; + TimeLineID latestPageTLI; + + /* beginning of the WAL record being read. */ + XLogRecPtr currRecPtr; + /* timeline to read it from, 0 if a lookup is required */ + TimeLineID currTLI; + + /* + * Safe point to read to in currTLI if current TLI is historical + * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline. + * + * Actually set to the start of the segment containing the timeline switch + * that ends currTLI's validity, not the LSN of the switch its self, since + * we can't assume the old segment will be present. + */ + XLogRecPtr currTLIValidUntil; + + /* + * If currTLI is not the most recent known timeline, the next timeline to + * read from when currTLIValidUntil is reached. + */ + TimeLineID nextTLI; + + /* + * Buffer for current ReadRecord result (expandable), used when a record + * crosses a page boundary. + */ + char *readRecordBuf; + uint32 readRecordBufSize; + + /* Buffer to hold error message */ + char *errormsg_buf; + + /* + * Set at the end of recovery: the start point of a partial record at the + * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start + * location of its first contrecord that went missing. + */ + XLogRecPtr abortedRecPtr; + XLogRecPtr missingContrecPtr; + /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */ + XLogRecPtr overwrittenRecPtr; +}; + +/* Get a new XLogReader */ +extern XLogReaderState *XLogReaderAllocate(int wal_segment_size, + const char *waldir, + XLogReaderRoutine *routine, + void *private_data); +extern XLogReaderRoutine *LocalXLogReaderRoutine(void); + +/* Free an XLogReader */ +extern void XLogReaderFree(XLogReaderState *state); + +/* Position the XLogReader to given record */ +extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr); +#ifdef FRONTEND +extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); +#endif /* FRONTEND */ + +/* Read the next XLog record. Returns NULL on end-of-WAL or failure */ +extern struct XLogRecord *XLogReadRecord(XLogReaderState *state, + char **errormsg); + +/* Validate a page */ +extern bool XLogReaderValidatePageHeader(XLogReaderState *state, + XLogRecPtr recptr, char *phdr); + +/* + * Error information from WALRead that both backend and frontend caller can + * process. Currently only errors from pg_pread can be reported. + */ +typedef struct WALReadError +{ + int wre_errno; /* errno set by the last pg_pread() */ + int wre_off; /* Offset we tried to read from. */ + int wre_req; /* Bytes requested to be read. */ + int wre_read; /* Bytes read by the last read(). */ + WALOpenSegment wre_seg; /* Segment we tried to read from. 
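
[Editorial aside] Putting the reader pieces above together: a hedged sketch of a backend-side loop that walks WAL from a starting LSN, using the local-read callbacks whose prototypes appear in xlogutils.h at the end of this patch (read_local_xlog_page, wal_segment_open, wal_segment_close). Error handling is abbreviated and the helper is invented.

#include "postgres.h"
#include "access/xlog.h"        /* wal_segment_size */
#include "access/xlogreader.h"
#include "access/xlogutils.h"   /* read_local_xlog_page() and friends */

/* Hypothetical: count WAL records from start_lsn to the end of readable WAL. */
static int
count_records_from(XLogRecPtr start_lsn)
{
    XLogReaderState *reader;
    XLogRecord *record;
    char       *errormsg;
    int         nrecords = 0;

    reader = XLogReaderAllocate(wal_segment_size, NULL,
                                XL_ROUTINE(.page_read = &read_local_xlog_page,
                                           .segment_open = &wal_segment_open,
                                           .segment_close = &wal_segment_close),
                                NULL);
    if (reader == NULL)
        elog(ERROR, "out of memory while allocating WAL reader");

    XLogBeginRead(reader, start_lsn);
    while ((record = XLogReadRecord(reader, &errormsg)) != NULL)
        nrecords++;

    if (errormsg)
        elog(LOG, "stopped reading WAL: %s", errormsg);

    XLogReaderFree(reader);
    return nrecords;
}
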
*/ +} WALReadError; + +extern bool WALRead(XLogReaderState *state, + char *buf, XLogRecPtr startptr, Size count, + TimeLineID tli, WALReadError *errinfo); + +/* Functions for decoding an XLogRecord */ + +extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, + char **errmsg); + +#define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len) +#define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev) +#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info) +#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid) +#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid) +#define XLogRecGetOrigin(decoder) ((decoder)->record_origin) +#define XLogRecGetTopXid(decoder) ((decoder)->toplevel_xid) +#define XLogRecGetData(decoder) ((decoder)->main_data) +#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len) +#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0) +#define XLogRecHasBlockRef(decoder, block_id) \ + ((decoder)->blocks[block_id].in_use) +#define XLogRecHasBlockImage(decoder, block_id) \ + ((decoder)->blocks[block_id].has_image) +#define XLogRecBlockImageApply(decoder, block_id) \ + ((decoder)->blocks[block_id].apply_image) + +#ifndef FRONTEND +extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); +#endif + +extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); +extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); +extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, + RelFileNode *rnode, ForkNumber *forknum, + BlockNumber *blknum); + +#endif /* XLOGREADER_H */ diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h new file mode 100644 index 0000000..f68cb18 --- /dev/null +++ b/src/include/access/xlogrecord.h @@ -0,0 +1,229 @@ +/* + * xlogrecord.h + * + * Definitions for the WAL record format. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xlogrecord.h + */ +#ifndef XLOGRECORD_H +#define XLOGRECORD_H + +#include "access/rmgr.h" +#include "access/xlogdefs.h" +#include "port/pg_crc32c.h" +#include "storage/block.h" +#include "storage/relfilenode.h" + +/* + * The overall layout of an XLOG record is: + * Fixed-size header (XLogRecord struct) + * XLogRecordBlockHeader struct + * XLogRecordBlockHeader struct + * ... + * XLogRecordDataHeader[Short|Long] struct + * block data + * block data + * ... + * main data + * + * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of + * rmgr-specific data not associated with a block. XLogRecord structs + * always start on MAXALIGN boundaries in the WAL files, but the rest of + * the fields are not aligned. + * + * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and + * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's + * used to distinguish between block references, and the main data structs. 
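
[Editorial aside] The decode accessors above (XLogRecHasBlockRef, XLogRecGetBlockTag, XLogRecHasBlockImage) are how redo routines and tools such as pg_waldump inspect a decoded record. A hypothetical walk over the block references, for illustration only:

#include "postgres.h"
#include "access/xlogreader.h"

/* Hypothetical: log every block touched by the record just read. */
static void
dump_block_refs(XLogReaderState *record)
{
    int         block_id;

    for (block_id = 0; block_id <= record->max_block_id; block_id++)
    {
        RelFileNode rnode;
        ForkNumber  forknum;
        BlockNumber blkno;

        if (!XLogRecHasBlockRef(record, block_id))
            continue;

        XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno);
        elog(LOG, "block %d: rel %u/%u/%u fork %d blk %u%s",
             block_id, rnode.spcNode, rnode.dbNode, rnode.relNode,
             forknum, blkno,
             XLogRecHasBlockImage(record, block_id) ? " (has FPI)" : "");
    }
}
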
+ */ +typedef struct XLogRecord +{ + uint32 xl_tot_len; /* total len of entire record */ + TransactionId xl_xid; /* xact id */ + XLogRecPtr xl_prev; /* ptr to previous record in log */ + uint8 xl_info; /* flag bits, see below */ + RmgrId xl_rmid; /* resource manager for this record */ + /* 2 bytes of padding here, initialize to zero */ + pg_crc32c xl_crc; /* CRC for this record */ + + /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ + +} XLogRecord; + +#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) + +/* + * The high 4 bits in xl_info may be used freely by rmgr. The + * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by + * XLogInsert caller. The rest are set internally by XLogInsert. + */ +#define XLR_INFO_MASK 0x0F +#define XLR_RMGR_INFO_MASK 0xF0 + +/* + * If a WAL record modifies any relation files, in ways not covered by the + * usual block references, this flag is set. This is not used for anything + * by PostgreSQL itself, but it allows external tools that read WAL and keep + * track of modified blocks to recognize such special record types. + */ +#define XLR_SPECIAL_REL_UPDATE 0x01 + +/* + * Enforces consistency checks of replayed WAL at recovery. If enabled, + * each record will log a full-page write for each block modified by the + * record and will reuse it afterwards for consistency checks. The caller + * of XLogInsert can use this value if necessary, but if + * wal_consistency_checking is enabled for a rmgr this is set unconditionally. + */ +#define XLR_CHECK_CONSISTENCY 0x02 + +/* + * Header info for block data appended to an XLOG record. + * + * 'data_length' is the length of the rmgr-specific payload data associated + * with this block. It does not include the possible full page image, nor + * XLogRecordBlockHeader struct itself. + * + * Note that we don't attempt to align the XLogRecordBlockHeader struct! + * So, the struct must be copied to aligned local storage before use. + */ +typedef struct XLogRecordBlockHeader +{ + uint8 id; /* block reference ID */ + uint8 fork_flags; /* fork within the relation, and flags */ + uint16 data_length; /* number of payload bytes (not including page + * image) */ + + /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */ + /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */ + /* BlockNumber follows */ +} XLogRecordBlockHeader; + +#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16)) + +/* + * Additional header information when a full-page image is included + * (i.e. when BKPBLOCK_HAS_IMAGE is set). + * + * The XLOG code is aware that PG data pages usually contain an unused "hole" + * in the middle, which contains only zero bytes. Since we know that the + * "hole" is all zeros, we remove it from the stored data (and it's not counted + * in the XLOG record's CRC, either). Hence, the amount of block data actually + * present is (BLCKSZ - <length of "hole" bytes>). + * + * Additionally, when wal_compression is enabled, we will try to compress full + * page images using the PGLZ compression algorithm, after removing the "hole". + * This can reduce the WAL volume, but at some extra cost of CPU spent + * on the compression during WAL logging. In this case, since the "hole" + * length cannot be calculated by subtracting the number of page image bytes + * from BLCKSZ, basically it needs to be stored as an extra information. 
+ * But when no "hole" exists, we can assume that the "hole" length is zero + * and no such an extra information needs to be stored. Note that + * the original version of page image is stored in WAL instead of the + * compressed one if the number of bytes saved by compression is less than + * the length of extra information. Hence, when a page image is successfully + * compressed, the amount of block data actually present is less than + * BLCKSZ - the length of "hole" bytes - the length of extra information. + */ +typedef struct XLogRecordBlockImageHeader +{ + uint16 length; /* number of page image bytes */ + uint16 hole_offset; /* number of bytes before "hole" */ + uint8 bimg_info; /* flag bits, see below */ + + /* + * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an + * XLogRecordBlockCompressHeader struct follows. + */ +} XLogRecordBlockImageHeader; + +#define SizeOfXLogRecordBlockImageHeader \ + (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8)) + +/* Information stored in bimg_info */ +#define BKPIMAGE_HAS_HOLE 0x01 /* page image has "hole" */ +#define BKPIMAGE_IS_COMPRESSED 0x02 /* page image is compressed */ +#define BKPIMAGE_APPLY 0x04 /* page image should be restored during + * replay */ + +/* + * Extra header information used when page image has "hole" and + * is compressed. + */ +typedef struct XLogRecordBlockCompressHeader +{ + uint16 hole_length; /* number of bytes in "hole" */ +} XLogRecordBlockCompressHeader; + +#define SizeOfXLogRecordBlockCompressHeader \ + sizeof(XLogRecordBlockCompressHeader) + +/* + * Maximum size of the header for a block reference. This is used to size a + * temporary buffer for constructing the header. + */ +#define MaxSizeOfXLogRecordBlockHeader \ + (SizeOfXLogRecordBlockHeader + \ + SizeOfXLogRecordBlockImageHeader + \ + SizeOfXLogRecordBlockCompressHeader + \ + sizeof(RelFileNode) + \ + sizeof(BlockNumber)) + +/* + * The fork number fits in the lower 4 bits in the fork_flags field. The upper + * bits are used for flags. + */ +#define BKPBLOCK_FORK_MASK 0x0F +#define BKPBLOCK_FLAG_MASK 0xF0 +#define BKPBLOCK_HAS_IMAGE 0x10 /* block data is an XLogRecordBlockImage */ +#define BKPBLOCK_HAS_DATA 0x20 +#define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */ +#define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */ + +/* + * XLogRecordDataHeaderShort/Long are used for the "main data" portion of + * the record. If the length of the data is less than 256 bytes, the short + * form is used, with a single byte to hold the length. Otherwise the long + * form is used. + * + * (These structs are currently not used in the code, they are here just for + * documentation purposes). + */ +typedef struct XLogRecordDataHeaderShort +{ + uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */ + uint8 data_length; /* number of payload bytes */ +} XLogRecordDataHeaderShort; + +#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2) + +typedef struct XLogRecordDataHeaderLong +{ + uint8 id; /* XLR_BLOCK_ID_DATA_LONG */ + /* followed by uint32 data_length, unaligned */ +} XLogRecordDataHeaderLong; + +#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32)) + +/* + * Block IDs used to distinguish different kinds of record fragments. Block + * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use + * any ID number in that range (although you should stick to small numbers, + * because the WAL machinery is optimized for that case). 
A few ID + * numbers are reserved to denote the "main" data portion of the record, + * as well as replication-supporting transaction metadata. + * + * The maximum is currently set at 32, quite arbitrarily. Most records only + * need a handful of block references, but there are a few exceptions that + * need more. + */ +#define XLR_MAX_BLOCK_ID 32 + +#define XLR_BLOCK_ID_DATA_SHORT 255 +#define XLR_BLOCK_ID_DATA_LONG 254 +#define XLR_BLOCK_ID_ORIGIN 253 +#define XLR_BLOCK_ID_TOPLEVEL_XID 252 + +#endif /* XLOGRECORD_H */ diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h new file mode 100644 index 0000000..9ac602b --- /dev/null +++ b/src/include/access/xlogutils.h @@ -0,0 +1,63 @@ +/* + * xlogutils.h + * + * Utilities for replaying WAL records. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/xlogutils.h + */ +#ifndef XLOG_UTILS_H +#define XLOG_UTILS_H + +#include "access/xlogreader.h" +#include "storage/bufmgr.h" + + +extern bool XLogHaveInvalidPages(void); +extern void XLogCheckInvalidPages(void); + +extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum); +extern void XLogDropDatabase(Oid dbid); +extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, + BlockNumber nblocks); + +/* Result codes for XLogReadBufferForRedo[Extended] */ +typedef enum +{ + BLK_NEEDS_REDO, /* changes from WAL record need to be applied */ + BLK_DONE, /* block is already up-to-date */ + BLK_RESTORED, /* block was restored from a full-page image */ + BLK_NOTFOUND /* block was not found (and hence does not + * need to be replayed) */ +} XLogRedoAction; + +extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, + uint8 buffer_id, Buffer *buf); +extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id); +extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record, + uint8 buffer_id, + ReadBufferMode mode, bool get_cleanup_lock, + Buffer *buf); + +extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, + BlockNumber blkno, ReadBufferMode mode); + +extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); +extern void FreeFakeRelcacheEntry(Relation fakerel); + +extern int read_local_xlog_page(XLogReaderState *state, + XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, char *cur_page); +extern void wal_segment_open(XLogReaderState *state, + XLogSegNo nextSegNo, + TimeLineID *tli_p); +extern void wal_segment_close(XLogReaderState *state); + +extern void XLogReadDetermineTimeline(XLogReaderState *state, + XLogRecPtr wantPage, uint32 wantLength); + +extern void WALReadRaiseError(WALReadError *errinfo); + +#endif |
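
[Editorial aside] Finally, a hedged sketch of how a redo routine typically consumes the xlogutils.h helpers declared above: XLogReadBufferForRedo() reports via XLogRedoAction whether the referenced block still needs the change, was already restored from a full-page image, or is missing. The rmgr specifics here are invented for illustration.

#include "postgres.h"
#include "access/xlogutils.h"
#include "storage/bufmgr.h"

/* Hypothetical redo for a record whose block 0 carries a page change. */
static void
myrmgr_redo_op(XLogReaderState *record)
{
    Buffer      buffer;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        Page        page = BufferGetPage(buffer);

        /* ... re-apply the change described by the record to 'page' ... */

        PageSetLSN(page, record->EndRecPtr);
        MarkBufferDirty(buffer);
    }
    /* BLK_NOTFOUND leaves an invalid buffer; release only if valid. */
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}
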