Diffstat (limited to 'src/backend/access/gin')
-rw-r--r-- | src/backend/access/gin/Makefile          |   32
-rw-r--r-- | src/backend/access/gin/README            |  562
-rw-r--r-- | src/backend/access/gin/ginarrayproc.c    |  305
-rw-r--r-- | src/backend/access/gin/ginbtree.c        |  795
-rw-r--r-- | src/backend/access/gin/ginbulk.c         |  293
-rw-r--r-- | src/backend/access/gin/gindatapage.c     | 1942
-rw-r--r-- | src/backend/access/gin/ginentrypage.c    |  772
-rw-r--r-- | src/backend/access/gin/ginfast.c         | 1068
-rw-r--r-- | src/backend/access/gin/ginget.c          | 1970
-rw-r--r-- | src/backend/access/gin/gininsert.c       |  541
-rw-r--r-- | src/backend/access/gin/ginlogic.c        |  246
-rw-r--r-- | src/backend/access/gin/ginpostinglist.c  |  434
-rw-r--r-- | src/backend/access/gin/ginscan.c         |  468
-rw-r--r-- | src/backend/access/gin/ginutil.c         |  707
-rw-r--r-- | src/backend/access/gin/ginvacuum.c       |  822
-rw-r--r-- | src/backend/access/gin/ginvalidate.c     |  338
-rw-r--r-- | src/backend/access/gin/ginxlog.c         |  813
17 files changed, 12108 insertions, 0 deletions
diff --git a/src/backend/access/gin/Makefile b/src/backend/access/gin/Makefile new file mode 100644 index 0000000..3fceaee --- /dev/null +++ b/src/backend/access/gin/Makefile @@ -0,0 +1,32 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/gin +# +# IDENTIFICATION +# src/backend/access/gin/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/gin +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + ginarrayproc.o \ + ginbtree.o \ + ginbulk.o \ + gindatapage.o \ + ginentrypage.o \ + ginfast.o \ + ginget.o \ + gininsert.o \ + ginlogic.o \ + ginpostinglist.o \ + ginscan.o \ + ginutil.o \ + ginvacuum.o \ + ginvalidate.o \ + ginxlog.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README new file mode 100644 index 0000000..41d4e1e --- /dev/null +++ b/src/backend/access/gin/README @@ -0,0 +1,562 @@ +src/backend/access/gin/README + +Gin for PostgreSQL +================== + +Gin was sponsored by jfg://networks (http://www.jfg-networks.com/) + +Gin stands for Generalized Inverted Index and should be considered as a genie, +not a drink. + +Generalized means that the index does not know which operation it accelerates. +It instead works with custom strategies, defined for specific data types (read +"Index Method Strategies" in the PostgreSQL documentation). In that sense, Gin +is similar to GiST and differs from btree indices, which have predefined, +comparison-based operations. + +An inverted index is an index structure storing a set of (key, posting list) +pairs, where 'posting list' is a set of heap rows in which the key occurs. +(A text document would usually contain many keys.) The primary goal of +Gin indices is support for highly scalable, full-text search in PostgreSQL. + +A Gin index consists of a B-tree index constructed over key values, +where each key is an element of some indexed items (element of array, lexeme +for tsvector) and where each tuple in a leaf page contains either a pointer to +a B-tree over item pointers (posting tree), or a simple list of item pointers +(posting list) if the list is small enough. + +Note: There is no delete operation in the key (entry) tree. The reason for +this is that in our experience, the set of distinct words in a large corpus +changes very slowly. This greatly simplifies the code and concurrency +algorithms. + +Core PostgreSQL includes built-in Gin support for one-dimensional arrays +(eg. integer[], text[]). The following operations are available: + + * contains: value_array @> query_array + * overlaps: value_array && query_array + * is contained by: value_array <@ query_array + +Synopsis +-------- + +=# create index txt_idx on aa using gin(a); + +Features +-------- + + * Concurrency + * Write-Ahead Logging (WAL). (Recoverability from crashes.) + * User-defined opclasses. (The scheme is similar to GiST.) + * Optimized index creation (Makes use of maintenance_work_mem to accumulate + postings in memory.) + * Text search support via an opclass + * Soft upper limit on the returned results set using a GUC variable: + gin_fuzzy_search_limit + +Gin Fuzzy Limit +--------------- + +There are often situations when a full-text search returns a very large set of +results. Since reading tuples from the disk and sorting them could take a +lot of time, this is unacceptable for production. (Note that the search +itself is very fast.) 
+ +Such queries usually contain very frequent lexemes, so the results are not +very helpful. To facilitate execution of such queries Gin has a configurable +soft upper limit on the size of the returned set, determined by the +'gin_fuzzy_search_limit' GUC variable. This is set to 0 by default (no +limit). + +If a non-zero search limit is set, then the returned set is a subset of the +whole result set, chosen at random. + +"Soft" means that the actual number of returned results could differ +from the specified limit, depending on the query and the quality of the +system's random number generator. + +From experience, a value of 'gin_fuzzy_search_limit' in the thousands +(eg. 5000-20000) works well. This means that 'gin_fuzzy_search_limit' will +have no effect for queries returning a result set with less tuples than this +number. + +Index structure +--------------- + +The "items" that a GIN index indexes are composite values that contain +zero or more "keys". For example, an item might be an integer array, and +then the keys would be the individual integer values. The index actually +stores and searches for the key values, not the items per se. In the +pg_opclass entry for a GIN opclass, the opcintype is the data type of the +items, and the opckeytype is the data type of the keys. GIN is optimized +for cases where items contain many keys and the same key values appear +in many different items. + +A GIN index contains a metapage, a btree of key entries, and possibly +"posting tree" pages, which hold the overflow when a key entry acquires +too many heap tuple pointers to fit in a btree page. Additionally, if the +fast-update feature is enabled, there can be "list pages" holding "pending" +key entries that haven't yet been merged into the main btree. The list +pages have to be scanned linearly when doing a search, so the pending +entries should be merged into the main btree before there get to be too +many of them. The advantage of the pending list is that bulk insertion of +a few thousand entries can be much faster than retail insertion. (The win +comes mainly from not having to do multiple searches/insertions when the +same key appears in multiple new heap tuples.) + +Key entries are nominally of the same IndexTuple format as used in other +index types, but since a leaf key entry typically refers to multiple heap +tuples, there are significant differences. (See GinFormTuple, which works +by building a "normal" index tuple and then modifying it.) The points to +know are: + +* In a single-column index, a key tuple just contains the key datum, but +in a multi-column index, a key tuple contains the pair (column number, +key datum) where the column number is stored as an int2. This is needed +to support different key data types in different columns. This much of +the tuple is built by index_form_tuple according to the usual rules. +The column number (if present) can never be null, but the key datum can +be, in which case a null bitmap is present as usual. (As usual for index +tuples, the size of the null bitmap is fixed at INDEX_MAX_KEYS.) + +* If the key datum is null (ie, IndexTupleHasNulls() is true), then +just after the nominal index data (ie, at offset IndexInfoFindDataOffset +or IndexInfoFindDataOffset + sizeof(int2)) there is a byte indicating +the "category" of the null entry. 
These are the possible categories: + 1 = ordinary null key value extracted from an indexable item + 2 = placeholder for zero-key indexable item + 3 = placeholder for null indexable item +Placeholder null entries are inserted into the index because otherwise +there would be no index entry at all for an empty or null indexable item, +which would mean that full index scans couldn't be done and various corner +cases would give wrong answers. The different categories of null entries +are treated as distinct keys by the btree, but heap itempointers for the +same category of null entry are merged into one index entry just as happens +with ordinary key entries. + +* In a key entry at the btree leaf level, at the next SHORTALIGN boundary, +there is a list of item pointers, in compressed format (see Posting List +Compression section), pointing to the heap tuples for which the indexable +items contain this key. This is called the "posting list". + +If the list would be too big for the index tuple to fit on an index page, the +ItemPointers are pushed out to a separate posting page or pages, and none +appear in the key entry itself. The separate pages are called a "posting +tree" (see below); Note that in either case, the ItemPointers associated with +a key can easily be read out in sorted order; this is relied on by the scan +algorithms. + +* The index tuple header fields of a leaf key entry are abused as follows: + +1) Posting list case: + +* ItemPointerGetBlockNumber(&itup->t_tid) contains the offset from index + tuple start to the posting list. + Access macros: GinGetPostingOffset(itup) / GinSetPostingOffset(itup,n) + +* ItemPointerGetOffsetNumber(&itup->t_tid) contains the number of elements + in the posting list (number of heap itempointers). + Access macros: GinGetNPosting(itup) / GinSetNPosting(itup,n) + +* If IndexTupleHasNulls(itup) is true, the null category byte can be + accessed/set with GinGetNullCategory(itup,gs) / GinSetNullCategory(itup,gs,c) + +* The posting list can be accessed with GinGetPosting(itup) + +* If GinItupIsCompressed(itup), the posting list is stored in compressed + format. Otherwise it is just an array of ItemPointers. New tuples are always + stored in compressed format, uncompressed items can be present if the + database was migrated from 9.3 or earlier version. + +2) Posting tree case: + +* ItemPointerGetBlockNumber(&itup->t_tid) contains the index block number + of the root of the posting tree. + Access macros: GinGetPostingTree(itup) / GinSetPostingTree(itup, blkno) + +* ItemPointerGetOffsetNumber(&itup->t_tid) contains the magic number + GIN_TREE_POSTING, which distinguishes this from the posting-list case + (it's large enough that that many heap itempointers couldn't possibly + fit on an index page). This value is inserted automatically by the + GinSetPostingTree macro. + +* If IndexTupleHasNulls(itup) is true, the null category byte can be + accessed/set with GinGetNullCategory(itup,gs) / GinSetNullCategory(itup,gs,c) + +* The posting list is not present and must not be accessed. + +Use the macro GinIsPostingTree(itup) to determine which case applies. + +In both cases, itup->t_info & INDEX_SIZE_MASK contains actual total size of +tuple, and the INDEX_VAR_MASK and INDEX_NULL_MASK bits have their normal +meanings as set by index_form_tuple. + +Index tuples in non-leaf levels of the btree contain the optional column +number, key datum, and null category byte as above. They do not contain +a posting list. 
ItemPointerGetBlockNumber(&itup->t_tid) is the downlink
+to the next lower btree level, and ItemPointerGetOffsetNumber(&itup->t_tid)
+is InvalidOffsetNumber. Use the access macros GinGetDownlink/GinSetDownlink
+to get/set the downlink.
+
+Index entries that appear in "pending list" pages work a tad differently as
+well. The optional column number, key datum, and null category byte are as
+for other GIN index entries. However, there is always exactly one heap
+itempointer associated with a pending entry, and it is stored in the t_tid
+header field just as in non-GIN indexes. There is no posting list.
+Furthermore, the code that searches the pending list assumes that all
+entries for a given heap tuple appear consecutively in the pending list and
+are sorted by the column-number-plus-key-datum. The GIN_LIST_FULLROW page
+flag bit tells whether entries for a given heap tuple are spread across
+multiple pending-list pages. If GIN_LIST_FULLROW is set, the page contains
+all the entries for one or more heap tuples. If GIN_LIST_FULLROW is clear,
+the page contains entries for only one heap tuple, *and* they are not all
+the entries for that tuple. (Thus, a heap tuple whose entries do not all
+fit on one pending-list page must have those pages to itself, even if this
+results in wasting much of the space on the preceding page and the last
+page for the tuple.)
+
+GIN packs downlinks and pivot keys into internal page tuples in a different
+way than nbtree does. Lehman & Yao define it as follows.
+
+P_0, K_1, P_1, K_2, P_2, ... , K_n, P_n, K_{n+1}
+
+Here P_i is a downlink and K_i is a key. K_i splits the key space between
+P_{i-1} and P_i (0 <= i <= n). K_{n+1} is the high key.
+
+In an internal page tuple, a key and a downlink are grouped together. nbtree
+packs keys and downlinks into tuples as follows.
+
+(K_{n+1}, None), (-Inf, P_0), (K_1, P_1), ... , (K_n, P_n)
+
+Here tuples are shown in parentheses. So, the high key is stored separately.
+P_i is grouped with K_i. P_0 is grouped with the -Inf key.
+
+GIN packs keys and downlinks into tuples in a different way.
+
+(P_0, K_1), (P_1, K_2), ... , (P_n, K_{n+1})
+
+P_i is grouped with K_{i+1}. The -Inf key is not needed.
+
+There are a couple of additional notes regarding the K_{n+1} key.
+1) In the rightmost page of the entry tree, the key coupled with P_n doesn't
+really matter. The high key is assumed to be infinity.
+2) In the posting tree, the key coupled with P_n never matters. The high key
+for non-rightmost pages is stored separately and accessed via
+GinDataPageGetRightBound().
+
+Posting tree
+------------
+
+If a posting list is too large to store in-line in a key entry, a posting
+tree is created. A posting tree is a B-tree structure, where the ItemPointer
+is used as the key.
+
+Internal posting tree pages use the standard PageHeader and the same "opaque"
+struct as other GIN pages, but do not contain regular index tuples. Instead,
+the content of the page is an array of PostingItem structs. Each PostingItem
+consists of the block number of the child page, and the right bound of that
+child page, as an ItemPointer. The right bound of the page is stored right
+after the page header, before the PostingItem array.
+
+Posting tree leaf pages also use the standard PageHeader and opaque struct,
+and the right bound of the page is stored right after the page header, but
+the page content comprises a number of compressed posting lists. The
+compressed posting lists are stored one after another, between the page
+header and pd_lower.
+The space between pd_lower and pd_upper is unused, which allows full-page
+images of posting tree leaf pages to skip the unused space in the middle
+(buffer_std = true in XLogRecData).
+
+The item pointers are stored in a number of independent compressed posting
+lists (also called segments), instead of one big one, to make random access
+to a given item pointer faster: to find an item in a compressed list, you
+have to read the list from the beginning, but when the items are split into
+multiple lists, you can first skip over to the list containing the item
+you're looking for, and read only that segment. Also, an update only needs
+to re-encode the affected segment.
+
+Posting List Compression
+------------------------
+
+To fit as many item pointers on a page as possible, posting tree leaf pages
+and posting lists stored inline in entry tree leaf tuples use a lightweight
+form of compression. We take advantage of the fact that the item pointers
+are stored in sorted order. Instead of storing the block and offset number of
+each item pointer separately, we store the difference from the previous item.
+That in itself doesn't do much, but it allows us to use so-called varbyte
+encoding to compress them.
+
+Varbyte encoding is a method to encode integers, allowing smaller numbers to
+take less space at the cost of larger numbers. Each integer is represented by
+a variable number of bytes. The high bit of each byte determines whether the
+next byte is still part of this number. Therefore, to read a single
+varbyte-encoded number, you have to read bytes until you find a byte with the
+high bit not set.
+
+When encoding, the block and offset number forming the item pointer are
+combined into a single integer. The offset number is stored in the 11 low
+bits (see MaxHeapTuplesPerPageBits in ginpostinglist.c), and the block number
+is stored in the higher bits. That requires 43 bits in total, which
+conveniently fits in at most 6 bytes.
+
+A compressed posting list is passed around and stored on disk in a
+GinPostingList struct. The first item in the list is stored uncompressed
+as a regular ItemPointerData, followed by the length of the list in bytes,
+followed by the packed items.
+
+Concurrency
+-----------
+
+The entry tree and each posting tree are B-trees, with right-links connecting
+sibling pages at the same level. This is the same structure that is used in
+the regular B-tree indexam (invented by Lehman & Yao), but we don't support
+scanning GIN trees backwards, so we don't need left-links. The entry tree
+leaves don't have dedicated high keys; instead the greatest leaf tuple serves
+as the high key. That works because tuples are never deleted from the entry
+tree.
+
+The algorithms used to operate entry and posting trees are considered below.
+
+### Locating the leaf page
+
+When we search for a leaf page in a GIN btree to perform a read, we descend
+from the root page to the leaf using downlinks, taking a pin and shared lock
+on one page at a time. We release the pin and shared lock on the previous
+page before acquiring them on the next page.
+
+The picture below shows the tree state after finding the leaf page. Lower
+case letters depict tree pages. 'S' depicts a shared lock on the page.
+
+              a
+          /   |   \
+         b    c    d
+       / | \  | \  | \
+      eS f g  h i  j k
+
+### Stepping right
+
+Concurrent page splits move the keyspace to the right, so after following a
+downlink, the page actually containing the key we're looking for might be
+somewhere to the right of the page we landed on.
+In that case, we follow the right-links until we find the page we're looking
+for.
+
+During stepping right we take a pin and shared lock on the right sibling
+before releasing them on the current page. This mechanism protects us from
+stepping onto a deleted page. We step to the right sibling while holding a
+lock on the page whose rightlink points there, so it's guaranteed that nobody
+can concurrently update that rightlink or delete the right sibling.
+
+The picture below shows two pages locked at once during stepping right.
+
+              a
+          /   |   \
+         b    c    d
+       / | \  | \  | \
+     eS fS g  h i  j k
+
+### Insert
+
+While finding the appropriate leaf for insertion we also descend from the
+root to the leaf, taking a shared lock on one page at a time. But during
+insertion we don't release the pins on the root and internal pages. That can
+save us some lookups in the buffer hash table when inserting downlinks,
+assuming the parents are not changed by concurrent splits. Once we reach the
+leaf, we re-lock the page in exclusive mode.
+
+The picture below shows the leaf page locked in exclusive mode and ready for
+insertion. 'P' and 'E' depict a pin and an exclusive lock, respectively.
+
+              aP
+          /   |   \
+         b    cP    d
+       / | \  | \   | \
+      e  f g  hE i  j k
+
+If the insert causes a page split, the parent is locked in exclusive mode
+before unlocking the left child. Thus, the insertion algorithm can hold
+exclusive locks on both parent and child pages at once, starting from the
+child.
+
+The picture below shows the tree state after a leaf page split. 'q' is the
+new page produced by the split. Parent 'c' is about to have a downlink
+inserted.
+
+              aP
+          /   |    \
+         b    cE     d
+       / | \ / | \   | \
+      e  f g hE q i  j k
+
+### Page deletion
+
+Vacuum never deletes tuples or pages from the entry tree. It traverses entry
+tree leaves in logical order by rightlinks and removes deletable TIDs from
+posting lists. Posting trees are reached via links from entry tree leaves.
+They are vacuumed in two stages. At the first stage, deletable TIDs are
+removed from leaves. If the first stage detects at least one empty page, then
+at the second stage ginScanToDelete() deletes empty pages.
+
+ginScanToDelete() traverses the whole tree in a depth-first manner. It starts
+by taking a super-exclusive lock on the tree root. This lock prevents all
+concurrent insertions into this tree while we're deleting pages. However,
+there might still be some in-progress readers that traversed the root before
+we locked it.
+
+The picture below shows the tree state after the page deletion algorithm has
+traversed to the leftmost leaf of the tree.
+
+              aE
+          /   |   \
+        bE    c    d
+       / | \  | \  | \
+     eE  f g  h i  j k
+
+The deletion algorithm keeps exclusive locks on the left siblings of the
+pages comprising the currently investigated path. Thus, if the current page
+is to be removed, all pages required to remove both its downlink and
+rightlink are already locked. That avoids a potential right-to-left page
+locking order, which could deadlock with concurrent stepping right.
+
+A search concurrent to page deletion might already have read a pointer to the
+page to be deleted, and might be just about to follow it. A page can be
+reached via the right-link of its left sibling, or via its downlink in the
+parent.
+
+To prevent a backend from reaching a deleted page via a right-link, the
+stepping-right algorithm doesn't release the lock on the current page until
+the lock on the right page is acquired.
+
+The downlink is more tricky.
+A search descending the tree must release the lock on the parent page before
+locking the child, or it could deadlock with a concurrent split of the child
+page; a page split locks the parent while already holding a lock on the child
+page. So, a deleted page cannot be reclaimed immediately. Instead, we have to
+wait for every transaction that might still reference this page to finish.
+The corresponding processes must observe that the page is marked deleted and
+recover accordingly.
+
+The picture below shows the tree state after the page deletion algorithm has
+traversed further into the tree. The currently investigated path is 'a-c-h'.
+The left siblings 'b' and 'g' of 'c' and 'h', respectively, are also
+exclusively locked.
+
+              aE
+          /   |   \
+        bE    cE    d
+       / | \  | \   | \
+      e f gE  hE i  j k
+
+The next picture shows the tree state after page 'h' has been deleted. It is
+marked with the 'deleted' flag and the newest xid that might still visit it.
+The downlink from 'c' to 'h' is also deleted.
+
+              aE
+          /   |   \
+        bE    cE    d
+       / | \     \  | \
+      e f gE hD  iE j k
+
+However, it's still possible that a concurrent reader saw the downlink from
+'c' to 'h' before we deleted it. In that case this reader will step right
+from 'h' until it finds a non-deleted page. The xid-marking of page 'h'
+guarantees that the page won't be reused until all such readers are gone.
+The next leaf page under investigation is 'i'. 'g' remains locked as it
+becomes the left sibling of 'i'.
+
+The next picture shows the tree state after 'i' and 'c' have been deleted.
+Internal page 'c' was deleted because it no longer had any downlinks. The
+path under investigation is 'a-d-j'. Pages 'b' and 'g' are locked as the left
+siblings of 'd' and 'j'.
+
+              aE
+          /         \
+        bE    cD    dE
+       / | \        | \
+      e f gE hD iD jE k
+
+During replay of a page deletion on a standby, the page's left sibling, the
+target page, and its parent are locked in that order. This order guarantees
+no deadlock with concurrent reads.
+
+Predicate Locking
+-----------------
+
+GIN supports predicate locking, for serializable snapshot isolation.
+A predicate lock represents that a scan has scanned a range of values.
+Predicate locks are not concerned with physical pages as such, but with
+logical key values. A predicate lock on a page covers the key range that
+would belong on that page, whether or not there are any matching tuples there
+currently. In other words, a predicate lock on an index page covers the
+"gaps" between the index tuples. To minimize false positives, predicate locks
+are acquired at the finest level possible.
+
+* Like in the B-tree index, it is enough to lock only leaf pages, because all
+  insertions happen at the leaf level.
+
+* In an equality search (i.e. not a partial match search), if a key entry has
+  a posting tree, we lock the posting tree root page, to represent a lock on
+  just that key entry. Otherwise, we lock the entry tree page. We also lock
+  the entry tree page if no match is found, to lock the "gap" where the entry
+  would've been, had there been one.
+
+* In a partial match search, we lock all the entry leaf pages that we scan,
+  in addition to locks on posting tree roots, to represent the "gaps" between
+  values.
+
+* In addition to the locks on entry leaf pages and posting tree roots, all
+  scans grab a lock on the metapage. This is to interlock with insertions to
+  the fast update pending list. An insertion to the pending list can really
+  belong anywhere in the tree, and the lock on the metapage represents that.
+
+The interlock for fastupdate pending lists means that with fastupdate=on,
+we effectively always grab a full-index lock, so you could get a lot of false
+positives.
+
+Compatibility
+-------------
+
+Compression of TIDs was introduced in 9.4. Some GIN indexes could remain in
+uncompressed format because of pg_upgrade from 9.3 or earlier versions.
+For compatibility, the old uncompressed format is also supported. The
+following rules are used to handle it:
+
+* The GIN_ITUP_COMPRESSED flag marks index tuples that contain a posting
+  list. This flag is stored in the high bit of
+  ItemPointerGetBlockNumber(&itup->t_tid). Use GinItupIsCompressed(itup) to
+  check the flag.
+
+* Posting tree pages in the new format are marked with the GIN_COMPRESSED
+  flag. Macros GinPageIsCompressed(page) and GinPageSetCompressed(page) are
+  used to check and set this flag.
+
+* All scan operations check the format of the posting list and use the
+  corresponding code to read its content.
+
+* When updating an index tuple containing an uncompressed posting list, it
+  will be replaced with a new index tuple containing a compressed list.
+
+* When updating an uncompressed posting tree leaf page, it's compressed.
+
+* If vacuum finds some dead TIDs in uncompressed posting lists, they are
+  converted into compressed posting lists. This assumes that the compressed
+  posting list fits in the space occupied by the uncompressed list. IOW, we
+  assume that the compressed version of the page, with the dead items
+  removed, takes less space than the old uncompressed version.
+
+Limitations
+-----------
+
+ * Gin doesn't use scan->kill_prior_tuple & scan->ignore_killed_tuples
+ * Gin searches entries only by equality matching, or simple range
+   matching using the "partial match" feature.
+
+TODO
+----
+
+Nearest future:
+
+ * Opclasses for more types (no programming, just many catalog changes)
+
+Distant future:
+
+ * Replace the B-tree of entries with something like GiST
+
+Authors
+-------
+
+Original work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov
+(oleg@sai.msu.su).
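The following standalone C sketch illustrates the item-pointer packing and
varbyte delta encoding described in the Posting List Compression section of
the README above. It is an editorial illustration, not code from this commit:
the function names pack_item and encode_delta are made up for the example,
and only the constants (11 offset bits, high-bit continuation) follow the
README text; the actual implementation lives in ginpostinglist.c.

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /*
     * Pack a (block number, offset number) item pointer into one integer:
     * the offset goes into the 11 low bits, the block number into the bits
     * above, as the README describes.
     */
    static uint64_t
    pack_item(uint32_t blkno, uint16_t offnum)
    {
        return ((uint64_t) blkno << 11) | (uint64_t) (offnum & 0x7FF);
    }

    /*
     * Varbyte-encode the delta from the previous packed item: seven data
     * bits per byte, high bit set while more bytes follow.  Returns the
     * number of bytes written to 'out'.
     */
    static size_t
    encode_delta(uint64_t prev, uint64_t cur, unsigned char *out)
    {
        uint64_t    delta = cur - prev; /* items are sorted, so cur >= prev */
        size_t      n = 0;

        do
        {
            unsigned char b = (unsigned char) (delta & 0x7F);

            delta >>= 7;
            if (delta != 0)
                b |= 0x80;      /* more bytes follow */
            out[n++] = b;
        } while (delta != 0);

        return n;
    }

    int
    main(void)
    {
        unsigned char buf[8];
        uint64_t      a = pack_item(10, 3);   /* hypothetical: block 10, offset 3 */
        uint64_t      b = pack_item(10, 200); /* next item on the same block */
        size_t        len = encode_delta(a, b, buf);

        printf("delta encoded in %zu byte(s), first byte 0x%02X\n", len, buf[0]);
        return 0;
    }

Because consecutive item pointers in a posting list are usually close
together, most deltas fit in a single byte; that is where the space saving
comes from.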
diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c new file mode 100644 index 0000000..bf73e32 --- /dev/null +++ b/src/backend/access/gin/ginarrayproc.c @@ -0,0 +1,305 @@ +/*------------------------------------------------------------------------- + * + * ginarrayproc.c + * support functions for GIN's indexing of any array + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginarrayproc.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gin.h" +#include "access/stratnum.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +#define GinOverlapStrategy 1 +#define GinContainsStrategy 2 +#define GinContainedStrategy 3 +#define GinEqualStrategy 4 + + +/* + * extractValue support function + */ +Datum +ginarrayextract(PG_FUNCTION_ARGS) +{ + /* Make copy of array input to ensure it doesn't disappear while in use */ + ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0); + int32 *nkeys = (int32 *) PG_GETARG_POINTER(1); + bool **nullFlags = (bool **) PG_GETARG_POINTER(2); + int16 elmlen; + bool elmbyval; + char elmalign; + Datum *elems; + bool *nulls; + int nelems; + + get_typlenbyvalalign(ARR_ELEMTYPE(array), + &elmlen, &elmbyval, &elmalign); + + deconstruct_array(array, + ARR_ELEMTYPE(array), + elmlen, elmbyval, elmalign, + &elems, &nulls, &nelems); + + *nkeys = nelems; + *nullFlags = nulls; + + /* we should not free array, elems[i] points into it */ + PG_RETURN_POINTER(elems); +} + +/* + * Formerly, ginarrayextract had only two arguments. Now it has three, + * but we still need a pg_proc entry with two args to support reloading + * pre-9.1 contrib/intarray opclass declarations. This compatibility + * function should go away eventually. 
+ */ +Datum +ginarrayextract_2args(PG_FUNCTION_ARGS) +{ + if (PG_NARGS() < 3) /* should not happen */ + elog(ERROR, "ginarrayextract requires three arguments"); + return ginarrayextract(fcinfo); +} + +/* + * extractQuery support function + */ +Datum +ginqueryarrayextract(PG_FUNCTION_ARGS) +{ + /* Make copy of array input to ensure it doesn't disappear while in use */ + ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0); + int32 *nkeys = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + + /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */ + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + bool **nullFlags = (bool **) PG_GETARG_POINTER(5); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + int16 elmlen; + bool elmbyval; + char elmalign; + Datum *elems; + bool *nulls; + int nelems; + + get_typlenbyvalalign(ARR_ELEMTYPE(array), + &elmlen, &elmbyval, &elmalign); + + deconstruct_array(array, + ARR_ELEMTYPE(array), + elmlen, elmbyval, elmalign, + &elems, &nulls, &nelems); + + *nkeys = nelems; + *nullFlags = nulls; + + switch (strategy) + { + case GinOverlapStrategy: + *searchMode = GIN_SEARCH_MODE_DEFAULT; + break; + case GinContainsStrategy: + if (nelems > 0) + *searchMode = GIN_SEARCH_MODE_DEFAULT; + else /* everything contains the empty set */ + *searchMode = GIN_SEARCH_MODE_ALL; + break; + case GinContainedStrategy: + /* empty set is contained in everything */ + *searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY; + break; + case GinEqualStrategy: + if (nelems > 0) + *searchMode = GIN_SEARCH_MODE_DEFAULT; + else + *searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY; + break; + default: + elog(ERROR, "ginqueryarrayextract: unknown strategy number: %d", + strategy); + } + + /* we should not free array, elems[i] points into it */ + PG_RETURN_POINTER(elems); +} + +/* + * consistent support function + */ +Datum +ginarrayconsistent(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* ArrayType *query = PG_GETARG_ARRAYTYPE_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + bool *recheck = (bool *) PG_GETARG_POINTER(5); + + /* Datum *queryKeys = (Datum *) PG_GETARG_POINTER(6); */ + bool *nullFlags = (bool *) PG_GETARG_POINTER(7); + bool res; + int32 i; + + switch (strategy) + { + case GinOverlapStrategy: + /* result is not lossy */ + *recheck = false; + /* must have a match for at least one non-null element */ + res = false; + for (i = 0; i < nkeys; i++) + { + if (check[i] && !nullFlags[i]) + { + res = true; + break; + } + } + break; + case GinContainsStrategy: + /* result is not lossy */ + *recheck = false; + /* must have all elements in check[] true, and no nulls */ + res = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i] || nullFlags[i]) + { + res = false; + break; + } + } + break; + case GinContainedStrategy: + /* we will need recheck */ + *recheck = true; + /* can't do anything else useful here */ + res = true; + break; + case GinEqualStrategy: + /* we will need recheck */ + *recheck = true; + + /* + * Must have all elements in check[] true; no discrimination + * against nulls here. This is because array_contain_compare and + * array_eq handle nulls differently ... 
+ */ + res = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + break; + default: + elog(ERROR, "ginarrayconsistent: unknown strategy number: %d", + strategy); + res = false; + } + + PG_RETURN_BOOL(res); +} + +/* + * triconsistent support function + */ +Datum +ginarraytriconsistent(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* ArrayType *query = PG_GETARG_ARRAYTYPE_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + /* Datum *queryKeys = (Datum *) PG_GETARG_POINTER(5); */ + bool *nullFlags = (bool *) PG_GETARG_POINTER(6); + GinTernaryValue res; + int32 i; + + switch (strategy) + { + case GinOverlapStrategy: + /* must have a match for at least one non-null element */ + res = GIN_FALSE; + for (i = 0; i < nkeys; i++) + { + if (!nullFlags[i]) + { + if (check[i] == GIN_TRUE) + { + res = GIN_TRUE; + break; + } + else if (check[i] == GIN_MAYBE && res == GIN_FALSE) + { + res = GIN_MAYBE; + } + } + } + break; + case GinContainsStrategy: + /* must have all elements in check[] true, and no nulls */ + res = GIN_TRUE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE || nullFlags[i]) + { + res = GIN_FALSE; + break; + } + if (check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + } + } + break; + case GinContainedStrategy: + /* can't do anything else useful here */ + res = GIN_MAYBE; + break; + case GinEqualStrategy: + + /* + * Must have all elements in check[] true; no discrimination + * against nulls here. This is because array_contain_compare and + * array_eq handle nulls differently ... + */ + res = GIN_MAYBE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + break; + default: + elog(ERROR, "ginarrayconsistent: unknown strategy number: %d", + strategy); + res = false; + } + + PG_RETURN_GIN_TERNARY_VALUE(res); +} diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c new file mode 100644 index 0000000..482cf10 --- /dev/null +++ b/src/backend/access/gin/ginbtree.c @@ -0,0 +1,795 @@ +/*------------------------------------------------------------------------- + * + * ginbtree.c + * page utilities routines for the postgres inverted index access method. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginbtree.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/xloginsert.h" +#include "miscadmin.h" +#include "storage/predicate.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +static void ginFindParents(GinBtree btree, GinBtreeStack *stack); +static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + Buffer childbuf, GinStatsData *buildStats); +static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack, + bool freestack, GinStatsData *buildStats); + +/* + * Lock buffer by needed method for search. 
+ */ +int +ginTraverseLock(Buffer buffer, bool searchMode) +{ + Page page; + int access = GIN_SHARE; + + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + if (GinPageIsLeaf(page)) + { + if (searchMode == false) + { + /* we should relock our page */ + LockBuffer(buffer, GIN_UNLOCK); + LockBuffer(buffer, GIN_EXCLUSIVE); + + /* But root can become non-leaf during relock */ + if (!GinPageIsLeaf(page)) + { + /* restore old lock type (very rare) */ + LockBuffer(buffer, GIN_UNLOCK); + LockBuffer(buffer, GIN_SHARE); + } + else + access = GIN_EXCLUSIVE; + } + } + + return access; +} + +/* + * Descend the tree to the leaf page that contains or would contain the key + * we're searching for. The key should already be filled in 'btree', in + * tree-type specific manner. If btree->fullScan is true, descends to the + * leftmost leaf page. + * + * If 'searchmode' is false, on return stack->buffer is exclusively locked, + * and the stack represents the full path to the root. Otherwise stack->buffer + * is share-locked, and stack->parent is NULL. + * + * If 'rootConflictCheck' is true, tree root is checked for serialization + * conflict. + */ +GinBtreeStack * +ginFindLeafPage(GinBtree btree, bool searchMode, + bool rootConflictCheck, Snapshot snapshot) +{ + GinBtreeStack *stack; + + stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); + stack->blkno = btree->rootBlkno; + stack->buffer = ReadBuffer(btree->index, btree->rootBlkno); + stack->parent = NULL; + stack->predictNumber = 1; + + if (rootConflictCheck) + CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno); + + for (;;) + { + Page page; + BlockNumber child; + int access; + + stack->off = InvalidOffsetNumber; + + page = BufferGetPage(stack->buffer); + TestForOldSnapshot(snapshot, btree->index, page); + + access = ginTraverseLock(stack->buffer, searchMode); + + /* + * If we're going to modify the tree, finish any incomplete splits we + * encounter on the way. + */ + if (!searchMode && GinPageIsIncompleteSplit(page)) + ginFinishSplit(btree, stack, false, NULL); + + /* + * ok, page is correctly locked, we should check to move right .., + * root never has a right link, so small optimization + */ + while (btree->fullScan == false && stack->blkno != btree->rootBlkno && + btree->isMoveRight(btree, page)) + { + BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; + + if (rightlink == InvalidBlockNumber) + /* rightmost page */ + break; + + stack->buffer = ginStepRight(stack->buffer, btree->index, access); + stack->blkno = rightlink; + page = BufferGetPage(stack->buffer); + TestForOldSnapshot(snapshot, btree->index, page); + + if (!searchMode && GinPageIsIncompleteSplit(page)) + ginFinishSplit(btree, stack, false, NULL); + } + + if (GinPageIsLeaf(page)) /* we found, return locked page */ + return stack; + + /* now we have correct buffer, try to find child */ + child = btree->findChildPage(btree, stack); + + LockBuffer(stack->buffer, GIN_UNLOCK); + Assert(child != InvalidBlockNumber); + Assert(stack->blkno != child); + + if (searchMode) + { + /* in search mode we may forget path to leaf */ + stack->blkno = child; + stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno); + } + else + { + GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); + + ptr->parent = stack; + stack = ptr; + stack->blkno = child; + stack->buffer = ReadBuffer(btree->index, stack->blkno); + stack->predictNumber = 1; + } + } +} + +/* + * Step right from current page. 
+ * + * The next page is locked first, before releasing the current page. This is + * crucial to protect from concurrent page deletion (see comment in + * ginDeletePage). + */ +Buffer +ginStepRight(Buffer buffer, Relation index, int lockmode) +{ + Buffer nextbuffer; + Page page = BufferGetPage(buffer); + bool isLeaf = GinPageIsLeaf(page); + bool isData = GinPageIsData(page); + BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + + nextbuffer = ReadBuffer(index, blkno); + LockBuffer(nextbuffer, lockmode); + UnlockReleaseBuffer(buffer); + + /* Sanity check that the page we stepped to is of similar kind. */ + page = BufferGetPage(nextbuffer); + if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page)) + elog(ERROR, "right sibling of GIN page is of different type"); + + return nextbuffer; +} + +void +freeGinBtreeStack(GinBtreeStack *stack) +{ + while (stack) + { + GinBtreeStack *tmp = stack->parent; + + if (stack->buffer != InvalidBuffer) + ReleaseBuffer(stack->buffer); + + pfree(stack); + stack = tmp; + } +} + +/* + * Try to find parent for current stack position. Returns correct parent and + * child's offset in stack->parent. The root page is never released, to + * prevent conflict with vacuum process. + */ +static void +ginFindParents(GinBtree btree, GinBtreeStack *stack) +{ + Page page; + Buffer buffer; + BlockNumber blkno, + leftmostBlkno; + OffsetNumber offset; + GinBtreeStack *root; + GinBtreeStack *ptr; + + /* + * Unwind the stack all the way up to the root, leaving only the root + * item. + * + * Be careful not to release the pin on the root page! The pin on root + * page is required to lock out concurrent vacuums on the tree. + */ + root = stack->parent; + while (root->parent) + { + ReleaseBuffer(root->buffer); + root = root->parent; + } + + Assert(root->blkno == btree->rootBlkno); + Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno); + root->off = InvalidOffsetNumber; + + blkno = root->blkno; + buffer = root->buffer; + + ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); + + for (;;) + { + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + if (GinPageIsLeaf(page)) + elog(ERROR, "Lost path"); + + if (GinPageIsIncompleteSplit(page)) + { + Assert(blkno != btree->rootBlkno); + ptr->blkno = blkno; + ptr->buffer = buffer; + + /* + * parent may be wrong, but if so, the ginFinishSplit call will + * recurse to call ginFindParents again to fix it. 
+ */ + ptr->parent = root; + ptr->off = InvalidOffsetNumber; + + ginFinishSplit(btree, ptr, false, NULL); + } + + leftmostBlkno = btree->getLeftMostChild(btree, page); + + while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber) + { + blkno = GinPageGetOpaque(page)->rightlink; + if (blkno == InvalidBlockNumber) + { + UnlockReleaseBuffer(buffer); + break; + } + buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + + /* finish any incomplete splits, as above */ + if (GinPageIsIncompleteSplit(page)) + { + Assert(blkno != btree->rootBlkno); + ptr->blkno = blkno; + ptr->buffer = buffer; + ptr->parent = root; + ptr->off = InvalidOffsetNumber; + + ginFinishSplit(btree, ptr, false, NULL); + } + } + + if (blkno != InvalidBlockNumber) + { + ptr->blkno = blkno; + ptr->buffer = buffer; + ptr->parent = root; /* it may be wrong, but in next call we will + * correct */ + ptr->off = offset; + stack->parent = ptr; + return; + } + + /* Descend down to next level */ + blkno = leftmostBlkno; + buffer = ReadBuffer(btree->index, blkno); + } +} + +/* + * Insert a new item to a page. + * + * Returns true if the insertion was finished. On false, the page was split and + * the parent needs to be updated. (A root split returns true as it doesn't + * need any further action by the caller to complete.) + * + * When inserting a downlink to an internal page, 'childbuf' contains the + * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared + * atomically with the insert. Also, the existing item at offset stack->off + * in the target page is updated to point to updateblkno. + * + * stack->buffer is locked on entry, and is kept locked. + * Likewise for childbuf, if given. + */ +static bool +ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + Buffer childbuf, GinStatsData *buildStats) +{ + Page page = BufferGetPage(stack->buffer); + bool result; + GinPlaceToPageRC rc; + uint16 xlflags = 0; + Page childpage = NULL; + Page newlpage = NULL, + newrpage = NULL; + void *ptp_workspace = NULL; + MemoryContext tmpCxt; + MemoryContext oldCxt; + + /* + * We do all the work of this function and its subfunctions in a temporary + * memory context. This avoids leakages and simplifies APIs, since some + * subfunctions allocate storage that has to survive until we've finished + * the WAL insertion. + */ + tmpCxt = AllocSetContextCreate(CurrentMemoryContext, + "ginPlaceToPage temporary context", + ALLOCSET_DEFAULT_SIZES); + oldCxt = MemoryContextSwitchTo(tmpCxt); + + if (GinPageIsData(page)) + xlflags |= GIN_INSERT_ISDATA; + if (GinPageIsLeaf(page)) + { + xlflags |= GIN_INSERT_ISLEAF; + Assert(!BufferIsValid(childbuf)); + Assert(updateblkno == InvalidBlockNumber); + } + else + { + Assert(BufferIsValid(childbuf)); + Assert(updateblkno != InvalidBlockNumber); + childpage = BufferGetPage(childbuf); + } + + /* + * See if the incoming tuple will fit on the page. beginPlaceToPage will + * decide if the page needs to be split, and will compute the split + * contents if so. See comments for beginPlaceToPage and execPlaceToPage + * functions for more details of the API here. 
+ */ + rc = btree->beginPlaceToPage(btree, stack->buffer, stack, + insertdata, updateblkno, + &ptp_workspace, + &newlpage, &newrpage); + + if (rc == GPTP_NO_WORK) + { + /* Nothing to do */ + result = true; + } + else if (rc == GPTP_INSERT) + { + /* It will fit, perform the insertion */ + START_CRIT_SECTION(); + + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + { + XLogBeginInsert(); + XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD); + if (BufferIsValid(childbuf)) + XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD); + } + + /* Perform the page update, and register any extra WAL data */ + btree->execPlaceToPage(btree, stack->buffer, stack, + insertdata, updateblkno, ptp_workspace); + + MarkBufferDirty(stack->buffer); + + /* An insert to an internal page finishes the split of the child. */ + if (BufferIsValid(childbuf)) + { + GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; + MarkBufferDirty(childbuf); + } + + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + { + XLogRecPtr recptr; + ginxlogInsert xlrec; + BlockIdData childblknos[2]; + + xlrec.flags = xlflags; + + XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert)); + + /* + * Log information about child if this was an insertion of a + * downlink. + */ + if (BufferIsValid(childbuf)) + { + BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf)); + BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink); + XLogRegisterData((char *) childblknos, + sizeof(BlockIdData) * 2); + } + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT); + PageSetLSN(page, recptr); + if (BufferIsValid(childbuf)) + PageSetLSN(childpage, recptr); + } + + END_CRIT_SECTION(); + + /* Insertion is complete. */ + result = true; + } + else if (rc == GPTP_SPLIT) + { + /* + * Didn't fit, need to split. The split has been computed in newlpage + * and newrpage, which are pointers to palloc'd pages, not associated + * with buffers. stack->buffer is not touched yet. + */ + Buffer rbuffer; + BlockNumber savedRightLink; + ginxlogSplit data; + Buffer lbuffer = InvalidBuffer; + Page newrootpg = NULL; + + /* Get a new index page to become the right page */ + rbuffer = GinNewBuffer(btree->index); + + /* During index build, count the new page */ + if (buildStats) + { + if (btree->isData) + buildStats->nDataPages++; + else + buildStats->nEntryPages++; + } + + savedRightLink = GinPageGetOpaque(page)->rightlink; + + /* Begin setting up WAL record */ + data.node = btree->index->rd_node; + data.flags = xlflags; + if (BufferIsValid(childbuf)) + { + data.leftChildBlkno = BufferGetBlockNumber(childbuf); + data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink; + } + else + data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber; + + if (stack->parent == NULL) + { + /* + * splitting the root, so we need to allocate new left page and + * place pointers to left and right page on root page. + */ + lbuffer = GinNewBuffer(btree->index); + + /* During index build, count the new left page */ + if (buildStats) + { + if (btree->isData) + buildStats->nDataPages++; + else + buildStats->nEntryPages++; + } + + data.rrlink = InvalidBlockNumber; + data.flags |= GIN_SPLIT_ROOT; + + GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber; + GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); + + /* + * Construct a new root page containing downlinks to the new left + * and right pages. (Do this in a temporary copy rather than + * overwriting the original page directly, since we're not in the + * critical section yet.) 
+ */ + newrootpg = PageGetTempPage(newrpage); + GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ); + + btree->fillRoot(btree, newrootpg, + BufferGetBlockNumber(lbuffer), newlpage, + BufferGetBlockNumber(rbuffer), newrpage); + + if (GinPageIsLeaf(BufferGetPage(stack->buffer))) + { + + PredicateLockPageSplit(btree->index, + BufferGetBlockNumber(stack->buffer), + BufferGetBlockNumber(lbuffer)); + + PredicateLockPageSplit(btree->index, + BufferGetBlockNumber(stack->buffer), + BufferGetBlockNumber(rbuffer)); + } + + } + else + { + /* splitting a non-root page */ + data.rrlink = savedRightLink; + + GinPageGetOpaque(newrpage)->rightlink = savedRightLink; + GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT; + GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); + + if (GinPageIsLeaf(BufferGetPage(stack->buffer))) + { + + PredicateLockPageSplit(btree->index, + BufferGetBlockNumber(stack->buffer), + BufferGetBlockNumber(rbuffer)); + } + } + + /* + * OK, we have the new contents of the left page in a temporary copy + * now (newlpage), and likewise for the new contents of the + * newly-allocated right block. The original page is still unchanged. + * + * If this is a root split, we also have a temporary page containing + * the new contents of the root. + */ + + START_CRIT_SECTION(); + + MarkBufferDirty(rbuffer); + MarkBufferDirty(stack->buffer); + + /* + * Restore the temporary copies over the real buffers. + */ + if (stack->parent == NULL) + { + /* Splitting the root, three pages to update */ + MarkBufferDirty(lbuffer); + memcpy(page, newrootpg, BLCKSZ); + memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ); + memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); + } + else + { + /* Normal split, only two pages to update */ + memcpy(page, newlpage, BLCKSZ); + memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); + } + + /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */ + if (BufferIsValid(childbuf)) + { + GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; + MarkBufferDirty(childbuf); + } + + /* write WAL record */ + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + { + XLogRecPtr recptr; + + XLogBeginInsert(); + + /* + * We just take full page images of all the split pages. Splits + * are uncommon enough that it's not worth complicating the code + * to be more efficient. + */ + if (stack->parent == NULL) + { + XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + } + else + { + XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + } + if (BufferIsValid(childbuf)) + XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD); + + XLogRegisterData((char *) &data, sizeof(ginxlogSplit)); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT); + + PageSetLSN(page, recptr); + PageSetLSN(BufferGetPage(rbuffer), recptr); + if (stack->parent == NULL) + PageSetLSN(BufferGetPage(lbuffer), recptr); + if (BufferIsValid(childbuf)) + PageSetLSN(childpage, recptr); + } + END_CRIT_SECTION(); + + /* + * We can release the locks/pins on the new pages now, but keep + * stack->buffer locked. childbuf doesn't get unlocked either. + */ + UnlockReleaseBuffer(rbuffer); + if (stack->parent == NULL) + UnlockReleaseBuffer(lbuffer); + + /* + * If we split the root, we're done. 
Otherwise the split is not + * complete until the downlink for the new page has been inserted to + * the parent. + */ + result = (stack->parent == NULL); + } + else + { + elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc); + result = false; /* keep compiler quiet */ + } + + /* Clean up temp context */ + MemoryContextSwitchTo(oldCxt); + MemoryContextDelete(tmpCxt); + + return result; +} + +/* + * Finish a split by inserting the downlink for the new page to parent. + * + * On entry, stack->buffer is exclusively locked. + * + * If freestack is true, all the buffers are released and unlocked as we + * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept + * locked, and stack is unmodified, except for possibly moving right to find + * the correct parent of page. + */ +static void +ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, + GinStatsData *buildStats) +{ + Page page; + bool done; + bool first = true; + + /* + * freestack == false when we encounter an incompletely split page during + * a scan, while freestack == true is used in the normal scenario that a + * split is finished right after the initial insert. + */ + if (!freestack) + elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"", + stack->blkno, RelationGetRelationName(btree->index)); + + /* this loop crawls up the stack until the insertion is complete */ + do + { + GinBtreeStack *parent = stack->parent; + void *insertdata; + BlockNumber updateblkno; + + /* search parent to lock */ + LockBuffer(parent->buffer, GIN_EXCLUSIVE); + + /* + * If the parent page was incompletely split, finish that split first, + * then continue with the current one. + * + * Note: we have to finish *all* incomplete splits we encounter, even + * if we have to move right. Otherwise we might choose as the target a + * page that has no downlink in the parent, and splitting it further + * would fail. + */ + if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer))) + ginFinishSplit(btree, parent, false, buildStats); + + /* move right if it's needed */ + page = BufferGetPage(parent->buffer); + while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber) + { + if (GinPageRightMost(page)) + { + /* + * rightmost page, but we don't find parent, we should use + * plain search... + */ + LockBuffer(parent->buffer, GIN_UNLOCK); + ginFindParents(btree, stack); + parent = stack->parent; + Assert(parent != NULL); + break; + } + + parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE); + parent->blkno = BufferGetBlockNumber(parent->buffer); + page = BufferGetPage(parent->buffer); + + if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer))) + ginFinishSplit(btree, parent, false, buildStats); + } + + /* insert the downlink */ + insertdata = btree->prepareDownlink(btree, stack->buffer); + updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink; + done = ginPlaceToPage(btree, parent, + insertdata, updateblkno, + stack->buffer, buildStats); + pfree(insertdata); + + /* + * If the caller requested to free the stack, unlock and release the + * child buffer now. Otherwise keep it pinned and locked, but if we + * have to recurse up the tree, we can unlock the upper pages, only + * keeping the page at the bottom of the stack locked. 
+ */ + if (!first || freestack) + LockBuffer(stack->buffer, GIN_UNLOCK); + if (freestack) + { + ReleaseBuffer(stack->buffer); + pfree(stack); + } + stack = parent; + + first = false; + } while (!done); + + /* unlock the parent */ + LockBuffer(stack->buffer, GIN_UNLOCK); + + if (freestack) + freeGinBtreeStack(stack); +} + +/* + * Insert a value to tree described by stack. + * + * The value to be inserted is given in 'insertdata'. Its format depends + * on whether this is an entry or data tree, ginInsertValue just passes it + * through to the tree-specific callback function. + * + * During an index build, buildStats is non-null and the counters it contains + * are incremented as needed. + * + * NB: the passed-in stack is freed, as though by freeGinBtreeStack. + */ +void +ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, + GinStatsData *buildStats) +{ + bool done; + + /* If the leaf page was incompletely split, finish the split first */ + if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer))) + ginFinishSplit(btree, stack, false, buildStats); + + done = ginPlaceToPage(btree, stack, + insertdata, InvalidBlockNumber, + InvalidBuffer, buildStats); + if (done) + { + LockBuffer(stack->buffer, GIN_UNLOCK); + freeGinBtreeStack(stack); + } + else + ginFinishSplit(btree, stack, true, buildStats); +} diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c new file mode 100644 index 0000000..4c5067c --- /dev/null +++ b/src/backend/access/gin/ginbulk.c @@ -0,0 +1,293 @@ +/*------------------------------------------------------------------------- + * + * ginbulk.c + * routines for fast build of inverted index + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginbulk.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/gin_private.h" +#include "utils/datum.h" +#include "utils/memutils.h" + + +#define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */ +#define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ + + +/* Combiner function for rbtree.c */ +static void +ginCombineData(RBTNode *existing, const RBTNode *newdata, void *arg) +{ + GinEntryAccumulator *eo = (GinEntryAccumulator *) existing; + const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata; + BuildAccumulator *accum = (BuildAccumulator *) arg; + + /* + * Note this code assumes that newdata contains only one itempointer. 
+ */ + if (eo->count >= eo->maxcount) + { + if (eo->maxcount > INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("posting list is too long"), + errhint("Reduce maintenance_work_mem."))); + + accum->allocatedMemory -= GetMemoryChunkSpace(eo->list); + eo->maxcount *= 2; + eo->list = (ItemPointerData *) + repalloc_huge(eo->list, sizeof(ItemPointerData) * eo->maxcount); + accum->allocatedMemory += GetMemoryChunkSpace(eo->list); + } + + /* If item pointers are not ordered, they will need to be sorted later */ + if (eo->shouldSort == false) + { + int res; + + res = ginCompareItemPointers(eo->list + eo->count - 1, en->list); + Assert(res != 0); + + if (res > 0) + eo->shouldSort = true; + } + + eo->list[eo->count] = en->list[0]; + eo->count++; +} + +/* Comparator function for rbtree.c */ +static int +cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg) +{ + const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a; + const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b; + BuildAccumulator *accum = (BuildAccumulator *) arg; + + return ginCompareAttEntries(accum->ginstate, + ea->attnum, ea->key, ea->category, + eb->attnum, eb->key, eb->category); +} + +/* Allocator function for rbtree.c */ +static RBTNode * +ginAllocEntryAccumulator(void *arg) +{ + BuildAccumulator *accum = (BuildAccumulator *) arg; + GinEntryAccumulator *ea; + + /* + * Allocate memory by rather big chunks to decrease overhead. We have no + * need to reclaim RBTNodes individually, so this costs nothing. + */ + if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY) + { + accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY); + accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); + accum->eas_used = 0; + } + + /* Allocate new RBTNode from current chunk */ + ea = accum->entryallocator + accum->eas_used; + accum->eas_used++; + + return (RBTNode *) ea; +} + +void +ginInitBA(BuildAccumulator *accum) +{ + /* accum->ginstate is intentionally not set here */ + accum->allocatedMemory = 0; + accum->entryallocator = NULL; + accum->eas_used = 0; + accum->tree = rbt_create(sizeof(GinEntryAccumulator), + cmpEntryAccumulator, + ginCombineData, + ginAllocEntryAccumulator, + NULL, /* no freefunc needed */ + (void *) accum); +} + +/* + * This is basically the same as datumCopy(), but extended to count + * palloc'd space in accum->allocatedMemory. + */ +static Datum +getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) +{ + Form_pg_attribute att; + Datum res; + + att = TupleDescAttr(accum->ginstate->origTupdesc, attnum - 1); + if (att->attbyval) + res = value; + else + { + res = datumCopy(value, false, att->attlen); + accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res)); + } + return res; +} + +/* + * Find/store one entry from indexed value. + */ +static void +ginInsertBAEntry(BuildAccumulator *accum, + ItemPointer heapptr, OffsetNumber attnum, + Datum key, GinNullCategory category) +{ + GinEntryAccumulator eatmp; + GinEntryAccumulator *ea; + bool isNew; + + /* + * For the moment, fill only the fields of eatmp that will be looked at by + * cmpEntryAccumulator or ginCombineData. 
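+ * (attnum, key and category for the comparator; 'list' for the combiner.)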
+ */ + eatmp.attnum = attnum; + eatmp.key = key; + eatmp.category = category; + /* temporarily set up single-entry itempointer list */ + eatmp.list = heapptr; + + ea = (GinEntryAccumulator *) rbt_insert(accum->tree, (RBTNode *) &eatmp, + &isNew); + + if (isNew) + { + /* + * Finish initializing new tree entry, including making permanent + * copies of the datum (if it's not null) and itempointer. + */ + if (category == GIN_CAT_NORM_KEY) + ea->key = getDatumCopy(accum, attnum, key); + ea->maxcount = DEF_NPTR; + ea->count = 1; + ea->shouldSort = false; + ea->list = + (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR); + ea->list[0] = *heapptr; + accum->allocatedMemory += GetMemoryChunkSpace(ea->list); + } + else + { + /* + * ginCombineData did everything needed. + */ + } +} + +/* + * Insert the entries for one heap pointer. + * + * Since the entries are being inserted into a balanced binary tree, you + * might think that the order of insertion wouldn't be critical, but it turns + * out that inserting the entries in sorted order results in a lot of + * rebalancing operations and is slow. To prevent this, we attempt to insert + * the nodes in an order that will produce a nearly-balanced tree if the input + * is in fact sorted. + * + * We do this as follows. First, we imagine that we have an array whose size + * is the smallest power of two greater than or equal to the actual array + * size. Second, we insert the middle entry of our virtual array into the + * tree; then, we insert the middles of each half of our virtual array, then + * middles of quarters, etc. + */ +void +ginInsertBAEntries(BuildAccumulator *accum, + ItemPointer heapptr, OffsetNumber attnum, + Datum *entries, GinNullCategory *categories, + int32 nentries) +{ + uint32 step = nentries; + + if (nentries <= 0) + return; + + Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); + + /* + * step will contain largest power of 2 and <= nentries + */ + step |= (step >> 1); + step |= (step >> 2); + step |= (step >> 4); + step |= (step >> 8); + step |= (step >> 16); + step >>= 1; + step++; + + while (step > 0) + { + int i; + + for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ ) + ginInsertBAEntry(accum, heapptr, attnum, + entries[i], categories[i]); + + step >>= 1; /* /2 */ + } +} + +static int +qsortCompareItemPointers(const void *a, const void *b) +{ + int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b); + + /* Assert that there are no equal item pointers being sorted */ + Assert(res != 0); + return res; +} + +/* Prepare to read out the rbtree contents using ginGetBAEntry */ +void +ginBeginBAScan(BuildAccumulator *accum) +{ + rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); +} + +/* + * Get the next entry in sequence from the BuildAccumulator's rbtree. + * This consists of a single key datum and a list (array) of one or more + * heap TIDs in which that key is found. The list is guaranteed sorted. 
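+ * Entries whose TIDs were accumulated out of order (shouldSort) are sorted
+ * with qsort before being returned.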
+ */ +ItemPointerData * +ginGetBAEntry(BuildAccumulator *accum, + OffsetNumber *attnum, Datum *key, GinNullCategory *category, + uint32 *n) +{ + GinEntryAccumulator *entry; + ItemPointerData *list; + + entry = (GinEntryAccumulator *) rbt_iterate(&accum->tree_walk); + + if (entry == NULL) + return NULL; /* no more entries */ + + *attnum = entry->attnum; + *key = entry->key; + *category = entry->category; + list = entry->list; + *n = entry->count; + + Assert(list != NULL && entry->count > 0); + + if (entry->shouldSort && entry->count > 1) + qsort(list, entry->count, sizeof(ItemPointerData), + qsortCompareItemPointers); + + return list; +} diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c new file mode 100644 index 0000000..06c0586 --- /dev/null +++ b/src/backend/access/gin/gindatapage.c @@ -0,0 +1,1942 @@ +/*------------------------------------------------------------------------- + * + * gindatapage.c + * routines for handling GIN posting tree pages. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/gindatapage.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/xloginsert.h" +#include "lib/ilist.h" +#include "miscadmin.h" +#include "storage/predicate.h" +#include "utils/rel.h" + +/* + * Min, Max and Target size of posting lists stored on leaf pages, in bytes. + * + * The code can deal with any size, but random access is more efficient when + * a number of smaller lists are stored, rather than one big list. If a + * posting list would become larger than Max size as a result of insertions, + * it is split into two. If a posting list would be smaller than minimum + * size, it is merged with the next posting list. + */ +#define GinPostingListSegmentMaxSize 384 +#define GinPostingListSegmentTargetSize 256 +#define GinPostingListSegmentMinSize 128 + +/* + * At least this many items fit in a GinPostingListSegmentMaxSize-bytes + * long segment. This is used when estimating how much space is required + * for N items, at minimum. + */ +#define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6) + +/* + * A working struct for manipulating a posting tree leaf page. + */ +typedef struct +{ + dlist_head segments; /* a list of leafSegmentInfos */ + + /* + * The following fields represent how the segments are split across pages, + * if a page split is required. Filled in by leafRepackItems. + */ + dlist_node *lastleft; /* last segment on left page */ + int lsize; /* total size on left page */ + int rsize; /* total size on right page */ + + bool oldformat; /* page is in pre-9.4 format on disk */ + + /* + * If we need WAL data representing the reconstructed leaf page, it's + * stored here by computeLeafRecompressWALData. + */ + char *walinfo; /* buffer start */ + int walinfolen; /* and length */ +} disassembledLeaf; + +typedef struct +{ + dlist_node node; /* linked list pointers */ + + /*------------- + * 'action' indicates the status of this in-memory segment, compared to + * what's on disk. It is one of the GIN_SEGMENT_* action codes: + * + * UNMODIFIED no changes + * DELETE the segment is to be removed. 
'seg' and 'items' are + * ignored + * INSERT this is a completely new segment + * REPLACE this replaces an existing segment with new content + * ADDITEMS like REPLACE, but no items have been removed, and we track + * in detail what items have been added to this segment, in + * 'modifieditems' + *------------- + */ + char action; + + ItemPointerData *modifieditems; + uint16 nmodifieditems; + + /* + * The following fields represent the items in this segment. If 'items' is + * not NULL, it contains a palloc'd array of the items in this segment. If + * 'seg' is not NULL, it contains the items in an already-compressed + * format. It can point to an on-disk page (!modified), or a palloc'd + * segment in memory. If both are set, they must represent the same items. + */ + GinPostingList *seg; + ItemPointer items; + int nitems; /* # of items in 'items', if items != NULL */ +} leafSegmentInfo; + +static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems); +static void dataSplitPageInternal(GinBtree btree, Buffer origbuf, + GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + Page *newlpage, Page *newrpage); + +static disassembledLeaf *disassembleLeaf(Page page); +static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining); +static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, + int nNewItems); + +static void computeLeafRecompressWALData(disassembledLeaf *leaf); +static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf); +static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf, + ItemPointerData lbound, ItemPointerData rbound, + Page lpage, Page rpage); + +/* + * Read TIDs from leaf data page to single uncompressed array. The TIDs are + * returned in ascending order. + * + * advancePast is a hint, indicating that the caller is only interested in + * TIDs > advancePast. To return all items, use ItemPointerSetMin. + * + * Note: This function can still return items smaller than advancePast that + * are in the same posting list as the items of interest, so the caller must + * still check all the returned items. But passing it allows this function to + * skip whole posting lists. + */ +ItemPointer +GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast) +{ + ItemPointer result; + + if (GinPageIsCompressed(page)) + { + GinPostingList *seg = GinDataLeafPageGetPostingList(page); + Size len = GinDataLeafPageGetPostingListSize(page); + Pointer endptr = ((Pointer) seg) + len; + GinPostingList *next; + + /* Skip to the segment containing advancePast+1 */ + if (ItemPointerIsValid(&advancePast)) + { + next = GinNextPostingListSegment(seg); + while ((Pointer) next < endptr && + ginCompareItemPointers(&next->first, &advancePast) <= 0) + { + seg = next; + next = GinNextPostingListSegment(seg); + } + len = endptr - (Pointer) seg; + } + + if (len > 0) + result = ginPostingListDecodeAllSegments(seg, len, nitems); + else + { + result = NULL; + *nitems = 0; + } + } + else + { + ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems); + + result = palloc((*nitems) * sizeof(ItemPointerData)); + memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData)); + } + + return result; +} + +/* + * Places all TIDs from leaf data page to bitmap. 
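+ * Returns the number of TIDs placed into the bitmap.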
+ */ +int +GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm) +{ + ItemPointer uncompressed; + int nitems; + + if (GinPageIsCompressed(page)) + { + GinPostingList *segment = GinDataLeafPageGetPostingList(page); + Size len = GinDataLeafPageGetPostingListSize(page); + + nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm); + } + else + { + uncompressed = dataLeafPageGetUncompressed(page, &nitems); + + if (nitems > 0) + tbm_add_tuples(tbm, uncompressed, nitems, false); + } + + return nitems; +} + +/* + * Get pointer to the uncompressed array of items on a pre-9.4 format + * uncompressed leaf page. The number of items in the array is returned in + * *nitems. + */ +static ItemPointer +dataLeafPageGetUncompressed(Page page, int *nitems) +{ + ItemPointer items; + + Assert(!GinPageIsCompressed(page)); + + /* + * In the old pre-9.4 page format, the whole page content is used for + * uncompressed items, and the number of items is stored in 'maxoff' + */ + items = (ItemPointer) GinDataPageGetData(page); + *nitems = GinPageGetOpaque(page)->maxoff; + + return items; +} + +/* + * Check if we should follow the right link to find the item we're searching + * for. + * + * Compares inserting item pointer with the right bound of the current page. + */ +static bool +dataIsMoveRight(GinBtree btree, Page page) +{ + ItemPointer iptr = GinDataPageGetRightBound(page); + + if (GinPageRightMost(page)) + return false; + + if (GinPageIsDeleted(page)) + return true; + + return (ginCompareItemPointers(&btree->itemptr, iptr) > 0) ? true : false; +} + +/* + * Find correct PostingItem in non-leaf page. It is assumed that this is + * the correct page, and the searched value SHOULD be on the page. + */ +static BlockNumber +dataLocateItem(GinBtree btree, GinBtreeStack *stack) +{ + OffsetNumber low, + high, + maxoff; + PostingItem *pitem = NULL; + int result; + Page page = BufferGetPage(stack->buffer); + + Assert(!GinPageIsLeaf(page)); + Assert(GinPageIsData(page)); + + if (btree->fullScan) + { + stack->off = FirstOffsetNumber; + stack->predictNumber *= GinPageGetOpaque(page)->maxoff; + return btree->getLeftMostChild(btree, page); + } + + low = FirstOffsetNumber; + maxoff = high = GinPageGetOpaque(page)->maxoff; + Assert(high >= low); + + high++; + + while (high > low) + { + OffsetNumber mid = low + ((high - low) / 2); + + pitem = GinDataPageGetPostingItem(page, mid); + + if (mid == maxoff) + { + /* + * Right infinity, page already correctly chosen with a help of + * dataIsMoveRight + */ + result = -1; + } + else + { + pitem = GinDataPageGetPostingItem(page, mid); + result = ginCompareItemPointers(&btree->itemptr, &(pitem->key)); + } + + if (result == 0) + { + stack->off = mid; + return PostingItemGetBlockNumber(pitem); + } + else if (result > 0) + low = mid + 1; + else + high = mid; + } + + Assert(high >= FirstOffsetNumber && high <= maxoff); + + stack->off = high; + pitem = GinDataPageGetPostingItem(page, high); + return PostingItemGetBlockNumber(pitem); +} + +/* + * Find link to blkno on non-leaf page, returns offset of PostingItem + */ +static OffsetNumber +dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) +{ + OffsetNumber i, + maxoff = GinPageGetOpaque(page)->maxoff; + PostingItem *pitem; + + Assert(!GinPageIsLeaf(page)); + Assert(GinPageIsData(page)); + + /* if page isn't changed, we return storedOff */ + if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) + { + pitem = GinDataPageGetPostingItem(page, storedOff); + if (PostingItemGetBlockNumber(pitem) == 
blkno) + return storedOff; + + /* + * we hope, that needed pointer goes to right. It's true if there + * wasn't a deletion + */ + for (i = storedOff + 1; i <= maxoff; i++) + { + pitem = GinDataPageGetPostingItem(page, i); + if (PostingItemGetBlockNumber(pitem) == blkno) + return i; + } + + maxoff = storedOff - 1; + } + + /* last chance */ + for (i = FirstOffsetNumber; i <= maxoff; i++) + { + pitem = GinDataPageGetPostingItem(page, i); + if (PostingItemGetBlockNumber(pitem) == blkno) + return i; + } + + return InvalidOffsetNumber; +} + +/* + * Return blkno of leftmost child + */ +static BlockNumber +dataGetLeftMostPage(GinBtree btree, Page page) +{ + PostingItem *pitem; + + Assert(!GinPageIsLeaf(page)); + Assert(GinPageIsData(page)); + Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber); + + pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber); + return PostingItemGetBlockNumber(pitem); +} + +/* + * Add PostingItem to a non-leaf page. + */ +void +GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset) +{ + OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; + char *ptr; + + Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber); + Assert(!GinPageIsLeaf(page)); + + if (offset == InvalidOffsetNumber) + { + ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1); + } + else + { + ptr = (char *) GinDataPageGetPostingItem(page, offset); + if (offset != maxoff + 1) + memmove(ptr + sizeof(PostingItem), + ptr, + (maxoff - offset + 1) * sizeof(PostingItem)); + } + memcpy(ptr, data, sizeof(PostingItem)); + + maxoff++; + GinPageGetOpaque(page)->maxoff = maxoff; + + /* + * Also set pd_lower to the end of the posting items, to follow the + * "standard" page layout, so that we can squeeze out the unused space + * from full-page images. + */ + GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem)); +} + +/* + * Delete posting item from non-leaf page + */ +void +GinPageDeletePostingItem(Page page, OffsetNumber offset) +{ + OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; + + Assert(!GinPageIsLeaf(page)); + Assert(offset >= FirstOffsetNumber && offset <= maxoff); + + if (offset != maxoff) + memmove(GinDataPageGetPostingItem(page, offset), + GinDataPageGetPostingItem(page, offset + 1), + sizeof(PostingItem) * (maxoff - offset)); + + maxoff--; + GinPageGetOpaque(page)->maxoff = maxoff; + + GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem)); +} + +/* + * Prepare to insert data on a leaf data page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. + * + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. + * + * In neither case should the given page buffer be modified here. 
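+ * (In the GPTP_INSERT case the page is modified later, by
+ * dataExecPlaceToPageLeaf, inside the insertion critical section.)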
+ */ +static GinPlaceToPageRC +dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, + void **ptp_workspace, + Page *newlpage, Page *newrpage) +{ + GinBtreeDataLeafInsertData *items = insertdata; + ItemPointer newItems = &items->items[items->curitem]; + int maxitems = items->nitem - items->curitem; + Page page = BufferGetPage(buf); + int i; + ItemPointerData rbound; + ItemPointerData lbound; + bool needsplit; + bool append; + int segsize; + Size freespace; + disassembledLeaf *leaf; + leafSegmentInfo *lastleftinfo; + ItemPointerData maxOldItem; + ItemPointerData remaining; + + rbound = *GinDataPageGetRightBound(page); + + /* + * Count how many of the new items belong to this page. + */ + if (!GinPageRightMost(page)) + { + for (i = 0; i < maxitems; i++) + { + if (ginCompareItemPointers(&newItems[i], &rbound) > 0) + { + /* + * This needs to go to some other location in the tree. (The + * caller should've chosen the insert location so that at + * least the first item goes here.) + */ + Assert(i > 0); + break; + } + } + maxitems = i; + } + + /* Disassemble the data on the page */ + leaf = disassembleLeaf(page); + + /* + * Are we appending to the end of the page? IOW, are all the new items + * larger than any of the existing items. + */ + if (!dlist_is_empty(&leaf->segments)) + { + lastleftinfo = dlist_container(leafSegmentInfo, node, + dlist_tail_node(&leaf->segments)); + if (!lastleftinfo->items) + lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg, + &lastleftinfo->nitems); + maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1]; + if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0) + append = true; + else + append = false; + } + else + { + ItemPointerSetMin(&maxOldItem); + append = true; + } + + /* + * If we're appending to the end of the page, we will append as many items + * as we can fit (after splitting), and stop when the pages becomes full. + * Otherwise we have to limit the number of new items to insert, because + * once we start packing we can't just stop when we run out of space, + * because we must make sure that all the old items still fit. + */ + if (GinPageIsCompressed(page)) + freespace = GinDataLeafPageGetFreeSpace(page); + else + freespace = 0; + if (append) + { + /* + * Even when appending, trying to append more items than will fit is + * not completely free, because we will merge the new items and old + * items into an array below. In the best case, every new item fits in + * a single byte, and we can use all the free space on the old page as + * well as the new page. For simplicity, ignore segment overhead etc. + */ + maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize); + } + else + { + /* + * Calculate a conservative estimate of how many new items we can fit + * on the two pages after splitting. + * + * We can use any remaining free space on the old page to store full + * segments, as well as the new page. Each full-sized segment can hold + * at least MinTuplesPerSegment items + */ + int nnewsegments; + + nnewsegments = freespace / GinPostingListSegmentMaxSize; + nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize; + maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment); + } + + /* Add the new items to the segment list */ + if (!addItemsToLeaf(leaf, newItems, maxitems)) + { + /* all items were duplicates, we have nothing to do */ + items->curitem += maxitems; + + return GPTP_NO_WORK; + } + + /* + * Pack the items back to compressed segments, ready for writing to disk. 
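+ * leafRepackItems decides whether a split is needed, and sets 'remaining' to
+ * the first item that did not fit (or to an invalid item pointer if
+ * everything fit).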
+ */ + needsplit = leafRepackItems(leaf, &remaining); + + /* + * Did all the new items fit? + * + * If we're appending, it's OK if they didn't. But as a sanity check, + * verify that all the old items fit. + */ + if (ItemPointerIsValid(&remaining)) + { + if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0) + elog(ERROR, "could not split GIN page; all old items didn't fit"); + + /* Count how many of the new items did fit. */ + for (i = 0; i < maxitems; i++) + { + if (ginCompareItemPointers(&newItems[i], &remaining) >= 0) + break; + } + if (i == 0) + elog(ERROR, "could not split GIN page; no new items fit"); + maxitems = i; + } + + if (!needsplit) + { + /* + * Great, all the items fit on a single page. If needed, prepare data + * for a WAL record describing the changes we'll make. + */ + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + computeLeafRecompressWALData(leaf); + + /* + * We're ready to enter the critical section, but + * dataExecPlaceToPageLeaf will need access to the "leaf" data. + */ + *ptp_workspace = leaf; + + if (append) + elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)", + maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, + items->nitem - items->curitem - maxitems); + else + elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)", + maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, + items->nitem - items->curitem - maxitems); + } + else + { + /* + * Have to split. + * + * leafRepackItems already divided the segments between the left and + * the right page. It filled the left page as full as possible, and + * put the rest to the right page. When building a new index, that's + * good, because the table is scanned from beginning to end and there + * won't be any more insertions to the left page during the build. + * This packs the index as tight as possible. But otherwise, split + * 50/50, by moving segments from the left page to the right page + * until they're balanced. + * + * As a further heuristic, when appending items to the end of the + * page, try to make the left page 75% full, on the assumption that + * subsequent insertions will probably also go to the end. This packs + * the index somewhat tighter when appending to a table, which is very + * common. + */ + if (!btree->isBuild) + { + while (dlist_has_prev(&leaf->segments, leaf->lastleft)) + { + lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft); + + /* ignore deleted segments */ + if (lastleftinfo->action != GIN_SEGMENT_DELETE) + { + segsize = SizeOfGinPostingList(lastleftinfo->seg); + + /* + * Note that we check that the right page doesn't become + * more full than the left page even when appending. It's + * possible that we added enough items to make both pages + * more than 75% full. + */ + if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0) + break; + if (append) + { + if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4) + break; + } + + leaf->lsize -= segsize; + leaf->rsize += segsize; + } + leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft); + } + } + Assert(leaf->lsize <= GinDataPageMaxDataSize); + Assert(leaf->rsize <= GinDataPageMaxDataSize); + + /* + * Fetch the max item in the left page's last segment; it becomes the + * right bound of the page. 
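+ * (The old right bound, saved in 'rbound' above, becomes the right bound of
+ * the new right page.)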
+ */ + lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft); + if (!lastleftinfo->items) + lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg, + &lastleftinfo->nitems); + lbound = lastleftinfo->items[lastleftinfo->nitems - 1]; + + /* + * Now allocate a couple of temporary page images, and fill them. + */ + *newlpage = palloc(BLCKSZ); + *newrpage = palloc(BLCKSZ); + + dataPlaceToPageLeafSplit(leaf, lbound, rbound, + *newlpage, *newrpage); + + Assert(GinPageRightMost(page) || + ginCompareItemPointers(GinDataPageGetRightBound(*newlpage), + GinDataPageGetRightBound(*newrpage)) < 0); + + if (append) + elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)", + maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize, + items->nitem - items->curitem - maxitems); + else + elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)", + maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize, + items->nitem - items->curitem - maxitems); + } + + items->curitem += maxitems; + + return needsplit ? GPTP_SPLIT : GPTP_INSERT; +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + */ +static void +dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, void *ptp_workspace) +{ + disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace; + + /* Apply changes to page */ + dataPlaceToPageLeafRecompress(buf, leaf); + + /* If needed, register WAL data built by computeLeafRecompressWALData */ + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + { + XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); + } +} + +/* + * Vacuum a posting tree leaf page. + */ +void +ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs) +{ + Page page = BufferGetPage(buffer); + disassembledLeaf *leaf; + bool removedsomething = false; + dlist_iter iter; + + leaf = disassembleLeaf(page); + + /* Vacuum each segment. */ + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur); + int oldsegsize; + ItemPointer cleaned; + int ncleaned; + + if (!seginfo->items) + seginfo->items = ginPostingListDecode(seginfo->seg, + &seginfo->nitems); + if (seginfo->seg) + oldsegsize = SizeOfGinPostingList(seginfo->seg); + else + oldsegsize = GinDataPageMaxDataSize; + + cleaned = ginVacuumItemPointers(gvs, + seginfo->items, + seginfo->nitems, + &ncleaned); + pfree(seginfo->items); + seginfo->items = NULL; + seginfo->nitems = 0; + if (cleaned) + { + if (ncleaned > 0) + { + int npacked; + + seginfo->seg = ginCompressPostingList(cleaned, + ncleaned, + oldsegsize, + &npacked); + /* Removing an item never increases the size of the segment */ + if (npacked != ncleaned) + elog(ERROR, "could not fit vacuumed posting list"); + seginfo->action = GIN_SEGMENT_REPLACE; + } + else + { + seginfo->seg = NULL; + seginfo->items = NULL; + seginfo->action = GIN_SEGMENT_DELETE; + } + seginfo->nitems = ncleaned; + + removedsomething = true; + } + } + + /* + * If we removed any items, reconstruct the page from the pieces. + * + * We don't try to re-encode the segments here, even though some of them + * might be really small now that we've removed some items from them. 
It + * seems like a waste of effort, as there isn't really any benefit from + * larger segments per se; larger segments only help to pack more items in + * the same space. We might as well delay doing that until the next + * insertion, which will need to re-encode at least part of the page + * anyway. + * + * Also note if the page was in uncompressed, pre-9.4 format before, it is + * now represented as one huge segment that contains all the items. It + * might make sense to split that, to speed up random access, but we don't + * bother. You'll have to REINDEX anyway if you want the full gain of the + * new tighter index format. + */ + if (removedsomething) + { + bool modified; + + /* + * Make sure we have a palloc'd copy of all segments, after the first + * segment that is modified. (dataPlaceToPageLeafRecompress requires + * this). + */ + modified = false; + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, + iter.cur); + + if (seginfo->action != GIN_SEGMENT_UNMODIFIED) + modified = true; + if (modified && seginfo->action != GIN_SEGMENT_DELETE) + { + int segsize = SizeOfGinPostingList(seginfo->seg); + GinPostingList *tmp = (GinPostingList *) palloc(segsize); + + memcpy(tmp, seginfo->seg, segsize); + seginfo->seg = tmp; + } + } + + if (RelationNeedsWAL(indexrel)) + computeLeafRecompressWALData(leaf); + + /* Apply changes to page */ + START_CRIT_SECTION(); + + dataPlaceToPageLeafRecompress(buffer, leaf); + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(indexrel)) + { + XLogRecPtr recptr; + + XLogBeginInsert(); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + } +} + +/* + * Construct a ginxlogRecompressDataLeaf record representing the changes + * in *leaf. (Because this requires a palloc, we have to do it before + * we enter the critical section that actually updates the page.) + */ +static void +computeLeafRecompressWALData(disassembledLeaf *leaf) +{ + int nmodified = 0; + char *walbufbegin; + char *walbufend; + dlist_iter iter; + int segno; + ginxlogRecompressDataLeaf *recompress_xlog; + + /* Count the modified segments */ + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, + iter.cur); + + if (seginfo->action != GIN_SEGMENT_UNMODIFIED) + nmodified++; + } + + walbufbegin = + palloc(sizeof(ginxlogRecompressDataLeaf) + + BLCKSZ + /* max size needed to hold the segment data */ + nmodified * 2 /* (segno + action) per action */ + ); + walbufend = walbufbegin; + + recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend; + walbufend += sizeof(ginxlogRecompressDataLeaf); + + recompress_xlog->nactions = nmodified; + + segno = 0; + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, + iter.cur); + int segsize = 0; + int datalen; + uint8 action = seginfo->action; + + if (action == GIN_SEGMENT_UNMODIFIED) + { + segno++; + continue; + } + + if (action != GIN_SEGMENT_DELETE) + segsize = SizeOfGinPostingList(seginfo->seg); + + /* + * If storing the uncompressed list of added item pointers would take + * more space than storing the compressed segment as is, do that + * instead. 
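+ * (That is, downgrade the action from ADDITEMS to REPLACE and log the whole
+ * segment.)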
+ */ + if (action == GIN_SEGMENT_ADDITEMS && + seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize) + { + action = GIN_SEGMENT_REPLACE; + } + + *((uint8 *) (walbufend++)) = segno; + *(walbufend++) = action; + + switch (action) + { + case GIN_SEGMENT_DELETE: + datalen = 0; + break; + + case GIN_SEGMENT_ADDITEMS: + datalen = seginfo->nmodifieditems * sizeof(ItemPointerData); + memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16)); + memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen); + datalen += sizeof(uint16); + break; + + case GIN_SEGMENT_INSERT: + case GIN_SEGMENT_REPLACE: + datalen = SHORTALIGN(segsize); + memcpy(walbufend, seginfo->seg, segsize); + break; + + default: + elog(ERROR, "unexpected GIN leaf action %d", action); + } + walbufend += datalen; + + if (action != GIN_SEGMENT_INSERT) + segno++; + } + + /* Pass back the constructed info via *leaf */ + leaf->walinfo = walbufbegin; + leaf->walinfolen = walbufend - walbufbegin; +} + +/* + * Assemble a disassembled posting tree leaf page back to a buffer. + * + * This just updates the target buffer; WAL stuff is caller's responsibility. + * + * NOTE: The segment pointers must not point directly to the same buffer, + * except for segments that have not been modified and whose preceding + * segments have not been modified either. + */ +static void +dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf) +{ + Page page = BufferGetPage(buf); + char *ptr; + int newsize; + bool modified = false; + dlist_iter iter; + int segsize; + + /* + * If the page was in pre-9.4 format before, convert the header, and force + * all segments to be copied to the page whether they were modified or + * not. + */ + if (!GinPageIsCompressed(page)) + { + Assert(leaf->oldformat); + GinPageSetCompressed(page); + GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber; + modified = true; + } + + ptr = (char *) GinDataLeafPageGetPostingList(page); + newsize = 0; + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur); + + if (seginfo->action != GIN_SEGMENT_UNMODIFIED) + modified = true; + + if (seginfo->action != GIN_SEGMENT_DELETE) + { + segsize = SizeOfGinPostingList(seginfo->seg); + + if (modified) + memcpy(ptr, seginfo->seg, segsize); + + ptr += segsize; + newsize += segsize; + } + } + + Assert(newsize <= GinDataPageMaxDataSize); + GinDataPageSetDataSize(page, newsize); +} + +/* + * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf + * segments to two pages instead of one. + * + * This is different from the non-split cases in that this does not modify + * the original page directly, but writes to temporary in-memory copies of + * the new left and right pages. + */ +static void +dataPlaceToPageLeafSplit(disassembledLeaf *leaf, + ItemPointerData lbound, ItemPointerData rbound, + Page lpage, Page rpage) +{ + char *ptr; + int segsize; + int lsize; + int rsize; + dlist_node *node; + dlist_node *firstright; + leafSegmentInfo *seginfo; + + /* Initialize temporary pages to hold the new left and right pages */ + GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ); + GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ); + + /* + * Copy the segments that go to the left page. + * + * XXX: We should skip copying the unmodified part of the left page, like + * we do when recompressing. 
+ */ + lsize = 0; + ptr = (char *) GinDataLeafPageGetPostingList(lpage); + firstright = dlist_next_node(&leaf->segments, leaf->lastleft); + for (node = dlist_head_node(&leaf->segments); + node != firstright; + node = dlist_next_node(&leaf->segments, node)) + { + seginfo = dlist_container(leafSegmentInfo, node, node); + + if (seginfo->action != GIN_SEGMENT_DELETE) + { + segsize = SizeOfGinPostingList(seginfo->seg); + memcpy(ptr, seginfo->seg, segsize); + ptr += segsize; + lsize += segsize; + } + } + Assert(lsize == leaf->lsize); + GinDataPageSetDataSize(lpage, lsize); + *GinDataPageGetRightBound(lpage) = lbound; + + /* Copy the segments that go to the right page */ + ptr = (char *) GinDataLeafPageGetPostingList(rpage); + rsize = 0; + for (node = firstright; + ; + node = dlist_next_node(&leaf->segments, node)) + { + seginfo = dlist_container(leafSegmentInfo, node, node); + + if (seginfo->action != GIN_SEGMENT_DELETE) + { + segsize = SizeOfGinPostingList(seginfo->seg); + memcpy(ptr, seginfo->seg, segsize); + ptr += segsize; + rsize += segsize; + } + + if (!dlist_has_next(&leaf->segments, node)) + break; + } + Assert(rsize == leaf->rsize); + GinDataPageSetDataSize(rpage, rsize); + *GinDataPageGetRightBound(rpage) = rbound; +} + +/* + * Prepare to insert data on an internal data page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. + * + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. + * + * In neither case should the given page buffer be modified here. + * + * Note: on insertion to an internal node, in addition to inserting the given + * item, the downlink of the existing item at stack->off will be updated to + * point to updateblkno. + */ +static GinPlaceToPageRC +dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void **ptp_workspace, + Page *newlpage, Page *newrpage) +{ + Page page = BufferGetPage(buf); + + /* If it doesn't fit, deal with split case */ + if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem)) + { + dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno, + newlpage, newrpage); + return GPTP_SPLIT; + } + + /* Else, we're ready to proceed with insertion */ + return GPTP_INSERT; +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + */ +static void +dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void *ptp_workspace) +{ + Page page = BufferGetPage(buf); + OffsetNumber off = stack->off; + PostingItem *pitem; + + /* Update existing downlink to point to next page (on internal page) */ + pitem = GinDataPageGetPostingItem(page, off); + PostingItemSetBlockNumber(pitem, updateblkno); + + /* Add new item */ + pitem = (PostingItem *) insertdata; + GinDataPageAddPostingItem(page, pitem, off); + + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + { + /* + * This must be static, because it has to survive until XLogInsert, + * and we can't palloc here. Ugly, but the XLogInsert infrastructure + * isn't reentrant anyway. 
+ */ + static ginxlogInsertDataInternal data; + + data.offset = off; + data.newitem = *pitem; + + XLogRegisterBufData(0, (char *) &data, + sizeof(ginxlogInsertDataInternal)); + } +} + +/* + * Prepare to insert data on a posting-tree data page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. + * + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. + * + * In neither case should the given page buffer be modified here. + * + * Note: on insertion to an internal node, in addition to inserting the given + * item, the downlink of the existing item at stack->off will be updated to + * point to updateblkno. + * + * Calls relevant function for internal or leaf page because they are handled + * very differently. + */ +static GinPlaceToPageRC +dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void **ptp_workspace, + Page *newlpage, Page *newrpage) +{ + Page page = BufferGetPage(buf); + + Assert(GinPageIsData(page)); + + if (GinPageIsLeaf(page)) + return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata, + ptp_workspace, + newlpage, newrpage); + else + return dataBeginPlaceToPageInternal(btree, buf, stack, + insertdata, updateblkno, + ptp_workspace, + newlpage, newrpage); +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + * + * Calls relevant function for internal or leaf page because they are handled + * very differently. + */ +static void +dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void *ptp_workspace) +{ + Page page = BufferGetPage(buf); + + if (GinPageIsLeaf(page)) + dataExecPlaceToPageLeaf(btree, buf, stack, insertdata, + ptp_workspace); + else + dataExecPlaceToPageInternal(btree, buf, stack, insertdata, + updateblkno, ptp_workspace); +} + +/* + * Split internal page and insert new data. + * + * Returns new temp pages to *newlpage and *newrpage. + * The original buffer is left untouched. + */ +static void +dataSplitPageInternal(GinBtree btree, Buffer origbuf, + GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + Page *newlpage, Page *newrpage) +{ + Page oldpage = BufferGetPage(origbuf); + OffsetNumber off = stack->off; + int nitems = GinPageGetOpaque(oldpage)->maxoff; + int nleftitems; + int nrightitems; + Size pageSize = PageGetPageSize(oldpage); + ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage); + ItemPointer bound; + Page lpage; + Page rpage; + OffsetNumber separator; + PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1]; + + lpage = PageGetTempPage(oldpage); + rpage = PageGetTempPage(oldpage); + GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize); + GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize); + + /* + * First construct a new list of PostingItems, which includes all the old + * items, and the new item. 
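+ * The new item goes to offset 'off'; old items at and after 'off' shift
+ * right by one position.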
+ */ + memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber), + (off - 1) * sizeof(PostingItem)); + + allitems[off - 1] = *((PostingItem *) insertdata); + memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off), + (nitems - (off - 1)) * sizeof(PostingItem)); + nitems++; + + /* Update existing downlink to point to next page */ + PostingItemSetBlockNumber(&allitems[off], updateblkno); + + /* + * When creating a new index, fit as many tuples as possible on the left + * page, on the assumption that the table is scanned from beginning to + * end. This packs the index as tight as possible. + */ + if (btree->isBuild && GinPageRightMost(oldpage)) + separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem); + else + separator = nitems / 2; + nleftitems = separator; + nrightitems = nitems - separator; + + memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber), + allitems, + nleftitems * sizeof(PostingItem)); + GinPageGetOpaque(lpage)->maxoff = nleftitems; + memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber), + &allitems[separator], + nrightitems * sizeof(PostingItem)); + GinPageGetOpaque(rpage)->maxoff = nrightitems; + + /* + * Also set pd_lower for both pages, like GinDataPageAddPostingItem does. + */ + GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem)); + GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem)); + + /* set up right bound for left page */ + bound = GinDataPageGetRightBound(lpage); + *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key; + + /* set up right bound for right page */ + *GinDataPageGetRightBound(rpage) = oldbound; + + /* return temp pages to caller */ + *newlpage = lpage; + *newrpage = rpage; +} + +/* + * Construct insertion payload for inserting the downlink for given buffer. + */ +static void * +dataPrepareDownlink(GinBtree btree, Buffer lbuf) +{ + PostingItem *pitem = palloc(sizeof(PostingItem)); + Page lpage = BufferGetPage(lbuf); + + PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf)); + pitem->key = *GinDataPageGetRightBound(lpage); + + return pitem; +} + +/* + * Fills new root by right bound values from child. + * Also called from ginxlog, should not use btree + */ +void +ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage) +{ + PostingItem li, + ri; + + li.key = *GinDataPageGetRightBound(lpage); + PostingItemSetBlockNumber(&li, lblkno); + GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber); + + ri.key = *GinDataPageGetRightBound(rpage); + PostingItemSetBlockNumber(&ri, rblkno); + GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber); +} + + +/*** Functions to work with disassembled leaf pages ***/ + +/* + * Disassemble page into a disassembledLeaf struct. + */ +static disassembledLeaf * +disassembleLeaf(Page page) +{ + disassembledLeaf *leaf; + GinPostingList *seg; + Pointer segbegin; + Pointer segend; + + leaf = palloc0(sizeof(disassembledLeaf)); + dlist_init(&leaf->segments); + + if (GinPageIsCompressed(page)) + { + /* + * Create a leafSegmentInfo entry for each segment. 
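+ * Each entry initially points straight into the page and is marked
+ * UNMODIFIED.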
+ */ + seg = GinDataLeafPageGetPostingList(page); + segbegin = (Pointer) seg; + segend = segbegin + GinDataLeafPageGetPostingListSize(page); + while ((Pointer) seg < segend) + { + leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo)); + + seginfo->action = GIN_SEGMENT_UNMODIFIED; + seginfo->seg = seg; + seginfo->items = NULL; + seginfo->nitems = 0; + dlist_push_tail(&leaf->segments, &seginfo->node); + + seg = GinNextPostingListSegment(seg); + } + leaf->oldformat = false; + } + else + { + /* + * A pre-9.4 format uncompressed page is represented by a single + * segment, with an array of items. The corner case is uncompressed + * page containing no items, which is represented as no segments. + */ + ItemPointer uncompressed; + int nuncompressed; + leafSegmentInfo *seginfo; + + uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed); + + if (nuncompressed > 0) + { + seginfo = palloc(sizeof(leafSegmentInfo)); + + seginfo->action = GIN_SEGMENT_REPLACE; + seginfo->seg = NULL; + seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData)); + memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData)); + seginfo->nitems = nuncompressed; + + dlist_push_tail(&leaf->segments, &seginfo->node); + } + + leaf->oldformat = true; + } + + return leaf; +} + +/* + * Distribute newItems to the segments. + * + * Any segments that acquire new items are decoded, and the new items are + * merged with the old items. + * + * Returns true if any new items were added. False means they were all + * duplicates of existing items on the page. + */ +static bool +addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems) +{ + dlist_iter iter; + ItemPointer nextnew = newItems; + int newleft = nNewItems; + bool modified = false; + leafSegmentInfo *newseg; + + /* + * If the page is completely empty, just construct one new segment to hold + * all the new items. + */ + if (dlist_is_empty(&leaf->segments)) + { + newseg = palloc(sizeof(leafSegmentInfo)); + newseg->seg = NULL; + newseg->items = newItems; + newseg->nitems = nNewItems; + newseg->action = GIN_SEGMENT_INSERT; + dlist_push_tail(&leaf->segments, &newseg->node); + return true; + } + + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur); + int nthis; + ItemPointer tmpitems; + int ntmpitems; + + /* + * How many of the new items fall into this segment? + */ + if (!dlist_has_next(&leaf->segments, iter.cur)) + nthis = newleft; + else + { + leafSegmentInfo *next; + ItemPointerData next_first; + + next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, + dlist_next_node(&leaf->segments, iter.cur)); + if (next->items) + next_first = next->items[0]; + else + { + Assert(next->seg != NULL); + next_first = next->seg->first; + } + + nthis = 0; + while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0) + nthis++; + } + if (nthis == 0) + continue; + + /* Merge the new items with the existing items. */ + if (!cur->items) + cur->items = ginPostingListDecode(cur->seg, &cur->nitems); + + /* + * Fast path for the important special case that we're appending to + * the end of the page: don't let the last segment on the page grow + * larger than the target, create a new segment before that happens. 
+ */ + if (!dlist_has_next(&leaf->segments, iter.cur) && + ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 && + cur->seg != NULL && + SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize) + { + newseg = palloc(sizeof(leafSegmentInfo)); + newseg->seg = NULL; + newseg->items = nextnew; + newseg->nitems = nthis; + newseg->action = GIN_SEGMENT_INSERT; + dlist_push_tail(&leaf->segments, &newseg->node); + modified = true; + break; + } + + tmpitems = ginMergeItemPointers(cur->items, cur->nitems, + nextnew, nthis, + &ntmpitems); + if (ntmpitems != cur->nitems) + { + /* + * If there are no duplicates, track the added items so that we + * can emit a compact ADDITEMS WAL record later on. (it doesn't + * seem worth re-checking which items were duplicates, if there + * were any) + */ + if (ntmpitems == nthis + cur->nitems && + cur->action == GIN_SEGMENT_UNMODIFIED) + { + cur->action = GIN_SEGMENT_ADDITEMS; + cur->modifieditems = nextnew; + cur->nmodifieditems = nthis; + } + else + cur->action = GIN_SEGMENT_REPLACE; + + cur->items = tmpitems; + cur->nitems = ntmpitems; + cur->seg = NULL; + modified = true; + } + + nextnew += nthis; + newleft -= nthis; + if (newleft == 0) + break; + } + + return modified; +} + +/* + * Recompresses all segments that have been modified. + * + * If not all the items fit on two pages (ie. after split), we store as + * many items as fit, and set *remaining to the first item that didn't fit. + * If all items fit, *remaining is set to invalid. + * + * Returns true if the page has to be split. + */ +static bool +leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining) +{ + int pgused = 0; + bool needsplit = false; + dlist_iter iter; + int segsize; + leafSegmentInfo *nextseg; + int npacked; + bool modified; + dlist_node *cur_node; + dlist_node *next_node; + + ItemPointerSetInvalid(remaining); + + /* + * cannot use dlist_foreach_modify here because we insert adjacent items + * while iterating. + */ + for (cur_node = dlist_head_node(&leaf->segments); + cur_node != NULL; + cur_node = next_node) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, + cur_node); + + if (dlist_has_next(&leaf->segments, cur_node)) + next_node = dlist_next_node(&leaf->segments, cur_node); + else + next_node = NULL; + + /* Compress the posting list, if necessary */ + if (seginfo->action != GIN_SEGMENT_DELETE) + { + if (seginfo->seg == NULL) + { + if (seginfo->nitems > GinPostingListSegmentMaxSize) + npacked = 0; /* no chance that it would fit. */ + else + { + seginfo->seg = ginCompressPostingList(seginfo->items, + seginfo->nitems, + GinPostingListSegmentMaxSize, + &npacked); + } + if (npacked != seginfo->nitems) + { + /* + * Too large. Compress again to the target size, and + * create a new segment to represent the remaining items. + * The new segment is inserted after this one, so it will + * be processed in the next iteration of this loop. + */ + if (seginfo->seg) + pfree(seginfo->seg); + seginfo->seg = ginCompressPostingList(seginfo->items, + seginfo->nitems, + GinPostingListSegmentTargetSize, + &npacked); + if (seginfo->action != GIN_SEGMENT_INSERT) + seginfo->action = GIN_SEGMENT_REPLACE; + + nextseg = palloc(sizeof(leafSegmentInfo)); + nextseg->action = GIN_SEGMENT_INSERT; + nextseg->seg = NULL; + nextseg->items = &seginfo->items[npacked]; + nextseg->nitems = seginfo->nitems - npacked; + next_node = &nextseg->node; + dlist_insert_after(cur_node, next_node); + } + } + + /* + * If the segment is very small, merge it with the next segment. 
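+ * The merged items replace the next segment; this segment is then dropped
+ * (if it was a fresh INSERT) or marked DELETE.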
+ */ + if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node) + { + int nmerged; + + nextseg = dlist_container(leafSegmentInfo, node, next_node); + + if (seginfo->items == NULL) + seginfo->items = ginPostingListDecode(seginfo->seg, + &seginfo->nitems); + if (nextseg->items == NULL) + nextseg->items = ginPostingListDecode(nextseg->seg, + &nextseg->nitems); + nextseg->items = + ginMergeItemPointers(seginfo->items, seginfo->nitems, + nextseg->items, nextseg->nitems, + &nmerged); + Assert(nmerged == seginfo->nitems + nextseg->nitems); + nextseg->nitems = nmerged; + nextseg->seg = NULL; + + nextseg->action = GIN_SEGMENT_REPLACE; + nextseg->modifieditems = NULL; + nextseg->nmodifieditems = 0; + + if (seginfo->action == GIN_SEGMENT_INSERT) + { + dlist_delete(cur_node); + continue; + } + else + { + seginfo->action = GIN_SEGMENT_DELETE; + seginfo->seg = NULL; + } + } + + seginfo->items = NULL; + seginfo->nitems = 0; + } + + if (seginfo->action == GIN_SEGMENT_DELETE) + continue; + + /* + * OK, we now have a compressed version of this segment ready for + * copying to the page. Did we exceed the size that fits on one page? + */ + segsize = SizeOfGinPostingList(seginfo->seg); + if (pgused + segsize > GinDataPageMaxDataSize) + { + if (!needsplit) + { + /* switch to right page */ + Assert(pgused > 0); + leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node); + needsplit = true; + leaf->lsize = pgused; + pgused = 0; + } + else + { + /* + * Filled both pages. The last segment we constructed did not + * fit. + */ + *remaining = seginfo->seg->first; + + /* + * remove all segments that did not fit from the list. + */ + while (dlist_has_next(&leaf->segments, cur_node)) + dlist_delete(dlist_next_node(&leaf->segments, cur_node)); + dlist_delete(cur_node); + break; + } + } + + pgused += segsize; + } + + if (!needsplit) + { + leaf->lsize = pgused; + leaf->rsize = 0; + } + else + leaf->rsize = pgused; + + Assert(leaf->lsize <= GinDataPageMaxDataSize); + Assert(leaf->rsize <= GinDataPageMaxDataSize); + + /* + * Make a palloc'd copy of every segment after the first modified one, + * because as we start copying items to the original page, we might + * overwrite an existing segment. + */ + modified = false; + dlist_foreach(iter, &leaf->segments) + { + leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, + iter.cur); + + if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED) + { + modified = true; + } + else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED) + { + GinPostingList *tmp; + + segsize = SizeOfGinPostingList(seginfo->seg); + tmp = palloc(segsize); + memcpy(tmp, seginfo->seg, segsize); + seginfo->seg = tmp; + } + } + + return needsplit; +} + + +/*** Functions that are exported to the rest of the GIN code ***/ + +/* + * Creates new posting tree containing the given TIDs. Returns the page + * number of the root of the new posting tree. + * + * items[] must be in sorted order with no duplicates. + */ +BlockNumber +createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, + GinStatsData *buildStats, Buffer entrybuffer) +{ + BlockNumber blkno; + Buffer buffer; + Page tmppage; + Page page; + Pointer ptr; + int nrootitems; + int rootsize; + bool is_build = (buildStats != NULL); + + /* Construct the new root page in memory first. 
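+ * (It is copied into a newly-allocated buffer below, inside a critical
+ * section.)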
*/ + tmppage = (Page) palloc(BLCKSZ); + GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ); + GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber; + + /* + * Write as many of the items to the root page as fit. In segments of max + * GinPostingListSegmentMaxSize bytes each. + */ + nrootitems = 0; + rootsize = 0; + ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage); + while (nrootitems < nitems) + { + GinPostingList *segment; + int npacked; + int segsize; + + segment = ginCompressPostingList(&items[nrootitems], + nitems - nrootitems, + GinPostingListSegmentMaxSize, + &npacked); + segsize = SizeOfGinPostingList(segment); + if (rootsize + segsize > GinDataPageMaxDataSize) + break; + + memcpy(ptr, segment, segsize); + ptr += segsize; + rootsize += segsize; + nrootitems += npacked; + pfree(segment); + } + GinDataPageSetDataSize(tmppage, rootsize); + + /* + * All set. Get a new physical page, and copy the in-memory page to it. + */ + buffer = GinNewBuffer(index); + page = BufferGetPage(buffer); + blkno = BufferGetBlockNumber(buffer); + + /* + * Copy any predicate locks from the entry tree leaf (containing posting + * list) to the posting tree. + */ + PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno); + + START_CRIT_SECTION(); + + PageRestoreTempPage(tmppage, page); + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(index) && !is_build) + { + XLogRecPtr recptr; + ginxlogCreatePostingTree data; + + data.size = rootsize; + + XLogBeginInsert(); + XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree)); + + XLogRegisterData((char *) GinDataLeafPageGetPostingList(page), + rootsize); + XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE); + PageSetLSN(page, recptr); + } + + UnlockReleaseBuffer(buffer); + + END_CRIT_SECTION(); + + /* During index build, count the newly-added data page */ + if (buildStats) + buildStats->nDataPages++; + + elog(DEBUG2, "created GIN posting tree with %d items", nrootitems); + + /* + * Add any remaining TIDs to the newly-created posting tree. 
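+ * (Only needed when not all of the items fit on the root page above.)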
+ */ + if (nitems > nrootitems) + { + ginInsertItemPointers(index, blkno, + items + nrootitems, + nitems - nrootitems, + buildStats); + } + + return blkno; +} + +static void +ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno) +{ + memset(btree, 0, sizeof(GinBtreeData)); + + btree->index = index; + btree->rootBlkno = rootBlkno; + + btree->findChildPage = dataLocateItem; + btree->getLeftMostChild = dataGetLeftMostPage; + btree->isMoveRight = dataIsMoveRight; + btree->findItem = NULL; + btree->findChildPtr = dataFindChildPtr; + btree->beginPlaceToPage = dataBeginPlaceToPage; + btree->execPlaceToPage = dataExecPlaceToPage; + btree->fillRoot = ginDataFillRoot; + btree->prepareDownlink = dataPrepareDownlink; + + btree->isData = true; + btree->fullScan = false; + btree->isBuild = false; +} + +/* + * Inserts array of item pointers, may execute several tree scan (very rare) + */ +void +ginInsertItemPointers(Relation index, BlockNumber rootBlkno, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats) +{ + GinBtreeData btree; + GinBtreeDataLeafInsertData insertdata; + GinBtreeStack *stack; + + ginPrepareDataScan(&btree, index, rootBlkno); + btree.isBuild = (buildStats != NULL); + insertdata.items = items; + insertdata.nitem = nitem; + insertdata.curitem = 0; + + while (insertdata.curitem < insertdata.nitem) + { + /* search for the leaf page where the first item should go to */ + btree.itemptr = insertdata.items[insertdata.curitem]; + stack = ginFindLeafPage(&btree, false, true, NULL); + + ginInsertValue(&btree, stack, &insertdata, buildStats); + } +} + +/* + * Starts a new scan on a posting tree. + */ +GinBtreeStack * +ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, + Snapshot snapshot) +{ + GinBtreeStack *stack; + + ginPrepareDataScan(btree, index, rootBlkno); + + btree->fullScan = true; + + stack = ginFindLeafPage(btree, true, false, snapshot); + + return stack; +} diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c new file mode 100644 index 0000000..29c36bc --- /dev/null +++ b/src/backend/access/gin/ginentrypage.c @@ -0,0 +1,772 @@ +/*------------------------------------------------------------------------- + * + * ginentrypage.c + * routines for handling GIN entry tree pages. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginentrypage.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/xloginsert.h" +#include "miscadmin.h" +#include "utils/rel.h" + +static void entrySplitPage(GinBtree btree, Buffer origbuf, + GinBtreeStack *stack, + GinBtreeEntryInsertData *insertData, + BlockNumber updateblkno, + Page *newlpage, Page *newrpage); + +/* + * Form a tuple for entry tree. + * + * If the tuple would be too big to be stored, function throws a suitable + * error if errorTooBig is true, or returns NULL if errorTooBig is false. + * + * See src/backend/access/gin/README for a description of the index tuple + * format that is being built here. We build on the assumption that we + * are making a leaf-level key entry containing a posting list of nipd items. 
+ * If the caller is actually trying to make a posting-tree entry, non-leaf + * entry, or pending-list entry, it should pass dataSize = 0 and then overwrite + * the t_tid fields as necessary. In any case, 'data' can be NULL to skip + * filling in the posting list; the caller is responsible for filling it + * afterwards if data = NULL and nipd > 0. + */ +IndexTuple +GinFormTuple(GinState *ginstate, + OffsetNumber attnum, Datum key, GinNullCategory category, + Pointer data, Size dataSize, int nipd, + bool errorTooBig) +{ + Datum datums[2]; + bool isnull[2]; + IndexTuple itup; + uint32 newsize; + + /* Build the basic tuple: optional column number, plus key datum */ + if (ginstate->oneCol) + { + datums[0] = key; + isnull[0] = (category != GIN_CAT_NORM_KEY); + } + else + { + datums[0] = UInt16GetDatum(attnum); + isnull[0] = false; + datums[1] = key; + isnull[1] = (category != GIN_CAT_NORM_KEY); + } + + itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); + + /* + * Determine and store offset to the posting list, making sure there is + * room for the category byte if needed. + * + * Note: because index_form_tuple MAXALIGNs the tuple size, there may well + * be some wasted pad space. Is it worth recomputing the data length to + * prevent that? That would also allow us to Assert that the real data + * doesn't overlap the GinNullCategory byte, which this code currently + * takes on faith. + */ + newsize = IndexTupleSize(itup); + + if (IndexTupleHasNulls(itup)) + { + uint32 minsize; + + Assert(category != GIN_CAT_NORM_KEY); + minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory); + newsize = Max(newsize, minsize); + } + + newsize = SHORTALIGN(newsize); + + GinSetPostingOffset(itup, newsize); + GinSetNPosting(itup, nipd); + + /* + * Add space needed for posting list, if any. Then check that the tuple + * won't be too big to store. + */ + newsize += dataSize; + + newsize = MAXALIGN(newsize); + + if (newsize > GinMaxItemSize) + { + if (errorTooBig) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("index row size %zu exceeds maximum %zu for index \"%s\"", + (Size) newsize, (Size) GinMaxItemSize, + RelationGetRelationName(ginstate->index)))); + pfree(itup); + return NULL; + } + + /* + * Resize tuple if needed + */ + if (newsize != IndexTupleSize(itup)) + { + itup = repalloc(itup, newsize); + + /* + * PostgreSQL 9.3 and earlier did not clear this new space, so we + * might find uninitialized padding when reading tuples from disk. + */ + memset((char *) itup + IndexTupleSize(itup), + 0, newsize - IndexTupleSize(itup)); + /* set new size in tuple header */ + itup->t_info &= ~INDEX_SIZE_MASK; + itup->t_info |= newsize; + } + + /* + * Copy in the posting list, if provided + */ + if (data) + { + char *ptr = GinGetPosting(itup); + + memcpy(ptr, data, dataSize); + } + + /* + * Insert category byte, if needed + */ + if (category != GIN_CAT_NORM_KEY) + { + Assert(IndexTupleHasNulls(itup)); + GinSetNullCategory(itup, ginstate, category); + } + return itup; +} + +/* + * Read item pointers from leaf entry tuple. + * + * Returns a palloc'd array of ItemPointers. The number of items is returned + * in *nitems. 
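/*
 * Editor's illustration (not part of the patch): GinFormTuple() and
 * ginReadTuple() are duals -- one packs a compressed posting list behind the
 * key datum, the other decodes it again.  The hedged sketch below shows how a
 * caller might combine ginCompressPostingList() with GinFormTuple() to build
 * a leaf entry; it assumes the TIDs are sorted, duplicate-free, and small
 * enough to fit in a single segment, and that 'ginstate' was set up with
 * initGinState().
 */
static IndexTuple
form_leaf_entry_sketch(GinState *ginstate, OffsetNumber attnum,
                       Datum key, GinNullCategory category,
                       ItemPointerData *items, int nitems)
{
    GinPostingList *plist;
    int         npacked;
    IndexTuple  itup;

    /* compress the sorted TIDs into one posting-list segment */
    plist = ginCompressPostingList(items, nitems,
                                   GinPostingListSegmentMaxSize, &npacked);
    Assert(npacked == nitems);  /* assumed: everything fits in one segment */

    /* build the tuple; errors out if it would exceed GinMaxItemSize */
    itup = GinFormTuple(ginstate, attnum, key, category,
                        (Pointer) plist, SizeOfGinPostingList(plist),
                        npacked, true);
    pfree(plist);

    /* ginReadTuple(ginstate, attnum, itup, &n) would give the TIDs back */
    return itup;
}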
+ */ +ItemPointer +ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, + int *nitems) +{ + Pointer ptr = GinGetPosting(itup); + int nipd = GinGetNPosting(itup); + ItemPointer ipd; + int ndecoded; + + if (GinItupIsCompressed(itup)) + { + if (nipd > 0) + { + ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded); + if (nipd != ndecoded) + elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded", + nipd, ndecoded); + } + else + { + ipd = palloc(0); + } + } + else + { + ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd); + memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd); + } + *nitems = nipd; + return ipd; +} + +/* + * Form a non-leaf entry tuple by copying the key data from the given tuple, + * which can be either a leaf or non-leaf entry tuple. + * + * Any posting list in the source tuple is not copied. The specified child + * block number is inserted into t_tid. + */ +static IndexTuple +GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk) +{ + IndexTuple nitup; + + if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) + { + /* Tuple contains a posting list, just copy stuff before that */ + uint32 origsize = GinGetPostingOffset(itup); + + origsize = MAXALIGN(origsize); + nitup = (IndexTuple) palloc(origsize); + memcpy(nitup, itup, origsize); + /* ... be sure to fix the size header field ... */ + nitup->t_info &= ~INDEX_SIZE_MASK; + nitup->t_info |= origsize; + } + else + { + /* Copy the tuple as-is */ + nitup = (IndexTuple) palloc(IndexTupleSize(itup)); + memcpy(nitup, itup, IndexTupleSize(itup)); + } + + /* Now insert the correct downlink */ + GinSetDownlink(nitup, childblk); + + return nitup; +} + +/* + * Entry tree is a "static", ie tuple never deletes from it, + * so we don't use right bound, we use rightmost key instead. + */ +static IndexTuple +getRightMostTuple(Page page) +{ + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + + return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff)); +} + +static bool +entryIsMoveRight(GinBtree btree, Page page) +{ + IndexTuple itup; + OffsetNumber attnum; + Datum key; + GinNullCategory category; + + if (GinPageRightMost(page)) + return false; + + itup = getRightMostTuple(page); + attnum = gintuple_get_attrnum(btree->ginstate, itup); + key = gintuple_get_key(btree->ginstate, itup, &category); + + if (ginCompareAttEntries(btree->ginstate, + btree->entryAttnum, btree->entryKey, btree->entryCategory, + attnum, key, category) > 0) + return true; + + return false; +} + +/* + * Find correct tuple in non-leaf page. 
It supposed that + * page correctly chosen and searching value SHOULD be on page + */ +static BlockNumber +entryLocateEntry(GinBtree btree, GinBtreeStack *stack) +{ + OffsetNumber low, + high, + maxoff; + IndexTuple itup = NULL; + int result; + Page page = BufferGetPage(stack->buffer); + + Assert(!GinPageIsLeaf(page)); + Assert(!GinPageIsData(page)); + + if (btree->fullScan) + { + stack->off = FirstOffsetNumber; + stack->predictNumber *= PageGetMaxOffsetNumber(page); + return btree->getLeftMostChild(btree, page); + } + + low = FirstOffsetNumber; + maxoff = high = PageGetMaxOffsetNumber(page); + Assert(high >= low); + + high++; + + while (high > low) + { + OffsetNumber mid = low + ((high - low) / 2); + + if (mid == maxoff && GinPageRightMost(page)) + { + /* Right infinity */ + result = -1; + } + else + { + OffsetNumber attnum; + Datum key; + GinNullCategory category; + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); + attnum = gintuple_get_attrnum(btree->ginstate, itup); + key = gintuple_get_key(btree->ginstate, itup, &category); + result = ginCompareAttEntries(btree->ginstate, + btree->entryAttnum, + btree->entryKey, + btree->entryCategory, + attnum, key, category); + } + + if (result == 0) + { + stack->off = mid; + Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); + return GinGetDownlink(itup); + } + else if (result > 0) + low = mid + 1; + else + high = mid; + } + + Assert(high >= FirstOffsetNumber && high <= maxoff); + + stack->off = high; + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); + Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); + return GinGetDownlink(itup); +} + +/* + * Searches correct position for value on leaf page. + * Page should be correctly chosen. + * Returns true if value found on page. + */ +static bool +entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) +{ + Page page = BufferGetPage(stack->buffer); + OffsetNumber low, + high; + + Assert(GinPageIsLeaf(page)); + Assert(!GinPageIsData(page)); + + if (btree->fullScan) + { + stack->off = FirstOffsetNumber; + return true; + } + + low = FirstOffsetNumber; + high = PageGetMaxOffsetNumber(page); + + if (high < low) + { + stack->off = FirstOffsetNumber; + return false; + } + + high++; + + while (high > low) + { + OffsetNumber mid = low + ((high - low) / 2); + IndexTuple itup; + OffsetNumber attnum; + Datum key; + GinNullCategory category; + int result; + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); + attnum = gintuple_get_attrnum(btree->ginstate, itup); + key = gintuple_get_key(btree->ginstate, itup, &category); + result = ginCompareAttEntries(btree->ginstate, + btree->entryAttnum, + btree->entryKey, + btree->entryCategory, + attnum, key, category); + if (result == 0) + { + stack->off = mid; + return true; + } + else if (result > 0) + low = mid + 1; + else + high = mid; + } + + stack->off = high; + return false; +} + +static OffsetNumber +entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) +{ + OffsetNumber i, + maxoff = PageGetMaxOffsetNumber(page); + IndexTuple itup; + + Assert(!GinPageIsLeaf(page)); + Assert(!GinPageIsData(page)); + + /* if page isn't changed, we returns storedOff */ + if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); + if (GinGetDownlink(itup) == blkno) + return storedOff; + + /* + * we hope, that needed pointer goes to right. 
It's true if there + * wasn't a deletion + */ + for (i = storedOff + 1; i <= maxoff; i++) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + if (GinGetDownlink(itup) == blkno) + return i; + } + maxoff = storedOff - 1; + } + + /* last chance */ + for (i = FirstOffsetNumber; i <= maxoff; i++) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + if (GinGetDownlink(itup) == blkno) + return i; + } + + return InvalidOffsetNumber; +} + +static BlockNumber +entryGetLeftMostPage(GinBtree btree, Page page) +{ + IndexTuple itup; + + Assert(!GinPageIsLeaf(page)); + Assert(!GinPageIsData(page)); + Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); + return GinGetDownlink(itup); +} + +static bool +entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, + GinBtreeEntryInsertData *insertData) +{ + Size releasedsz = 0; + Size addedsz; + Page page = BufferGetPage(buf); + + Assert(insertData->entry); + Assert(!GinPageIsData(page)); + + if (insertData->isDelete) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + + releasedsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); + } + + addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData); + + if (PageGetFreeSpace(page) + releasedsz >= addedsz) + return true; + + return false; +} + +/* + * Delete tuple on leaf page if tuples existed and we + * should update it, update old child blkno to new right page + * if child split occurred + */ +static void +entryPreparePage(GinBtree btree, Page page, OffsetNumber off, + GinBtreeEntryInsertData *insertData, BlockNumber updateblkno) +{ + Assert(insertData->entry); + Assert(!GinPageIsData(page)); + + if (insertData->isDelete) + { + Assert(GinPageIsLeaf(page)); + PageIndexTupleDelete(page, off); + } + + if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + + GinSetDownlink(itup, updateblkno); + } +} + +/* + * Prepare to insert data on an entry page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. + * + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. + * + * In neither case should the given page buffer be modified here. + * + * Note: on insertion to an internal node, in addition to inserting the given + * item, the downlink of the existing item at stack->off will be updated to + * point to updateblkno. + */ +static GinPlaceToPageRC +entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertPayload, BlockNumber updateblkno, + void **ptp_workspace, + Page *newlpage, Page *newrpage) +{ + GinBtreeEntryInsertData *insertData = insertPayload; + OffsetNumber off = stack->off; + + /* If it doesn't fit, deal with split case */ + if (!entryIsEnoughSpace(btree, buf, off, insertData)) + { + entrySplitPage(btree, buf, stack, insertData, updateblkno, + newlpage, newrpage); + return GPTP_SPLIT; + } + + /* Else, we're ready to proceed with insertion */ + return GPTP_INSERT; +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. 
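/*
 * Editor's note (illustration only): the beginPlaceToPage/execPlaceToPage
 * pair is driven by ginPlaceToPage() in ginbtree.c, which is not part of
 * this file.  The rough, hedged sketch below shows the intended calling
 * shape -- decide and prepare outside the critical section, then apply the
 * change (with WAL record assembly) inside it.  'btree', 'stack',
 * 'insertdata' and 'updateblkno' come from the caller's context, and the
 * split path and final XLogInsert() bookkeeping are elided.
 */
GinPlaceToPageRC rc;
void       *ptp_workspace = NULL;
Page        newlpage = NULL;
Page        newrpage = NULL;

rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
                             insertdata, updateblkno,
                             &ptp_workspace, &newlpage, &newrpage);
if (rc == GPTP_INSERT)
{
    START_CRIT_SECTION();
    if (RelationNeedsWAL(btree->index) && !btree->isBuild)
    {
        XLogBeginInsert();
        XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD); /* "slot 0" */
    }
    btree->execPlaceToPage(btree, stack->buffer, stack,
                           insertdata, updateblkno, ptp_workspace);
    MarkBufferDirty(stack->buffer);
    /* ... XLogInsert() and PageSetLSN() follow when WAL is needed ... */
    END_CRIT_SECTION();
}
else
{
    /* GPTP_SPLIT: newlpage/newrpage hold temp images; caller performs the split */
}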
The target buffer is registered in slot 0. + */ +static void +entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertPayload, BlockNumber updateblkno, + void *ptp_workspace) +{ + GinBtreeEntryInsertData *insertData = insertPayload; + Page page = BufferGetPage(buf); + OffsetNumber off = stack->off; + OffsetNumber placed; + + entryPreparePage(btree, page, off, insertData, updateblkno); + + placed = PageAddItem(page, + (Item) insertData->entry, + IndexTupleSize(insertData->entry), + off, false, false); + if (placed != off) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(btree->index)); + + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + { + /* + * This must be static, because it has to survive until XLogInsert, + * and we can't palloc here. Ugly, but the XLogInsert infrastructure + * isn't reentrant anyway. + */ + static ginxlogInsertEntry data; + + data.isDelete = insertData->isDelete; + data.offset = off; + + XLogRegisterBufData(0, (char *) &data, + offsetof(ginxlogInsertEntry, tuple)); + XLogRegisterBufData(0, (char *) insertData->entry, + IndexTupleSize(insertData->entry)); + } +} + +/* + * Split entry page and insert new data. + * + * Returns new temp pages to *newlpage and *newrpage. + * The original buffer is left untouched. + */ +static void +entrySplitPage(GinBtree btree, Buffer origbuf, + GinBtreeStack *stack, + GinBtreeEntryInsertData *insertData, + BlockNumber updateblkno, + Page *newlpage, Page *newrpage) +{ + OffsetNumber off = stack->off; + OffsetNumber i, + maxoff, + separator = InvalidOffsetNumber; + Size totalsize = 0; + Size lsize = 0, + size; + char *ptr; + IndexTuple itup; + Page page; + Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); + Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); + Size pageSize = PageGetPageSize(lpage); + PGAlignedBlock tupstore[2]; /* could need 2 pages' worth of tuples */ + + entryPreparePage(btree, lpage, off, insertData, updateblkno); + + /* + * First, append all the existing tuples and the new tuple we're inserting + * one after another in a temporary workspace. + */ + maxoff = PageGetMaxOffsetNumber(lpage); + ptr = tupstore[0].data; + for (i = FirstOffsetNumber; i <= maxoff; i++) + { + if (i == off) + { + size = MAXALIGN(IndexTupleSize(insertData->entry)); + memcpy(ptr, insertData->entry, size); + ptr += size; + totalsize += size + sizeof(ItemIdData); + } + + itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); + size = MAXALIGN(IndexTupleSize(itup)); + memcpy(ptr, itup, size); + ptr += size; + totalsize += size + sizeof(ItemIdData); + } + + if (off == maxoff + 1) + { + size = MAXALIGN(IndexTupleSize(insertData->entry)); + memcpy(ptr, insertData->entry, size); + ptr += size; + totalsize += size + sizeof(ItemIdData); + } + + /* + * Initialize the left and right pages, and copy all the tuples back to + * them. + */ + GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); + GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); + + ptr = tupstore[0].data; + maxoff++; + lsize = 0; + + page = lpage; + for (i = FirstOffsetNumber; i <= maxoff; i++) + { + itup = (IndexTuple) ptr; + + /* + * Decide where to split. We try to equalize the pages' total data + * size, not number of tuples. 
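/*
 * Editor's illustration (standalone, hypothetical sizes): the split rule
 * used in entrySplitPage() keeps appending tuples to the left page until the
 * left page's share of the total data size exceeds half, then sends the
 * remainder to the right page, equalizing bytes rather than tuple counts.
 * Compiles with any C compiler; the sizes below are made up.
 */
#include <stdio.h>

int
main(void)
{
    /* hypothetical MAXALIGN'd tuple sizes, each including line-pointer overhead */
    int         sizes[] = {48, 320, 64, 1024, 96, 512, 80};
    int         ntups = (int) (sizeof(sizes) / sizeof(sizes[0]));
    int         totalsize = 0;
    int         lsize = 0;
    int         separator = -1;

    for (int i = 0; i < ntups; i++)
        totalsize += sizes[i];

    for (int i = 0; i < ntups; i++)
    {
        if (lsize > totalsize / 2)
        {
            if (separator < 0)
                separator = i - 1;  /* last tuple placed on the left page */
            /* this tuple and all later ones go to the right page */
        }
        else
            lsize += sizes[i];
    }

    printf("split after tuple %d: left %d bytes, right %d bytes of %d total\n",
           separator, lsize, totalsize - lsize, totalsize);
    return 0;
}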
+ */ + if (lsize > totalsize / 2) + { + if (separator == InvalidOffsetNumber) + separator = i - 1; + page = rpage; + } + else + { + lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); + } + + if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(btree->index)); + ptr += MAXALIGN(IndexTupleSize(itup)); + } + + /* return temp pages to caller */ + *newlpage = lpage; + *newrpage = rpage; +} + +/* + * Construct insertion payload for inserting the downlink for given buffer. + */ +static void * +entryPrepareDownlink(GinBtree btree, Buffer lbuf) +{ + GinBtreeEntryInsertData *insertData; + Page lpage = BufferGetPage(lbuf); + BlockNumber lblkno = BufferGetBlockNumber(lbuf); + IndexTuple itup; + + itup = getRightMostTuple(lpage); + + insertData = palloc(sizeof(GinBtreeEntryInsertData)); + insertData->entry = GinFormInteriorTuple(itup, lpage, lblkno); + insertData->isDelete = false; + + return insertData; +} + +/* + * Fills new root by rightest values from child. + * Also called from ginxlog, should not use btree + */ +void +ginEntryFillRoot(GinBtree btree, Page root, + BlockNumber lblkno, Page lpage, + BlockNumber rblkno, Page rpage) +{ + IndexTuple itup; + + itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage, lblkno); + if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index root page"); + pfree(itup); + + itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage, rblkno); + if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index root page"); + pfree(itup); +} + +/* + * Set up GinBtree for entry page access + * + * Note: during WAL recovery, there may be no valid data in ginstate + * other than a faked-up Relation pointer; the key datum is bogus too. + */ +void +ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, + Datum key, GinNullCategory category, + GinState *ginstate) +{ + memset(btree, 0, sizeof(GinBtreeData)); + + btree->index = ginstate->index; + btree->rootBlkno = GIN_ROOT_BLKNO; + btree->ginstate = ginstate; + + btree->findChildPage = entryLocateEntry; + btree->getLeftMostChild = entryGetLeftMostPage; + btree->isMoveRight = entryIsMoveRight; + btree->findItem = entryLocateLeafEntry; + btree->findChildPtr = entryFindChildPtr; + btree->beginPlaceToPage = entryBeginPlaceToPage; + btree->execPlaceToPage = entryExecPlaceToPage; + btree->fillRoot = ginEntryFillRoot; + btree->prepareDownlink = entryPrepareDownlink; + + btree->isData = false; + btree->fullScan = false; + btree->isBuild = false; + + btree->entryAttnum = attnum; + btree->entryKey = key; + btree->entryCategory = category; +} diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c new file mode 100644 index 0000000..e0d9940 --- /dev/null +++ b/src/backend/access/gin/ginfast.c @@ -0,0 +1,1068 @@ +/*------------------------------------------------------------------------- + * + * ginfast.c + * Fast insert routines for the Postgres inverted index access method. + * Pending entries are stored in linear list of pages. Later on + * (typically during VACUUM), ginInsertCleanup() will be invoked to + * transfer pending entries into the regular index structure. This + * wins because bulk insertion is much more efficient than retail. 
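/*
 * Editor's illustration (standalone, rough numbers): the pending list is
 * flushed by ginInsertCleanup() once its pages hold roughly
 * gin_pending_list_limit kilobytes, per the check
 * nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024 further down in
 * this file.  The overhead constant below is only a stand-in for the page
 * header plus GIN opaque data, not the exact value.
 */
#include <stdio.h>

#define BLCKSZ              8192
#define PAGE_OVERHEAD       48      /* rough stand-in for header + opaque */
#define GIN_PAGE_FREESIZE   (BLCKSZ - PAGE_OVERHEAD)

int
main(void)
{
    long        limit_kb = 4096;    /* default gin_pending_list_limit (4 MB) */
    long        pages = limit_kb * 1024L / GIN_PAGE_FREESIZE + 1;

    printf("with %ld kB limit, cleanup is forced after about %ld pending pages\n",
           limit_kb, pages);
    return 0;
}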
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginfast.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "catalog/pg_am.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "port/pg_bitutils.h" +#include "postmaster/autovacuum.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +/* GUC parameter */ +int gin_pending_list_limit = 0; + +#define GIN_PAGE_FREESIZE \ + ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) ) + +typedef struct KeyArray +{ + Datum *keys; /* expansible array */ + GinNullCategory *categories; /* another expansible array */ + int32 nvalues; /* current number of valid entries */ + int32 maxvalues; /* allocated size of arrays */ +} KeyArray; + + +/* + * Build a pending-list page from the given array of tuples, and write it out. + * + * Returns amount of free space left on the page. + */ +static int32 +writeListPage(Relation index, Buffer buffer, + IndexTuple *tuples, int32 ntuples, BlockNumber rightlink) +{ + Page page = BufferGetPage(buffer); + int32 i, + freesize, + size = 0; + OffsetNumber l, + off; + PGAlignedBlock workspace; + char *ptr; + + START_CRIT_SECTION(); + + GinInitBuffer(buffer, GIN_LIST); + + off = FirstOffsetNumber; + ptr = workspace.data; + + for (i = 0; i < ntuples; i++) + { + int this_size = IndexTupleSize(tuples[i]); + + memcpy(ptr, tuples[i], this_size); + ptr += this_size; + size += this_size; + + l = PageAddItem(page, (Item) tuples[i], this_size, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(index)); + + off++; + } + + Assert(size <= BLCKSZ); /* else we overran workspace */ + + GinPageGetOpaque(page)->rightlink = rightlink; + + /* + * tail page may contain only whole row(s) or final part of row placed on + * previous pages (a "row" here meaning all the index tuples generated for + * one heap tuple) + */ + if (rightlink == InvalidBlockNumber) + { + GinPageSetFullRow(page); + GinPageGetOpaque(page)->maxoff = 1; + } + else + { + GinPageGetOpaque(page)->maxoff = 0; + } + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(index)) + { + ginxlogInsertListPage data; + XLogRecPtr recptr; + + data.rightlink = rightlink; + data.ntuples = ntuples; + + XLogBeginInsert(); + XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage)); + + XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); + XLogRegisterBufData(0, workspace.data, size); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE); + PageSetLSN(page, recptr); + } + + /* get free space before releasing buffer */ + freesize = PageGetExactFreeSpace(page); + + UnlockReleaseBuffer(buffer); + + END_CRIT_SECTION(); + + return freesize; +} + +static void +makeSublist(Relation index, IndexTuple *tuples, int32 ntuples, + GinMetaPageData *res) +{ + Buffer curBuffer = InvalidBuffer; + Buffer prevBuffer = InvalidBuffer; + int i, + size = 0, + tupsize; + int startTuple = 0; + + Assert(ntuples > 0); + + /* + * Split tuples into pages + */ + for (i = 0; i < ntuples; i++) + { + if 
(curBuffer == InvalidBuffer) + { + curBuffer = GinNewBuffer(index); + + if (prevBuffer != InvalidBuffer) + { + res->nPendingPages++; + writeListPage(index, prevBuffer, + tuples + startTuple, + i - startTuple, + BufferGetBlockNumber(curBuffer)); + } + else + { + res->head = BufferGetBlockNumber(curBuffer); + } + + prevBuffer = curBuffer; + startTuple = i; + size = 0; + } + + tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData); + + if (size + tupsize > GinListPageSize) + { + /* won't fit, force a new page and reprocess */ + i--; + curBuffer = InvalidBuffer; + } + else + { + size += tupsize; + } + } + + /* + * Write last page + */ + res->tail = BufferGetBlockNumber(curBuffer); + res->tailFreeSize = writeListPage(index, curBuffer, + tuples + startTuple, + ntuples - startTuple, + InvalidBlockNumber); + res->nPendingPages++; + /* that was only one heap tuple */ + res->nPendingHeapTuples = 1; +} + +/* + * Write the index tuples contained in *collector into the index's + * pending list. + * + * Function guarantees that all these tuples will be inserted consecutively, + * preserving order + */ +void +ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) +{ + Relation index = ginstate->index; + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata = NULL; + Buffer buffer = InvalidBuffer; + Page page = NULL; + ginxlogUpdateMeta data; + bool separateList = false; + bool needCleanup = false; + int cleanupSize; + bool needWal; + + if (collector->ntuples == 0) + return; + + needWal = RelationNeedsWAL(index); + + data.node = index->rd_node; + data.ntuples = 0; + data.newRightlink = data.prevTail = InvalidBlockNumber; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + metapage = BufferGetPage(metabuffer); + + /* + * An insertion to the pending list could logically belong anywhere in the + * tree, so it conflicts with all serializable scans. All scans acquire a + * predicate lock on the metabuffer to represent that. 
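/*
 * Editor's note (illustration only): the code that follows chooses between
 * two placements for the collected tuples.  The condensed restatement below
 * is a sketch, not part of the patch; it assumes PostgreSQL's Size and
 * ItemIdData types and mirrors the checks made by ginHeapTupleFastInsert().
 */
typedef enum
{
    APPEND_TO_TAIL_PAGE,        /* add tuples to the current tail page */
    LINK_NEW_SUBLIST            /* build separate pages and link them in */
} PendingPlacement;

static PendingPlacement
choose_pending_placement(Size sumsize, int ntuples,
                         Size tailFreeSize, bool listIsEmpty)
{
    Size        needed = sumsize + ntuples * sizeof(ItemIdData);

    if (needed > GinListPageSize)
        return LINK_NEW_SUBLIST;        /* too large for any single page */
    if (listIsEmpty || needed > tailFreeSize)
        return LINK_NEW_SUBLIST;        /* no tail page, or it is too full */
    return APPEND_TO_TAIL_PAGE;         /* fits on the existing tail page */
}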
+ */ + CheckForSerializableConflictIn(index, NULL, GIN_METAPAGE_BLKNO); + + if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize) + { + /* + * Total size is greater than one page => make sublist + */ + separateList = true; + } + else + { + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metadata = GinPageGetMeta(metapage); + + if (metadata->head == InvalidBlockNumber || + collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize) + { + /* + * Pending list is empty or total size is greater than freespace + * on tail page => make sublist + * + * We unlock metabuffer to keep high concurrency + */ + separateList = true; + LockBuffer(metabuffer, GIN_UNLOCK); + } + } + + if (separateList) + { + /* + * We should make sublist separately and append it to the tail + */ + GinMetaPageData sublist; + + memset(&sublist, 0, sizeof(GinMetaPageData)); + makeSublist(index, collector->tuples, collector->ntuples, &sublist); + + if (needWal) + XLogBeginInsert(); + + /* + * metapage was unlocked, see above + */ + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metadata = GinPageGetMeta(metapage); + + if (metadata->head == InvalidBlockNumber) + { + /* + * Main list is empty, so just insert sublist as main list + */ + START_CRIT_SECTION(); + + metadata->head = sublist.head; + metadata->tail = sublist.tail; + metadata->tailFreeSize = sublist.tailFreeSize; + + metadata->nPendingPages = sublist.nPendingPages; + metadata->nPendingHeapTuples = sublist.nPendingHeapTuples; + } + else + { + /* + * Merge lists + */ + data.prevTail = metadata->tail; + data.newRightlink = sublist.head; + + buffer = ReadBuffer(index, metadata->tail); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + + Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber); + + START_CRIT_SECTION(); + + GinPageGetOpaque(page)->rightlink = sublist.head; + + MarkBufferDirty(buffer); + + metadata->tail = sublist.tail; + metadata->tailFreeSize = sublist.tailFreeSize; + + metadata->nPendingPages += sublist.nPendingPages; + metadata->nPendingHeapTuples += sublist.nPendingHeapTuples; + + if (needWal) + XLogRegisterBuffer(1, buffer, REGBUF_STANDARD); + } + } + else + { + /* + * Insert into tail page. Metapage is already locked + */ + OffsetNumber l, + off; + int i, + tupsize; + char *ptr; + char *collectordata; + + buffer = ReadBuffer(index, metadata->tail); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + + off = (PageIsEmpty(page)) ? 
FirstOffsetNumber : + OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + collectordata = ptr = (char *) palloc(collector->sumsize); + + data.ntuples = collector->ntuples; + + if (needWal) + XLogBeginInsert(); + + START_CRIT_SECTION(); + + /* + * Increase counter of heap tuples + */ + Assert(GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples); + GinPageGetOpaque(page)->maxoff++; + metadata->nPendingHeapTuples++; + + for (i = 0; i < collector->ntuples; i++) + { + tupsize = IndexTupleSize(collector->tuples[i]); + l = PageAddItem(page, (Item) collector->tuples[i], tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(index)); + + memcpy(ptr, collector->tuples[i], tupsize); + ptr += tupsize; + + off++; + } + + Assert((ptr - collectordata) <= collector->sumsize); + if (needWal) + { + XLogRegisterBuffer(1, buffer, REGBUF_STANDARD); + XLogRegisterBufData(1, collectordata, collector->sumsize); + } + + metadata->tailFreeSize = PageGetExactFreeSpace(page); + + MarkBufferDirty(buffer); + } + + /* + * Set pd_lower just past the end of the metadata. This is essential, + * because without doing so, metadata will be lost if xlog.c compresses + * the page. (We must do this here because pre-v11 versions of PG did not + * set the metapage's pd_lower correctly, so a pg_upgraded index might + * contain the wrong value.) + */ + ((PageHeader) metapage)->pd_lower = + ((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage; + + /* + * Write metabuffer, make xlog entry + */ + MarkBufferDirty(metabuffer); + + if (needWal) + { + XLogRecPtr recptr; + + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); + + XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); + XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta)); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE); + PageSetLSN(metapage, recptr); + + if (buffer != InvalidBuffer) + { + PageSetLSN(page, recptr); + } + } + + if (buffer != InvalidBuffer) + UnlockReleaseBuffer(buffer); + + /* + * Force pending list cleanup when it becomes too long. And, + * ginInsertCleanup could take significant amount of time, so we prefer to + * call it when it can do all the work in a single collection cycle. In + * non-vacuum mode, it shouldn't require maintenance_work_mem, so fire it + * while pending list is still small enough to fit into + * gin_pending_list_limit. + * + * ginInsertCleanup() should not be called inside our CRIT_SECTION. + */ + cleanupSize = GinGetPendingListCleanupSize(index); + if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024L) + needCleanup = true; + + UnlockReleaseBuffer(metabuffer); + + END_CRIT_SECTION(); + + /* + * Since it could contend with concurrent cleanup process we cleanup + * pending list not forcibly. + */ + if (needCleanup) + ginInsertCleanup(ginstate, false, true, false, NULL); +} + +/* + * Create temporary index tuples for a single indexable item (one index column + * for the heap tuple specified by ht_ctid), and append them to the array + * in *collector. They will subsequently be written out using + * ginHeapTupleFastInsert. Note that to guarantee consistent state, all + * temp tuples for a given heap tuple must be written in one call to + * ginHeapTupleFastInsert. 
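/*
 * Editor's illustration (hedged sketch): the expected calling pattern is to
 * collect the temporary tuples for every indexed column of one heap tuple
 * and then hand them to ginHeapTupleFastInsert() in a single call, so the
 * pending list stays consistent.  The real caller is ginInsert() in
 * gininsert.c, which is not part of this file; 'values', 'isnull' and
 * 'ht_ctid' below come from that caller's context.
 */
GinTupleCollector collector;
int         i;

memset(&collector, 0, sizeof(GinTupleCollector));

for (i = 0; i < ginstate->origTupdesc->natts; i++)
    ginHeapTupleFastCollect(ginstate, &collector,
                            (OffsetNumber) (i + 1),
                            values[i], isnull[i],
                            ht_ctid);

ginHeapTupleFastInsert(ginstate, &collector);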
+ */ +void +ginHeapTupleFastCollect(GinState *ginstate, + GinTupleCollector *collector, + OffsetNumber attnum, Datum value, bool isNull, + ItemPointer ht_ctid) +{ + Datum *entries; + GinNullCategory *categories; + int32 i, + nentries; + + /* + * Extract the key values that need to be inserted in the index + */ + entries = ginExtractEntries(ginstate, attnum, value, isNull, + &nentries, &categories); + + /* + * Protect against integer overflow in allocation calculations + */ + if (nentries < 0 || + collector->ntuples + nentries > MaxAllocSize / sizeof(IndexTuple)) + elog(ERROR, "too many entries for GIN index"); + + /* + * Allocate/reallocate memory for storing collected tuples + */ + if (collector->tuples == NULL) + { + /* + * Determine the number of elements to allocate in the tuples array + * initially. Make it a power of 2 to avoid wasting memory when + * resizing (since palloc likes powers of 2). + */ + collector->lentuples = pg_nextpower2_32(Max(16, nentries)); + collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples); + } + else if (collector->lentuples < collector->ntuples + nentries) + { + /* + * Advance lentuples to the next suitable power of 2. This won't + * overflow, though we could get to a value that exceeds + * MaxAllocSize/sizeof(IndexTuple), causing an error in repalloc. + */ + collector->lentuples = pg_nextpower2_32(collector->ntuples + nentries); + collector->tuples = (IndexTuple *) repalloc(collector->tuples, + sizeof(IndexTuple) * collector->lentuples); + } + + /* + * Build an index tuple for each key value, and add to array. In pending + * tuples we just stick the heap TID into t_tid. + */ + for (i = 0; i < nentries; i++) + { + IndexTuple itup; + + itup = GinFormTuple(ginstate, attnum, entries[i], categories[i], + NULL, 0, 0, true); + itup->t_tid = *ht_ctid; + collector->tuples[collector->ntuples++] = itup; + collector->sumsize += IndexTupleSize(itup); + } +} + +/* + * Deletes pending list pages up to (not including) newHead page. + * If newHead == InvalidBlockNumber then function drops the whole list. + * + * metapage is pinned and exclusive-locked throughout this function. + */ +static void +shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, + bool fill_fsm, IndexBulkDeleteResult *stats) +{ + Page metapage; + GinMetaPageData *metadata; + BlockNumber blknoToDelete; + + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + blknoToDelete = metadata->head; + + do + { + Page page; + int i; + int64 nDeletedHeapTuples = 0; + ginxlogDeleteListPages data; + Buffer buffers[GIN_NDELETE_AT_ONCE]; + BlockNumber freespace[GIN_NDELETE_AT_ONCE]; + + data.ndeleted = 0; + while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead) + { + freespace[data.ndeleted] = blknoToDelete; + buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete); + LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE); + page = BufferGetPage(buffers[data.ndeleted]); + + data.ndeleted++; + + Assert(!GinPageIsDeleted(page)); + + nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff; + blknoToDelete = GinPageGetOpaque(page)->rightlink; + } + + if (stats) + stats->pages_deleted += data.ndeleted; + + /* + * This operation touches an unusually large number of pages, so + * prepare the XLogInsert machinery for that before entering the + * critical section. 
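/*
 * Editor's illustration (standalone): ginHeapTupleFastCollect() above grows
 * the collector array only to powers of two, so repeated collection causes
 * at most a logarithmic number of repalloc() calls.  The helper below is a
 * simplified stand-in for pg_nextpower2_32() from port/pg_bitutils.h.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t
next_power_of_2(uint32_t n)
{
    uint32_t    p = 1;

    while (p < n)
        p <<= 1;
    return p;
}

int
main(void)
{
    uint32_t    lentuples = next_power_of_2(16);   /* initial allocation */
    uint32_t    needed;

    for (needed = 20; needed <= 200; needed += 45)
    {
        if (needed > lentuples)
            lentuples = next_power_of_2(needed);   /* grow to next power of 2 */
        printf("need %u tuples -> array sized %u\n", needed, lentuples);
    }
    return 0;
}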
+ */ + if (RelationNeedsWAL(index)) + XLogEnsureRecordSpace(data.ndeleted, 0); + + START_CRIT_SECTION(); + + metadata->head = blknoToDelete; + + Assert(metadata->nPendingPages >= data.ndeleted); + metadata->nPendingPages -= data.ndeleted; + Assert(metadata->nPendingHeapTuples >= nDeletedHeapTuples); + metadata->nPendingHeapTuples -= nDeletedHeapTuples; + + if (blknoToDelete == InvalidBlockNumber) + { + metadata->tail = InvalidBlockNumber; + metadata->tailFreeSize = 0; + metadata->nPendingPages = 0; + metadata->nPendingHeapTuples = 0; + } + + /* + * Set pd_lower just past the end of the metadata. This is essential, + * because without doing so, metadata will be lost if xlog.c + * compresses the page. (We must do this here because pre-v11 + * versions of PG did not set the metapage's pd_lower correctly, so a + * pg_upgraded index might contain the wrong value.) + */ + ((PageHeader) metapage)->pd_lower = + ((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage; + + MarkBufferDirty(metabuffer); + + for (i = 0; i < data.ndeleted; i++) + { + page = BufferGetPage(buffers[i]); + GinPageGetOpaque(page)->flags = GIN_DELETED; + MarkBufferDirty(buffers[i]); + } + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + XLogBeginInsert(); + XLogRegisterBuffer(0, metabuffer, + REGBUF_WILL_INIT | REGBUF_STANDARD); + for (i = 0; i < data.ndeleted; i++) + XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT); + + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); + + XLogRegisterData((char *) &data, + sizeof(ginxlogDeleteListPages)); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE); + PageSetLSN(metapage, recptr); + + for (i = 0; i < data.ndeleted; i++) + { + page = BufferGetPage(buffers[i]); + PageSetLSN(page, recptr); + } + } + + for (i = 0; i < data.ndeleted; i++) + UnlockReleaseBuffer(buffers[i]); + + END_CRIT_SECTION(); + + for (i = 0; fill_fsm && i < data.ndeleted; i++) + RecordFreeIndexPage(index, freespace[i]); + + } while (blknoToDelete != newHead); +} + +/* Initialize empty KeyArray */ +static void +initKeyArray(KeyArray *keys, int32 maxvalues) +{ + keys->keys = (Datum *) palloc(sizeof(Datum) * maxvalues); + keys->categories = (GinNullCategory *) + palloc(sizeof(GinNullCategory) * maxvalues); + keys->nvalues = 0; + keys->maxvalues = maxvalues; +} + +/* Add datum to KeyArray, resizing if needed */ +static void +addDatum(KeyArray *keys, Datum datum, GinNullCategory category) +{ + if (keys->nvalues >= keys->maxvalues) + { + keys->maxvalues *= 2; + keys->keys = (Datum *) + repalloc(keys->keys, sizeof(Datum) * keys->maxvalues); + keys->categories = (GinNullCategory *) + repalloc(keys->categories, sizeof(GinNullCategory) * keys->maxvalues); + } + + keys->keys[keys->nvalues] = datum; + keys->categories[keys->nvalues] = category; + keys->nvalues++; +} + +/* + * Collect data from a pending-list page in preparation for insertion into + * the main index. + * + * Go through all tuples >= startoff on page and collect values in accum + * + * Note that ka is just workspace --- it does not carry any state across + * calls. 
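/*
 * Editor's sketch (illustration only): KeyArray is a simple expansible
 * scratch array that is reset for each batch.  Typical use, mirroring the
 * code below, where 'datum' and 'category' are assumed to come from
 * gintuple_get_key():
 */
KeyArray    ka;

initKeyArray(&ka, 32);          /* start with room for 32 keys */
ka.nvalues = 0;                 /* reset before each batch */
addDatum(&ka, datum, category); /* doubles the arrays when they fill up */
/* ... hand ka.keys / ka.categories / ka.nvalues to ginInsertBAEntries() ... */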
+ */ +static void +processPendingPage(BuildAccumulator *accum, KeyArray *ka, + Page page, OffsetNumber startoff) +{ + ItemPointerData heapptr; + OffsetNumber i, + maxoff; + OffsetNumber attrnum; + + /* reset *ka to empty */ + ka->nvalues = 0; + + maxoff = PageGetMaxOffsetNumber(page); + Assert(maxoff >= FirstOffsetNumber); + ItemPointerSetInvalid(&heapptr); + attrnum = 0; + + for (i = startoff; i <= maxoff; i = OffsetNumberNext(i)) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + OffsetNumber curattnum; + Datum curkey; + GinNullCategory curcategory; + + /* Check for change of heap TID or attnum */ + curattnum = gintuple_get_attrnum(accum->ginstate, itup); + + if (!ItemPointerIsValid(&heapptr)) + { + heapptr = itup->t_tid; + attrnum = curattnum; + } + else if (!(ItemPointerEquals(&heapptr, &itup->t_tid) && + curattnum == attrnum)) + { + /* + * ginInsertBAEntries can insert several datums per call, but only + * for one heap tuple and one column. So call it at a boundary, + * and reset ka. + */ + ginInsertBAEntries(accum, &heapptr, attrnum, + ka->keys, ka->categories, ka->nvalues); + ka->nvalues = 0; + heapptr = itup->t_tid; + attrnum = curattnum; + } + + /* Add key to KeyArray */ + curkey = gintuple_get_key(accum->ginstate, itup, &curcategory); + addDatum(ka, curkey, curcategory); + } + + /* Dump out all remaining keys */ + ginInsertBAEntries(accum, &heapptr, attrnum, + ka->keys, ka->categories, ka->nvalues); +} + +/* + * Move tuples from pending pages into regular GIN structure. + * + * On first glance it looks completely not crash-safe. But if we crash + * after posting entries to the main index and before removing them from the + * pending list, it's okay because when we redo the posting later on, nothing + * bad will happen. + * + * fill_fsm indicates that ginInsertCleanup should add deleted pages + * to FSM otherwise caller is responsible to put deleted pages into + * FSM. + * + * If stats isn't null, we count deleted pending pages into the counts. + */ +void +ginInsertCleanup(GinState *ginstate, bool full_clean, + bool fill_fsm, bool forceCleanup, + IndexBulkDeleteResult *stats) +{ + Relation index = ginstate->index; + Buffer metabuffer, + buffer; + Page metapage, + page; + GinMetaPageData *metadata; + MemoryContext opCtx, + oldCtx; + BuildAccumulator accum; + KeyArray datums; + BlockNumber blkno, + blknoFinish; + bool cleanupFinish = false; + bool fsm_vac = false; + Size workMemory; + + /* + * We would like to prevent concurrent cleanup process. For that we will + * lock metapage in exclusive mode using LockPage() call. Nobody other + * will use that lock for metapage, so we keep possibility of concurrent + * insertion into pending list + */ + + if (forceCleanup) + { + /* + * We are called from [auto]vacuum/analyze or gin_clean_pending_list() + * and we would like to wait concurrent cleanup to finish. + */ + LockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); + workMemory = + (IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1) ? + autovacuum_work_mem : maintenance_work_mem; + } + else + { + /* + * We are called from regular insert and if we see concurrent cleanup + * just exit in hope that concurrent process will clean up pending + * list. 
+ */ + if (!ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock)) + return; + workMemory = work_mem; + } + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_SHARE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + if (metadata->head == InvalidBlockNumber) + { + /* Nothing to do */ + UnlockReleaseBuffer(metabuffer); + UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); + return; + } + + /* + * Remember a tail page to prevent infinite cleanup if other backends add + * new tuples faster than we can cleanup. + */ + blknoFinish = metadata->tail; + + /* + * Read and lock head of pending list + */ + blkno = metadata->head; + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + + LockBuffer(metabuffer, GIN_UNLOCK); + + /* + * Initialize. All temporary space will be in opCtx + */ + opCtx = AllocSetContextCreate(CurrentMemoryContext, + "GIN insert cleanup temporary context", + ALLOCSET_DEFAULT_SIZES); + + oldCtx = MemoryContextSwitchTo(opCtx); + + initKeyArray(&datums, 128); + ginInitBA(&accum); + accum.ginstate = ginstate; + + /* + * At the top of this loop, we have pin and lock on the current page of + * the pending list. However, we'll release that before exiting the loop. + * Note we also have pin but not lock on the metapage. + */ + for (;;) + { + Assert(!GinPageIsDeleted(page)); + + /* + * Are we walk through the page which as we remember was a tail when + * we start our cleanup? But if caller asks us to clean up whole + * pending list then ignore old tail, we will work until list becomes + * empty. + */ + if (blkno == blknoFinish && full_clean == false) + cleanupFinish = true; + + /* + * read page's datums into accum + */ + processPendingPage(&accum, &datums, page, FirstOffsetNumber); + + vacuum_delay_point(); + + /* + * Is it time to flush memory to disk? Flush if we are at the end of + * the pending list, or if we have a full row and memory is getting + * full. + */ + if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || + (GinPageHasFullRow(page) && + (accum.allocatedMemory >= workMemory * 1024L))) + { + ItemPointerData *list; + uint32 nlist; + Datum key; + GinNullCategory category; + OffsetNumber maxoff, + attnum; + + /* + * Unlock current page to increase performance. Changes of page + * will be checked later by comparing maxoff after completion of + * memory flush. + */ + maxoff = PageGetMaxOffsetNumber(page); + LockBuffer(buffer, GIN_UNLOCK); + + /* + * Moving collected data into regular structure can take + * significant amount of time - so, run it without locking pending + * list. + */ + ginBeginBAScan(&accum); + while ((list = ginGetBAEntry(&accum, + &attnum, &key, &category, &nlist)) != NULL) + { + ginEntryInsert(ginstate, attnum, key, category, + list, nlist, NULL); + vacuum_delay_point(); + } + + /* + * Lock the whole list to remove pages + */ + LockBuffer(metabuffer, GIN_EXCLUSIVE); + LockBuffer(buffer, GIN_SHARE); + + Assert(!GinPageIsDeleted(page)); + + /* + * While we left the page unlocked, more stuff might have gotten + * added to it. If so, process those entries immediately. There + * shouldn't be very many, so we don't worry about the fact that + * we're doing this with exclusive lock. Insertion algorithm + * guarantees that inserted row(s) will not continue on next page. + * NOTE: intentionally no vacuum_delay_point in this loop. 
+ */ + if (PageGetMaxOffsetNumber(page) != maxoff) + { + ginInitBA(&accum); + processPendingPage(&accum, &datums, page, maxoff + 1); + + ginBeginBAScan(&accum); + while ((list = ginGetBAEntry(&accum, + &attnum, &key, &category, &nlist)) != NULL) + ginEntryInsert(ginstate, attnum, key, category, + list, nlist, NULL); + } + + /* + * Remember next page - it will become the new list head + */ + blkno = GinPageGetOpaque(page)->rightlink; + UnlockReleaseBuffer(buffer); /* shiftList will do exclusive + * locking */ + + /* + * remove read pages from pending list, at this point all content + * of read pages is in regular structure + */ + shiftList(index, metabuffer, blkno, fill_fsm, stats); + + /* At this point, some pending pages have been freed up */ + fsm_vac = true; + + Assert(blkno == metadata->head); + LockBuffer(metabuffer, GIN_UNLOCK); + + /* + * if we removed the whole pending list or we cleanup tail (which + * we remembered on start our cleanup process) then just exit + */ + if (blkno == InvalidBlockNumber || cleanupFinish) + break; + + /* + * release memory used so far and reinit state + */ + MemoryContextReset(opCtx); + initKeyArray(&datums, datums.maxvalues); + ginInitBA(&accum); + } + else + { + blkno = GinPageGetOpaque(page)->rightlink; + UnlockReleaseBuffer(buffer); + } + + /* + * Read next page in pending list + */ + vacuum_delay_point(); + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + } + + UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); + ReleaseBuffer(metabuffer); + + /* + * As pending list pages can have a high churn rate, it is desirable to + * recycle them immediately to the FreeSpaceMap when ordinary backends + * clean the list. + */ + if (fsm_vac && fill_fsm) + IndexFreeSpaceMapVacuum(index); + + /* Clean up temporary space */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(opCtx); +} + +/* + * SQL-callable function to clean the insert pending list + */ +Datum +gin_clean_pending_list(PG_FUNCTION_ARGS) +{ + Oid indexoid = PG_GETARG_OID(0); + Relation indexRel = index_open(indexoid, RowExclusiveLock); + IndexBulkDeleteResult stats; + GinState ginstate; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("GIN pending list cannot be cleaned up during recovery."))); + + /* Must be a GIN index */ + if (indexRel->rd_rel->relkind != RELKIND_INDEX || + indexRel->rd_rel->relam != GIN_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a GIN index", + RelationGetRelationName(indexRel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. 
+ */ + if (RELATION_IS_OTHER_TEMP(indexRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary indexes of other sessions"))); + + /* User must own the index (comparable to privileges needed for VACUUM) */ + if (!pg_class_ownercheck(indexoid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, + RelationGetRelationName(indexRel)); + + memset(&stats, 0, sizeof(stats)); + initGinState(&ginstate, indexRel); + ginInsertCleanup(&ginstate, true, true, true, &stats); + + index_close(indexRel, RowExclusiveLock); + + PG_RETURN_INT64((int64) stats.pages_deleted); +} diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c new file mode 100644 index 0000000..03191e0 --- /dev/null +++ b/src/backend/access/gin/ginget.c @@ -0,0 +1,1970 @@ +/*------------------------------------------------------------------------- + * + * ginget.c + * fetch tuples from a GIN scan. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginget.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/relscan.h" +#include "miscadmin.h" +#include "storage/predicate.h" +#include "utils/datum.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +/* GUC parameter */ +int GinFuzzySearchLimit = 0; + +typedef struct pendingPosition +{ + Buffer pendingBuffer; + OffsetNumber firstOffset; + OffsetNumber lastOffset; + ItemPointerData item; + bool *hasMatchKey; +} pendingPosition; + + +/* + * Goes to the next page if current offset is outside of bounds + */ +static bool +moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot) +{ + Page page = BufferGetPage(stack->buffer); + + if (stack->off > PageGetMaxOffsetNumber(page)) + { + /* + * We scanned the whole page, so we should take right page + */ + if (GinPageRightMost(page)) + return false; /* no more pages */ + + stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE); + stack->blkno = BufferGetBlockNumber(stack->buffer); + stack->off = FirstOffsetNumber; + PredicateLockPage(btree->index, stack->blkno, snapshot); + } + + return true; +} + +/* + * Scan all pages of a posting tree and save all its heap ItemPointers + * in scanEntry->matchBitmap + */ +static void +scanPostingTree(Relation index, GinScanEntry scanEntry, + BlockNumber rootPostingTree, Snapshot snapshot) +{ + GinBtreeData btree; + GinBtreeStack *stack; + Buffer buffer; + Page page; + + /* Descend to the leftmost leaf page */ + stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot); + buffer = stack->buffer; + + IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */ + + freeGinBtreeStack(stack); + + /* + * Loop iterates through all leaf pages of posting tree + */ + for (;;) + { + page = BufferGetPage(buffer); + if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0) + { + int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap); + + scanEntry->predictNumberResult += n; + } + + if (GinPageRightMost(page)) + break; /* no more pages */ + + buffer = ginStepRight(buffer, index, GIN_SHARE); + } + + UnlockReleaseBuffer(buffer); +} + +/* + * Collects TIDs into scanEntry->matchBitmap for all heap tuples that + * match the search entry. This supports three different match modes: + * + * 1. 
Partial-match support: scan from current point until the + * comparePartialFn says we're done. + * 2. SEARCH_MODE_ALL: scan from current point (which should be first + * key for the current attnum) until we hit null items or end of attnum + * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first + * key for the current attnum) until we hit end of attnum + * + * Returns true if done, false if it's necessary to restart scan from scratch + */ +static bool +collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, + GinScanEntry scanEntry, Snapshot snapshot) +{ + OffsetNumber attnum; + Form_pg_attribute attr; + + /* Initialize empty bitmap result */ + scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL); + + /* Null query cannot partial-match anything */ + if (scanEntry->isPartialMatch && + scanEntry->queryCategory != GIN_CAT_NORM_KEY) + return true; + + /* Locate tupdesc entry for key column (for attbyval/attlen data) */ + attnum = scanEntry->attnum; + attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1); + + /* + * Predicate lock entry leaf page, following pages will be locked by + * moveRightIfItNeeded() + */ + PredicateLockPage(btree->index, stack->buffer, snapshot); + + for (;;) + { + Page page; + IndexTuple itup; + Datum idatum; + GinNullCategory icategory; + + /* + * stack->off points to the interested entry, buffer is already locked + */ + if (moveRightIfItNeeded(btree, stack, snapshot) == false) + return true; + + page = BufferGetPage(stack->buffer); + TestForOldSnapshot(snapshot, btree->index, page); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + + /* + * If tuple stores another attribute then stop scan + */ + if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) + return true; + + /* Safe to fetch attribute value */ + idatum = gintuple_get_key(btree->ginstate, itup, &icategory); + + /* + * Check for appropriate scan stop conditions + */ + if (scanEntry->isPartialMatch) + { + int32 cmp; + + /* + * In partial match, stop scan at any null (including + * placeholders); partial matches never match nulls + */ + if (icategory != GIN_CAT_NORM_KEY) + return true; + + /*---------- + * Check of partial match. + * case cmp == 0 => match + * case cmp > 0 => not match and finish scan + * case cmp < 0 => not match and continue scan + *---------- + */ + cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1], + btree->ginstate->supportCollation[attnum - 1], + scanEntry->queryKey, + idatum, + UInt16GetDatum(scanEntry->strategy), + PointerGetDatum(scanEntry->extra_data))); + + if (cmp > 0) + return true; + else if (cmp < 0) + { + stack->off++; + continue; + } + } + else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL) + { + /* + * In ALL mode, we are not interested in null items, so we can + * stop if we get to a null-item placeholder (which will be the + * last entry for a given attnum). We do want to include NULL_KEY + * and EMPTY_ITEM entries, though. + */ + if (icategory == GIN_CAT_NULL_ITEM) + return true; + } + + /* + * OK, we want to return the TIDs listed in this entry. + */ + if (GinIsPostingTree(itup)) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + + /* + * We should unlock current page (but not unpin) during tree scan + * to prevent deadlock with vacuum processes. 
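/*
 * Editor's illustration (hedged, hypothetical opclass): a comparePartialFn
 * following the convention documented above -- return 0 for a match, a
 * positive value to stop the scan, a negative value to keep scanning.  This
 * sketch treats the query key as a byte-wise prefix; it assumes the
 * opclass's compare function also sorts keys byte-wise, and it ignores
 * collations and the strategy/extra_data arguments.  It is not one of the
 * in-core partial-match implementations.
 */
Datum
prefix_compare_partial(PG_FUNCTION_ARGS)
{
    text       *partial = PG_GETARG_TEXT_PP(0);    /* query key (the prefix) */
    text       *key = PG_GETARG_TEXT_PP(1);        /* index key being scanned */
    int         plen = VARSIZE_ANY_EXHDR(partial);
    int         klen = VARSIZE_ANY_EXHDR(key);
    int         cmp;

    cmp = memcmp(VARDATA_ANY(key), VARDATA_ANY(partial), Min(klen, plen));

    if (cmp == 0)
    {
        if (klen >= plen)
            PG_RETURN_INT32(0);     /* key starts with the prefix: match */
        PG_RETURN_INT32(-1);        /* key sorts before the prefix: continue */
    }
    PG_RETURN_INT32(cmp < 0 ? -1 : 1);  /* before the prefix: continue; past it: stop */
}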
+ * + * We save current entry value (idatum) to be able to re-find our + * tuple after re-locking + */ + if (icategory == GIN_CAT_NORM_KEY) + idatum = datumCopy(idatum, attr->attbyval, attr->attlen); + + LockBuffer(stack->buffer, GIN_UNLOCK); + + /* + * Acquire predicate lock on the posting tree. We already hold a + * lock on the entry page, but insertions to the posting tree + * don't check for conflicts on that level. + */ + PredicateLockPage(btree->index, rootPostingTree, snapshot); + + /* Collect all the TIDs in this entry's posting tree */ + scanPostingTree(btree->index, scanEntry, rootPostingTree, + snapshot); + + /* + * We lock again the entry page and while it was unlocked insert + * might have occurred, so we need to re-find our position. + */ + LockBuffer(stack->buffer, GIN_SHARE); + page = BufferGetPage(stack->buffer); + if (!GinPageIsLeaf(page)) + { + /* + * Root page becomes non-leaf while we unlock it. We will + * start again, this situation doesn't occur often - root can + * became a non-leaf only once per life of index. + */ + return false; + } + + /* Search forward to re-find idatum */ + for (;;) + { + if (moveRightIfItNeeded(btree, stack, snapshot) == false) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("failed to re-find tuple within index \"%s\"", + RelationGetRelationName(btree->index)))); + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + + if (gintuple_get_attrnum(btree->ginstate, itup) == attnum) + { + Datum newDatum; + GinNullCategory newCategory; + + newDatum = gintuple_get_key(btree->ginstate, itup, + &newCategory); + + if (ginCompareEntries(btree->ginstate, attnum, + newDatum, newCategory, + idatum, icategory) == 0) + break; /* Found! */ + } + + stack->off++; + } + + if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval) + pfree(DatumGetPointer(idatum)); + } + else + { + ItemPointer ipd; + int nipd; + + ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd); + tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false); + scanEntry->predictNumberResult += GinGetNPosting(itup); + pfree(ipd); + } + + /* + * Done with this entry, go to the next + */ + stack->off++; + } +} + +/* + * Start* functions setup beginning state of searches: finds correct buffer and pins it. + */ +static void +startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot) +{ + GinBtreeData btreeEntry; + GinBtreeStack *stackEntry; + Page page; + bool needUnlock; + +restartScanEntry: + entry->buffer = InvalidBuffer; + ItemPointerSetMin(&entry->curItem); + entry->offset = InvalidOffsetNumber; + if (entry->list) + pfree(entry->list); + entry->list = NULL; + entry->nlist = 0; + entry->matchBitmap = NULL; + entry->matchResult = NULL; + entry->reduceResult = false; + entry->predictNumberResult = 0; + + /* + * we should find entry, and begin scan of posting tree or just store + * posting list in memory + */ + ginPrepareEntryScan(&btreeEntry, entry->attnum, + entry->queryKey, entry->queryCategory, + ginstate); + stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot); + page = BufferGetPage(stackEntry->buffer); + + /* ginFindLeafPage() will have already checked snapshot age. */ + needUnlock = true; + + entry->isFinished = true; + + if (entry->isPartialMatch || + entry->queryCategory == GIN_CAT_EMPTY_QUERY) + { + /* + * btreeEntry.findItem locates the first item >= given search key. 
+ * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item + * because of the way the GIN_CAT_EMPTY_QUERY category code is + * assigned.) We scan forward from there and collect all TIDs needed + * for the entry type. + */ + btreeEntry.findItem(&btreeEntry, stackEntry); + if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot) + == false) + { + /* + * GIN tree was seriously restructured, so we will cleanup all + * found data and rescan. See comments near 'return false' in + * collectMatchBitmap() + */ + if (entry->matchBitmap) + { + if (entry->matchIterator) + tbm_end_iterate(entry->matchIterator); + entry->matchIterator = NULL; + tbm_free(entry->matchBitmap); + entry->matchBitmap = NULL; + } + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stackEntry); + goto restartScanEntry; + } + + if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap)) + { + entry->matchIterator = tbm_begin_iterate(entry->matchBitmap); + entry->isFinished = false; + } + } + else if (btreeEntry.findItem(&btreeEntry, stackEntry)) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); + + if (GinIsPostingTree(itup)) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + GinBtreeStack *stack; + Page page; + ItemPointerData minItem; + + /* + * This is an equality scan, so lock the root of the posting tree. + * It represents a lock on the exact key value, and covers all the + * items in the posting tree. + */ + PredicateLockPage(ginstate->index, rootPostingTree, snapshot); + + /* + * We should unlock entry page before touching posting tree to + * prevent deadlocks with vacuum processes. Because entry is never + * deleted from page and posting tree is never reduced to the + * posting list, we can unlock page after getting BlockNumber of + * root of posting tree. + */ + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + needUnlock = false; + + stack = ginScanBeginPostingTree(&entry->btree, ginstate->index, + rootPostingTree, snapshot); + entry->buffer = stack->buffer; + + /* + * We keep buffer pinned because we need to prevent deletion of + * page during scan. See GIN's vacuum implementation. RefCount is + * increased to keep buffer pinned after freeGinBtreeStack() call. + */ + IncrBufferRefCount(entry->buffer); + + page = BufferGetPage(entry->buffer); + + /* + * Load the first page into memory. + */ + ItemPointerSetMin(&minItem); + entry->list = GinDataLeafPageGetItems(page, &entry->nlist, minItem); + + entry->predictNumberResult = stack->predictNumber * entry->nlist; + + LockBuffer(entry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stack); + entry->isFinished = false; + } + else + { + /* + * Lock the entry leaf page. This is more coarse-grained than + * necessary, because it will conflict with any insertions that + * land on the same leaf page, not only the exact key we searched + * for. But locking an individual tuple would require updating + * that lock whenever it moves because of insertions or vacuums, + * which seems too complicated. + */ + PredicateLockPage(ginstate->index, + BufferGetBlockNumber(stackEntry->buffer), + snapshot); + if (GinGetNPosting(itup) > 0) + { + entry->list = ginReadTuple(ginstate, entry->attnum, itup, + &entry->nlist); + entry->predictNumberResult = entry->nlist; + + entry->isFinished = false; + } + } + } + else + { + /* + * No entry found. Predicate lock the leaf page, to lock the place + * where the entry would've been, had there been one. 
+ */ + PredicateLockPage(ginstate->index, + BufferGetBlockNumber(stackEntry->buffer), snapshot); + } + + if (needUnlock) + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stackEntry); +} + +/* + * Comparison function for scan entry indexes. Sorts by predictNumberResult, + * least frequent items first. + */ +static int +entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg) +{ + const GinScanKey key = (const GinScanKey) arg; + int i1 = *(const int *) a1; + int i2 = *(const int *) a2; + uint32 n1 = key->scanEntry[i1]->predictNumberResult; + uint32 n2 = key->scanEntry[i2]->predictNumberResult; + + if (n1 < n2) + return -1; + else if (n1 == n2) + return 0; + else + return 1; +} + +static void +startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key) +{ + MemoryContext oldCtx = CurrentMemoryContext; + int i; + int j; + int *entryIndexes; + + ItemPointerSetMin(&key->curItem); + key->curItemMatches = false; + key->recheckCurItem = false; + key->isFinished = false; + + /* + * Divide the entries into two distinct sets: required and additional. + * Additional entries are not enough for a match alone, without any items + * from the required set, but are needed by the consistent function to + * decide if an item matches. When scanning, we can skip over items from + * additional entries that have no corresponding matches in any of the + * required entries. That speeds up queries like "frequent & rare" + * considerably, if the frequent term can be put in the additional set. + * + * There can be many legal ways to divide them entries into these two + * sets. A conservative division is to just put everything in the required + * set, but the more you can put in the additional set, the more you can + * skip during the scan. To maximize skipping, we try to put as many + * frequent items as possible into additional, and less frequent ones into + * required. To do that, sort the entries by frequency + * (predictNumberResult), and put entries into the required set in that + * order, until the consistent function says that none of the remaining + * entries can form a match, without any items from the required set. The + * rest go to the additional set. + * + * Exclude-only scan keys are known to have no required entries. + */ + if (key->excludeOnly) + { + MemoryContextSwitchTo(so->keyCtx); + + key->nrequired = 0; + key->nadditional = key->nentries; + key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry)); + for (i = 0; i < key->nadditional; i++) + key->additionalEntries[i] = key->scanEntry[i]; + } + else if (key->nentries > 1) + { + MemoryContextSwitchTo(so->tempCtx); + + entryIndexes = (int *) palloc(sizeof(int) * key->nentries); + for (i = 0; i < key->nentries; i++) + entryIndexes[i] = i; + qsort_arg(entryIndexes, key->nentries, sizeof(int), + entryIndexByFrequencyCmp, key); + + for (i = 0; i < key->nentries - 1; i++) + { + /* Pass all entries <= i as FALSE, and the rest as MAYBE */ + for (j = 0; j <= i; j++) + key->entryRes[entryIndexes[j]] = GIN_FALSE; + for (j = i + 1; j < key->nentries; j++) + key->entryRes[entryIndexes[j]] = GIN_MAYBE; + + if (key->triConsistentFn(key) == GIN_FALSE) + break; + } + /* i is now the last required entry. 
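The frequency-ordered split above can be modeled outside the backend. The sketch below is a simplified, self-contained illustration (the frequencies, the AND-only consistent stub, and names such as tri_consistent_and are invented for the example, not backend code): it sorts entry indexes by ascending predicted frequency and grows the required prefix until setting that prefix to FALSE alone forces the consistent result to FALSE. With AND semantics this stops after the rarest entry, which is how a "frequent & rare" query ends up with only the rare term in the required set.

    /*
     * Toy model of the required/additional split: sort entry indexes by
     * ascending frequency, then grow the "required" prefix until the
     * (stub) tri-state consistent function returns FALSE when the prefix
     * is FALSE and everything else is MAYBE.  Not backend code.
     */
    #include <stdio.h>
    #include <stdlib.h>

    #define N 4

    typedef enum { T_FALSE, T_MAYBE } Tri;   /* only the values used here */

    /* illustrative frequencies: two rare terms, two frequent ones */
    static const unsigned freq[N] = {10, 1000000, 25, 800000};

    /* stub consistent function for "all entries ANDed together" */
    static int
    tri_consistent_and(const Tri *in)
    {
        for (int i = 0; i < N; i++)
            if (in[i] == T_FALSE)
                return 0;               /* known FALSE */
        return 1;                       /* still MAYBE */
    }

    static int
    by_freq(const void *a, const void *b)
    {
        unsigned fa = freq[*(const int *) a];
        unsigned fb = freq[*(const int *) b];

        return (fa > fb) - (fa < fb);
    }

    int
    main(void)
    {
        int idx[N];
        Tri in[N];
        int i, j, nrequired = N;

        for (i = 0; i < N; i++)
            idx[i] = i;
        qsort(idx, N, sizeof(int), by_freq);

        for (i = 0; i < N - 1; i++)
        {
            /* prefix (rarest entries) FALSE, the rest MAYBE */
            for (j = 0; j <= i; j++)
                in[idx[j]] = T_FALSE;
            for (j = i + 1; j < N; j++)
                in[idx[j]] = T_MAYBE;

            if (tri_consistent_and(in) == 0)
            {
                nrequired = i + 1;
                break;
            }
        }
        printf("required entries: %d of %d (rarest first)\n", nrequired, N);
        return 0;
    }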
*/ + + MemoryContextSwitchTo(so->keyCtx); + + key->nrequired = i + 1; + key->nadditional = key->nentries - key->nrequired; + key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry)); + key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry)); + + j = 0; + for (i = 0; i < key->nrequired; i++) + key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]]; + for (i = 0; i < key->nadditional; i++) + key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]]; + + /* clean up after consistentFn calls (also frees entryIndexes) */ + MemoryContextReset(so->tempCtx); + } + else + { + MemoryContextSwitchTo(so->keyCtx); + + key->nrequired = 1; + key->nadditional = 0; + key->requiredEntries = palloc(1 * sizeof(GinScanEntry)); + key->requiredEntries[0] = key->scanEntry[0]; + } + MemoryContextSwitchTo(oldCtx); +} + +static void +startScan(IndexScanDesc scan) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + GinState *ginstate = &so->ginstate; + uint32 i; + + for (i = 0; i < so->totalentries; i++) + startScanEntry(ginstate, so->entries[i], scan->xs_snapshot); + + if (GinFuzzySearchLimit > 0) + { + /* + * If all of keys more than threshold we will try to reduce result, we + * hope (and only hope, for intersection operation of array our + * supposition isn't true), that total result will not more than + * minimal predictNumberResult. + */ + bool reduce = true; + + for (i = 0; i < so->totalentries; i++) + { + if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit) + { + reduce = false; + break; + } + } + if (reduce) + { + for (i = 0; i < so->totalentries; i++) + { + so->entries[i]->predictNumberResult /= so->totalentries; + so->entries[i]->reduceResult = true; + } + } + } + + /* + * Now that we have the estimates for the entry frequencies, finish + * initializing the scan keys. + */ + for (i = 0; i < so->nkeys; i++) + startScanKey(ginstate, so, so->keys + i); +} + +/* + * Load the next batch of item pointers from a posting tree. + * + * Note that we copy the page into GinScanEntry->list array and unlock it, but + * keep it pinned to prevent interference with vacuum. + */ +static void +entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, + ItemPointerData advancePast, Snapshot snapshot) +{ + Page page; + int i; + bool stepright; + + if (!BufferIsValid(entry->buffer)) + { + entry->isFinished = true; + return; + } + + /* + * We have two strategies for finding the correct page: step right from + * the current page, or descend the tree again from the root. If + * advancePast equals the current item, the next matching item should be + * on the next page, so we step right. Otherwise, descend from root. + */ + if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0) + { + stepright = true; + LockBuffer(entry->buffer, GIN_SHARE); + } + else + { + GinBtreeStack *stack; + + ReleaseBuffer(entry->buffer); + + /* + * Set the search key, and find the correct leaf page. + */ + if (ItemPointerIsLossyPage(&advancePast)) + { + ItemPointerSet(&entry->btree.itemptr, + GinItemPointerGetBlockNumber(&advancePast) + 1, + FirstOffsetNumber); + } + else + { + ItemPointerSet(&entry->btree.itemptr, + GinItemPointerGetBlockNumber(&advancePast), + OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast))); + } + entry->btree.fullScan = false; + stack = ginFindLeafPage(&entry->btree, true, false, snapshot); + + /* we don't need the stack, just the buffer. 
*/ + entry->buffer = stack->buffer; + IncrBufferRefCount(entry->buffer); + freeGinBtreeStack(stack); + stepright = false; + } + + elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d", + GinItemPointerGetBlockNumber(&advancePast), + GinItemPointerGetOffsetNumber(&advancePast), + !stepright); + + page = BufferGetPage(entry->buffer); + for (;;) + { + entry->offset = InvalidOffsetNumber; + if (entry->list) + { + pfree(entry->list); + entry->list = NULL; + entry->nlist = 0; + } + + if (stepright) + { + /* + * We've processed all the entries on this page. If it was the + * last page in the tree, we're done. + */ + if (GinPageRightMost(page)) + { + UnlockReleaseBuffer(entry->buffer); + entry->buffer = InvalidBuffer; + entry->isFinished = true; + return; + } + + /* + * Step to next page, following the right link. then find the + * first ItemPointer greater than advancePast. + */ + entry->buffer = ginStepRight(entry->buffer, + ginstate->index, + GIN_SHARE); + page = BufferGetPage(entry->buffer); + } + stepright = true; + + if (GinPageGetOpaque(page)->flags & GIN_DELETED) + continue; /* page was deleted by concurrent vacuum */ + + /* + * The first item > advancePast might not be on this page, but + * somewhere to the right, if the page was split, or a non-match from + * another key in the query allowed us to skip some items from this + * entry. Keep following the right-links until we re-find the correct + * page. + */ + if (!GinPageRightMost(page) && + ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0) + { + /* + * the item we're looking is > the right bound of the page, so it + * can't be on this page. + */ + continue; + } + + entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast); + + for (i = 0; i < entry->nlist; i++) + { + if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0) + { + entry->offset = i; + + if (GinPageRightMost(page)) + { + /* after processing the copied items, we're done. */ + UnlockReleaseBuffer(entry->buffer); + entry->buffer = InvalidBuffer; + } + else + LockBuffer(entry->buffer, GIN_UNLOCK); + return; + } + } + } +} + +#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE)) +#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) ) + +/* + * Sets entry->curItem to next heap item pointer > advancePast, for one entry + * of one scan key, or sets entry->isFinished to true if there are no more. + * + * Item pointers are returned in ascending order. + * + * Note: this can return a "lossy page" item pointer, indicating that the + * entry potentially matches all items on that heap page. However, it is + * not allowed to return both a lossy page pointer and exact (regular) + * item pointers for the same page. (Doing so would break the key-combination + * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the + * current implementation this is guaranteed by the behavior of tidbitmaps. + */ +static void +entryGetItem(GinState *ginstate, GinScanEntry entry, + ItemPointerData advancePast, Snapshot snapshot) +{ + Assert(!entry->isFinished); + + Assert(!ItemPointerIsValid(&entry->curItem) || + ginCompareItemPointers(&entry->curItem, &advancePast) <= 0); + + if (entry->matchBitmap) + { + /* A bitmap result */ + BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast); + OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast); + + for (;;) + { + /* + * If we've exhausted all items on this block, move to next block + * in the bitmap. 
+ */ + while (entry->matchResult == NULL || + (entry->matchResult->ntuples >= 0 && + entry->offset >= entry->matchResult->ntuples) || + entry->matchResult->blockno < advancePastBlk || + (ItemPointerIsLossyPage(&advancePast) && + entry->matchResult->blockno == advancePastBlk)) + { + entry->matchResult = tbm_iterate(entry->matchIterator); + + if (entry->matchResult == NULL) + { + ItemPointerSetInvalid(&entry->curItem); + tbm_end_iterate(entry->matchIterator); + entry->matchIterator = NULL; + entry->isFinished = true; + break; + } + + /* + * Reset counter to the beginning of entry->matchResult. Note: + * entry->offset is still greater than matchResult->ntuples if + * matchResult is lossy. So, on next call we will get next + * result from TIDBitmap. + */ + entry->offset = 0; + } + if (entry->isFinished) + break; + + /* + * We're now on the first page after advancePast which has any + * items on it. If it's a lossy result, return that. + */ + if (entry->matchResult->ntuples < 0) + { + ItemPointerSetLossyPage(&entry->curItem, + entry->matchResult->blockno); + + /* + * We might as well fall out of the loop; we could not + * estimate number of results on this page to support correct + * reducing of result even if it's enabled. + */ + break; + } + + /* + * Not a lossy page. Skip over any offsets <= advancePast, and + * return that. + */ + if (entry->matchResult->blockno == advancePastBlk) + { + /* + * First, do a quick check against the last offset on the + * page. If that's > advancePast, so are all the other + * offsets, so just go back to the top to get the next page. + */ + if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff) + { + entry->offset = entry->matchResult->ntuples; + continue; + } + + /* Otherwise scan to find the first item > advancePast */ + while (entry->matchResult->offsets[entry->offset] <= advancePastOff) + entry->offset++; + } + + ItemPointerSet(&entry->curItem, + entry->matchResult->blockno, + entry->matchResult->offsets[entry->offset]); + entry->offset++; + + /* Done unless we need to reduce the result */ + if (!entry->reduceResult || !dropItem(entry)) + break; + } + } + else if (!BufferIsValid(entry->buffer)) + { + /* + * A posting list from an entry tuple, or the last page of a posting + * tree. 
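The reduceResult/dropItem() path used above keeps each item with probability GinFuzzySearchLimit / predictNumberResult. A rough stand-alone model of that reduction (using rand() in place of the backend's random()/MAX_RANDOM_VALUE, with made-up numbers) shows that the expected number of surviving items is close to the limit:

    /*
     * Toy model of the dropItem() reduction: each candidate item survives
     * with probability limit/predicted, so about "limit" items survive in
     * total.  Numbers and the RNG are illustrative only.
     */
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
        const double limit = 1000.0;        /* stands in for GinFuzzySearchLimit */
        const double predicted = 250000.0;  /* stands in for predictNumberResult */
        long kept = 0;

        srand(42);
        for (long i = 0; i < (long) predicted; i++)
        {
            double r = (double) rand() / (double) RAND_MAX;

            if (!(r > limit / predicted))   /* the dropItem() test, inverted */
                kept++;
        }
        printf("kept %ld of %.0f items (expect about %.0f)\n",
               kept, predicted, limit);
        return 0;
    }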
+ */ + for (;;) + { + if (entry->offset >= entry->nlist) + { + ItemPointerSetInvalid(&entry->curItem); + entry->isFinished = true; + break; + } + + entry->curItem = entry->list[entry->offset++]; + + /* If we're not past advancePast, keep scanning */ + if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) + continue; + + /* Done unless we need to reduce the result */ + if (!entry->reduceResult || !dropItem(entry)) + break; + } + } + else + { + /* A posting tree */ + for (;;) + { + /* If we've processed the current batch, load more items */ + while (entry->offset >= entry->nlist) + { + entryLoadMoreItems(ginstate, entry, advancePast, snapshot); + + if (entry->isFinished) + { + ItemPointerSetInvalid(&entry->curItem); + return; + } + } + + entry->curItem = entry->list[entry->offset++]; + + /* If we're not past advancePast, keep scanning */ + if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) + continue; + + /* Done unless we need to reduce the result */ + if (!entry->reduceResult || !dropItem(entry)) + break; + + /* + * Advance advancePast (so that entryLoadMoreItems will load the + * right data), and keep scanning + */ + advancePast = entry->curItem; + } + } +} + +/* + * Identify the "current" item among the input entry streams for this scan key + * that is greater than advancePast, and test whether it passes the scan key + * qual condition. + * + * The current item is the smallest curItem among the inputs. key->curItem + * is set to that value. key->curItemMatches is set to indicate whether that + * TID passes the consistentFn test. If so, key->recheckCurItem is set true + * iff recheck is needed for this item pointer (including the case where the + * item pointer is a lossy page pointer). + * + * If all entry streams are exhausted, sets key->isFinished to true. + * + * Item pointers must be returned in ascending order. + * + * Note: this can return a "lossy page" item pointer, indicating that the + * key potentially matches all items on that heap page. However, it is + * not allowed to return both a lossy page pointer and exact (regular) + * item pointers for the same page. (Doing so would break the key-combination + * logic in scanGetItem.) + */ +static void +keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, + ItemPointerData advancePast, Snapshot snapshot) +{ + ItemPointerData minItem; + ItemPointerData curPageLossy; + uint32 i; + bool haveLossyEntry; + GinScanEntry entry; + GinTernaryValue res; + MemoryContext oldCtx; + bool allFinished; + + Assert(!key->isFinished); + + /* + * We might have already tested this item; if so, no need to repeat work. + * (Note: the ">" case can happen, if advancePast is exact but we + * previously had to set curItem to a lossy-page pointer.) + */ + if (ginCompareItemPointers(&key->curItem, &advancePast) > 0) + return; + + /* + * Find the minimum item > advancePast among the active entry streams. + * + * Note: a lossy-page entry is encoded by a ItemPointer with max value for + * offset (0xffff), so that it will sort after any exact entries for the + * same page. So we'll prefer to return exact pointers not lossy + * pointers, which is good. + */ + ItemPointerSetMax(&minItem); + allFinished = true; + for (i = 0; i < key->nrequired; i++) + { + entry = key->requiredEntries[i]; + + if (entry->isFinished) + continue; + + /* + * Advance this stream if necessary. + * + * In particular, since entry->curItem was initialized with + * ItemPointerSetMin, this ensures we fetch the first item for each + * entry on the first call. 
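The lossy-page encoding mentioned above (a lossy pointer carries the maximum offset 0xffff, so it sorts after every exact offset on the same page but before anything on later pages) can be shown with a simplified TID model. The struct and comparison below are illustrative stand-ins for ItemPointerData and ginCompareItemPointers, not the real definitions:

    /*
     * Minimal model of TID ordering with lossy-page pointers.
     */
    #include <stdio.h>
    #include <stdint.h>

    typedef struct
    {
        uint32_t blkno;
        uint16_t offset;            /* 0xffff means "whole page is a match" */
    } TidModel;

    #define LOSSY_OFFSET 0xffff

    static int
    tid_cmp(TidModel a, TidModel b)
    {
        if (a.blkno != b.blkno)
            return a.blkno < b.blkno ? -1 : 1;
        if (a.offset != b.offset)
            return a.offset < b.offset ? -1 : 1;
        return 0;
    }

    int
    main(void)
    {
        TidModel exact = {42, 6};
        TidModel lossy = {42, LOSSY_OFFSET};
        TidModel next_block = {43, 1};

        /* exact items on a page sort before the lossy pointer for that page */
        printf("(42,6) vs lossy(42): %d\n", tid_cmp(exact, lossy));       /* -1 */
        /* the lossy pointer still sorts before anything on later pages */
        printf("lossy(42) vs (43,1): %d\n", tid_cmp(lossy, next_block));  /* -1 */
        return 0;
    }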
+ */ + if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) + { + entryGetItem(ginstate, entry, advancePast, snapshot); + if (entry->isFinished) + continue; + } + + allFinished = false; + if (ginCompareItemPointers(&entry->curItem, &minItem) < 0) + minItem = entry->curItem; + } + + if (allFinished && !key->excludeOnly) + { + /* all entries are finished */ + key->isFinished = true; + return; + } + + if (!key->excludeOnly) + { + /* + * For a normal scan key, we now know there are no matches < minItem. + * + * If minItem is lossy, it means that there were no exact items on the + * page among requiredEntries, because lossy pointers sort after exact + * items. However, there might be exact items for the same page among + * additionalEntries, so we mustn't advance past them. + */ + if (ItemPointerIsLossyPage(&minItem)) + { + if (GinItemPointerGetBlockNumber(&advancePast) < + GinItemPointerGetBlockNumber(&minItem)) + { + ItemPointerSet(&advancePast, + GinItemPointerGetBlockNumber(&minItem), + InvalidOffsetNumber); + } + } + else + { + Assert(GinItemPointerGetOffsetNumber(&minItem) > 0); + ItemPointerSet(&advancePast, + GinItemPointerGetBlockNumber(&minItem), + OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem))); + } + } + else + { + /* + * excludeOnly scan keys don't have any entries that are necessarily + * present in matching items. So, we consider the item just after + * advancePast. + */ + Assert(key->nrequired == 0); + ItemPointerSet(&minItem, + GinItemPointerGetBlockNumber(&advancePast), + OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast))); + } + + /* + * We might not have loaded all the entry streams for this TID yet. We + * could call the consistent function, passing MAYBE for those entries, to + * see if it can decide if this TID matches based on the information we + * have. But if the consistent-function is expensive, and cannot in fact + * decide with partial information, that could be a big loss. So, load all + * the additional entries, before calling the consistent function. + */ + for (i = 0; i < key->nadditional; i++) + { + entry = key->additionalEntries[i]; + + if (entry->isFinished) + continue; + + if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) + { + entryGetItem(ginstate, entry, advancePast, snapshot); + if (entry->isFinished) + continue; + } + + /* + * Normally, none of the items in additionalEntries can have a curItem + * larger than minItem. But if minItem is a lossy page, then there + * might be exact items on the same page among additionalEntries. + */ + if (ginCompareItemPointers(&entry->curItem, &minItem) < 0) + { + Assert(ItemPointerIsLossyPage(&minItem)); + minItem = entry->curItem; + } + } + + /* + * Ok, we've advanced all the entries up to minItem now. Set key->curItem, + * and perform consistentFn test. + * + * Lossy-page entries pose a problem, since we don't know the correct + * entryRes state to pass to the consistentFn, and we also don't know what + * its combining logic will be (could be AND, OR, or even NOT). If the + * logic is OR then the consistentFn might succeed for all items in the + * lossy page even when none of the other entries match. + * + * Our strategy is to call the tri-state consistent function, with the + * lossy-page entries set to MAYBE, and all the other entries FALSE. If it + * returns FALSE, none of the lossy items alone are enough for a match, so + * we don't need to return a lossy-page pointer. 
Otherwise, return a + * lossy-page pointer to indicate that the whole heap page must be + * checked. (On subsequent calls, we'll do nothing until minItem is past + * the page altogether, thus ensuring that we never return both regular + * and lossy pointers for the same page.) + * + * An exception is that it doesn't matter what we pass for lossy pointers + * in "hidden" entries, because the consistentFn's result can't depend on + * them. We could pass them as MAYBE as well, but if we're using the + * "shim" implementation of a tri-state consistent function (see + * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass + * them as true. + * + * Note that only lossy-page entries pointing to the current item's page + * should trigger this processing; we might have future lossy pages in the + * entry array, but they aren't relevant yet. + */ + key->curItem = minItem; + ItemPointerSetLossyPage(&curPageLossy, + GinItemPointerGetBlockNumber(&key->curItem)); + haveLossyEntry = false; + for (i = 0; i < key->nentries; i++) + { + entry = key->scanEntry[i]; + if (entry->isFinished == false && + ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0) + { + if (i < key->nuserentries) + key->entryRes[i] = GIN_MAYBE; + else + key->entryRes[i] = GIN_TRUE; + haveLossyEntry = true; + } + else + key->entryRes[i] = GIN_FALSE; + } + + /* prepare for calling consistentFn in temp context */ + oldCtx = MemoryContextSwitchTo(tempCtx); + + if (haveLossyEntry) + { + /* Have lossy-page entries, so see if whole page matches */ + res = key->triConsistentFn(key); + + if (res == GIN_TRUE || res == GIN_MAYBE) + { + /* Yes, so clean up ... */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(tempCtx); + + /* and return lossy pointer for whole page */ + key->curItem = curPageLossy; + key->curItemMatches = true; + key->recheckCurItem = true; + return; + } + } + + /* + * At this point we know that we don't need to return a lossy whole-page + * pointer, but we might have matches for individual exact item pointers, + * possibly in combination with a lossy pointer. Pass lossy pointers as + * MAYBE to the ternary consistent function, to let it decide if this + * tuple satisfies the overall key, even though we don't know if the lossy + * entries match. + * + * Prepare entryRes array to be passed to consistentFn. + */ + for (i = 0; i < key->nentries; i++) + { + entry = key->scanEntry[i]; + if (entry->isFinished) + key->entryRes[i] = GIN_FALSE; +#if 0 + + /* + * This case can't currently happen, because we loaded all the entries + * for this item earlier. + */ + else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) + key->entryRes[i] = GIN_MAYBE; +#endif + else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0) + key->entryRes[i] = GIN_MAYBE; + else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0) + key->entryRes[i] = GIN_TRUE; + else + key->entryRes[i] = GIN_FALSE; + } + + res = key->triConsistentFn(key); + + switch (res) + { + case GIN_TRUE: + key->curItemMatches = true; + /* triConsistentFn set recheckCurItem */ + break; + + case GIN_FALSE: + key->curItemMatches = false; + break; + + case GIN_MAYBE: + key->curItemMatches = true; + key->recheckCurItem = true; + break; + + default: + + /* + * the 'default' case shouldn't happen, but if the consistent + * function returns something bogus, this is the safe result + */ + key->curItemMatches = true; + key->recheckCurItem = true; + break; + } + + /* + * We have a tuple, and we know if it matches or not. 
If it's a non-match, + * we could continue to find the next matching tuple, but let's break out + * and give scanGetItem a chance to advance the other keys. They might be + * able to skip past to a much higher TID, allowing us to save work. + */ + + /* clean up after consistentFn calls */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(tempCtx); +} + +/* + * Get next heap item pointer (after advancePast) from scan. + * Returns true if anything found. + * On success, *item and *recheck are set. + * + * Note: this is very nearly the same logic as in keyGetItem(), except + * that we know the keys are to be combined with AND logic, whereas in + * keyGetItem() the combination logic is known only to the consistentFn. + */ +static bool +scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, + ItemPointerData *item, bool *recheck) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + uint32 i; + bool match; + + /*---------- + * Advance the scan keys in lock-step, until we find an item that matches + * all the keys. If any key reports isFinished, meaning its subset of the + * entries is exhausted, we can stop. Otherwise, set *item to the next + * matching item. + * + * This logic works only if a keyGetItem stream can never contain both + * exact and lossy pointers for the same page. Else we could have a + * case like + * + * stream 1 stream 2 + * ... ... + * 42/6 42/7 + * 50/1 42/0xffff + * ... ... + * + * We would conclude that 42/6 is not a match and advance stream 1, + * thus never detecting the match to the lossy pointer in stream 2. + * (keyGetItem has a similar problem versus entryGetItem.) + *---------- + */ + do + { + ItemPointerSetMin(item); + match = true; + for (i = 0; i < so->nkeys && match; i++) + { + GinScanKey key = so->keys + i; + + /* + * If we're considering a lossy page, skip excludeOnly keys, They + * can't exclude the whole page anyway. + */ + if (ItemPointerIsLossyPage(item) && key->excludeOnly) + { + /* + * ginNewScanKey() should never mark the first key as + * excludeOnly. + */ + Assert(i > 0); + continue; + } + + /* Fetch the next item for this key that is > advancePast. */ + keyGetItem(&so->ginstate, so->tempCtx, key, advancePast, + scan->xs_snapshot); + + if (key->isFinished) + return false; + + /* + * If it's not a match, we can immediately conclude that nothing + * <= this item matches, without checking the rest of the keys. + */ + if (!key->curItemMatches) + { + advancePast = key->curItem; + match = false; + break; + } + + /* + * It's a match. We can conclude that nothing < matches, so the + * other key streams can skip to this item. + * + * Beware of lossy pointers, though; from a lossy pointer, we can + * only conclude that nothing smaller than this *block* matches. + */ + if (ItemPointerIsLossyPage(&key->curItem)) + { + if (GinItemPointerGetBlockNumber(&advancePast) < + GinItemPointerGetBlockNumber(&key->curItem)) + { + ItemPointerSet(&advancePast, + GinItemPointerGetBlockNumber(&key->curItem), + InvalidOffsetNumber); + } + } + else + { + Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0); + ItemPointerSet(&advancePast, + GinItemPointerGetBlockNumber(&key->curItem), + OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem))); + } + + /* + * If this is the first key, remember this location as a potential + * match, and proceed to check the rest of the keys. + * + * Otherwise, check if this is the same item that we checked the + * previous keys for (or a lossy pointer for the same page). 
If + * not, loop back to check the previous keys for this item (we + * will check this key again too, but keyGetItem returns quickly + * for that) + */ + if (i == 0) + { + *item = key->curItem; + } + else + { + if (ItemPointerIsLossyPage(&key->curItem) || + ItemPointerIsLossyPage(item)) + { + Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item)); + match = (GinItemPointerGetBlockNumber(&key->curItem) == + GinItemPointerGetBlockNumber(item)); + } + else + { + Assert(ginCompareItemPointers(&key->curItem, item) >= 0); + match = (ginCompareItemPointers(&key->curItem, item) == 0); + } + } + } + } while (!match); + + Assert(!ItemPointerIsMin(item)); + + /* + * Now *item contains the first ItemPointer after previous result that + * satisfied all the keys for that exact TID, or a lossy reference to the + * same page. + * + * We must return recheck = true if any of the keys are marked recheck. + */ + *recheck = false; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + if (key->recheckCurItem) + { + *recheck = true; + break; + } + } + + return true; +} + + +/* + * Functions for scanning the pending list + */ + + +/* + * Get ItemPointer of next heap row to be checked from pending list. + * Returns false if there are no more. On pages with several heap rows + * it returns each row separately, on page with part of heap row returns + * per page data. pos->firstOffset and pos->lastOffset are set to identify + * the range of pending-list tuples belonging to this heap row. + * + * The pendingBuffer is presumed pinned and share-locked on entry, and is + * pinned and share-locked on success exit. On failure exit it's released. + */ +static bool +scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) +{ + OffsetNumber maxoff; + Page page; + IndexTuple itup; + + ItemPointerSetInvalid(&pos->item); + for (;;) + { + page = BufferGetPage(pos->pendingBuffer); + TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page); + + maxoff = PageGetMaxOffsetNumber(page); + if (pos->firstOffset > maxoff) + { + BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + + if (blkno == InvalidBlockNumber) + { + UnlockReleaseBuffer(pos->pendingBuffer); + pos->pendingBuffer = InvalidBuffer; + + return false; + } + else + { + /* + * Here we must prevent deletion of next page by insertcleanup + * process, which may be trying to obtain exclusive lock on + * current page. 
So, we lock next page before releasing the + * current one + */ + Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno); + + LockBuffer(tmpbuf, GIN_SHARE); + UnlockReleaseBuffer(pos->pendingBuffer); + + pos->pendingBuffer = tmpbuf; + pos->firstOffset = FirstOffsetNumber; + } + } + else + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset)); + pos->item = itup->t_tid; + if (GinPageHasFullRow(page)) + { + /* + * find itempointer to the next row + */ + for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset)); + if (!ItemPointerEquals(&pos->item, &itup->t_tid)) + break; + } + } + else + { + /* + * All itempointers are the same on this page + */ + pos->lastOffset = maxoff + 1; + } + + /* + * Now pos->firstOffset points to the first tuple of current heap + * row, pos->lastOffset points to the first tuple of next heap row + * (or to the end of page) + */ + break; + } + } + + return true; +} + +/* + * Scan pending-list page from current tuple (off) up till the first of: + * - match is found (then returns true) + * - no later match is possible + * - tuple's attribute number is not equal to entry's attrnum + * - reach end of page + * + * datum[]/category[]/datumExtracted[] arrays are used to cache the results + * of gintuple_get_key() on the current page. + */ +static bool +matchPartialInPendingList(GinState *ginstate, Page page, + OffsetNumber off, OffsetNumber maxoff, + GinScanEntry entry, + Datum *datum, GinNullCategory *category, + bool *datumExtracted) +{ + IndexTuple itup; + int32 cmp; + + /* Partial match to a null is not possible */ + if (entry->queryCategory != GIN_CAT_NORM_KEY) + return false; + + while (off < maxoff) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + + if (gintuple_get_attrnum(ginstate, itup) != entry->attnum) + return false; + + if (datumExtracted[off - 1] == false) + { + datum[off - 1] = gintuple_get_key(ginstate, itup, + &category[off - 1]); + datumExtracted[off - 1] = true; + } + + /* Once we hit nulls, no further match is possible */ + if (category[off - 1] != GIN_CAT_NORM_KEY) + return false; + + /*---------- + * Check partial match. + * case cmp == 0 => match + * case cmp > 0 => not match and end scan (no later match possible) + * case cmp < 0 => not match and continue scan + *---------- + */ + cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1], + ginstate->supportCollation[entry->attnum - 1], + entry->queryKey, + datum[off - 1], + UInt16GetDatum(entry->strategy), + PointerGetDatum(entry->extra_data))); + if (cmp == 0) + return true; + else if (cmp > 0) + return false; + + off++; + } + + return false; +} + +/* + * Set up the entryRes array for each key by looking at + * every entry for current heap row in pending list. + * + * Returns true if each scan key has at least one entryRes match. + * This corresponds to the situations where the normal index search will + * try to apply the key's consistentFn. (A tuple not meeting that requirement + * cannot be returned by the normal search since no entry stream will + * source its TID.) + * + * The pendingBuffer is presumed pinned and share-locked on entry. 
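Because pending-list tuples for one heap row are ordered by (attnum, key), the function below can locate the entry a scan key cares about with a binary search instead of a linear scan. A self-contained sketch of that lookup, with plain ints standing in for attribute numbers and Datums (all names here are illustrative, not backend code):

    /*
     * Simplified model of the (attnum, key) binary search used over one
     * heap row's pending-list tuples.
     */
    #include <stdio.h>

    typedef struct
    {
        int attnum;
        int key;                    /* stand-in for the real Datum */
    } PendingEntry;

    static int
    entry_cmp(int attnum, int key, PendingEntry e)
    {
        if (attnum != e.attnum)
            return attnum < e.attnum ? -1 : 1;
        if (key != e.key)
            return key < e.key ? -1 : 1;
        return 0;
    }

    /* returns index of the match, or -1 if the (attnum, key) pair is absent */
    static int
    find_entry(const PendingEntry *tuples, int n, int attnum, int key)
    {
        int lo = 0, hi = n;         /* search in [lo, hi) */

        while (lo < hi)
        {
            int mid = lo + (hi - lo) / 2;
            int res = entry_cmp(attnum, key, tuples[mid]);

            if (res == 0)
                return mid;
            if (res < 0)
                hi = mid;
            else
                lo = mid + 1;
        }
        return -1;
    }

    int
    main(void)
    {
        /* one heap row's pending entries, ordered by (attnum, key) */
        PendingEntry tuples[] = {{1, 3}, {1, 7}, {1, 9}, {2, 2}, {2, 5}};
        int n = sizeof(tuples) / sizeof(tuples[0]);

        printf("found at %d\n", find_entry(tuples, n, 2, 2));   /* 3 */
        printf("found at %d\n", find_entry(tuples, n, 1, 4));   /* -1 */
        return 0;
    }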
+ */ +static bool +collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + OffsetNumber attrnum; + Page page; + IndexTuple itup; + int i, + j; + + /* + * Reset all entryRes and hasMatchKey flags + */ + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + memset(key->entryRes, GIN_FALSE, key->nentries); + } + memset(pos->hasMatchKey, false, so->nkeys); + + /* + * Outer loop iterates over multiple pending-list pages when a single heap + * row has entries spanning those pages. + */ + for (;;) + { + Datum datum[BLCKSZ / sizeof(IndexTupleData)]; + GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)]; + bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)]; + + Assert(pos->lastOffset > pos->firstOffset); + memset(datumExtracted + pos->firstOffset - 1, 0, + sizeof(bool) * (pos->lastOffset - pos->firstOffset)); + + page = BufferGetPage(pos->pendingBuffer); + TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page); + + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + for (j = 0; j < key->nentries; j++) + { + GinScanEntry entry = key->scanEntry[j]; + OffsetNumber StopLow = pos->firstOffset, + StopHigh = pos->lastOffset, + StopMiddle; + + /* If already matched on earlier page, do no extra work */ + if (key->entryRes[j]) + continue; + + /* + * Interesting tuples are from pos->firstOffset to + * pos->lastOffset and they are ordered by (attnum, Datum) as + * it's done in entry tree. So we can use binary search to + * avoid linear scanning. + */ + while (StopLow < StopHigh) + { + int res; + + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle)); + + attrnum = gintuple_get_attrnum(&so->ginstate, itup); + + if (key->attnum < attrnum) + { + StopHigh = StopMiddle; + continue; + } + if (key->attnum > attrnum) + { + StopLow = StopMiddle + 1; + continue; + } + + if (datumExtracted[StopMiddle - 1] == false) + { + datum[StopMiddle - 1] = + gintuple_get_key(&so->ginstate, itup, + &category[StopMiddle - 1]); + datumExtracted[StopMiddle - 1] = true; + } + + if (entry->queryCategory == GIN_CAT_EMPTY_QUERY) + { + /* special behavior depending on searchMode */ + if (entry->searchMode == GIN_SEARCH_MODE_ALL) + { + /* match anything except NULL_ITEM */ + if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM) + res = -1; + else + res = 0; + } + else + { + /* match everything */ + res = 0; + } + } + else + { + res = ginCompareEntries(&so->ginstate, + entry->attnum, + entry->queryKey, + entry->queryCategory, + datum[StopMiddle - 1], + category[StopMiddle - 1]); + } + + if (res == 0) + { + /* + * Found exact match (there can be only one, except in + * EMPTY_QUERY mode). + * + * If doing partial match, scan forward from here to + * end of page to check for matches. + * + * See comment above about tuple's ordering. + */ + if (entry->isPartialMatch) + key->entryRes[j] = + matchPartialInPendingList(&so->ginstate, + page, + StopMiddle, + pos->lastOffset, + entry, + datum, + category, + datumExtracted); + else + key->entryRes[j] = true; + + /* done with binary search */ + break; + } + else if (res < 0) + StopHigh = StopMiddle; + else + StopLow = StopMiddle + 1; + } + + if (StopLow >= StopHigh && entry->isPartialMatch) + { + /* + * No exact match on this page. If doing partial match, + * scan from the first tuple greater than target value to + * end of page. 
Note that since we don't remember whether + * the comparePartialFn told us to stop early on a + * previous page, we will uselessly apply comparePartialFn + * to the first tuple on each subsequent page. + */ + key->entryRes[j] = + matchPartialInPendingList(&so->ginstate, + page, + StopHigh, + pos->lastOffset, + entry, + datum, + category, + datumExtracted); + } + + pos->hasMatchKey[i] |= key->entryRes[j]; + } + } + + /* Advance firstOffset over the scanned tuples */ + pos->firstOffset = pos->lastOffset; + + if (GinPageHasFullRow(page)) + { + /* + * We have examined all pending entries for the current heap row. + * Break out of loop over pages. + */ + break; + } + else + { + /* + * Advance to next page of pending entries for the current heap + * row. Complain if there isn't one. + */ + ItemPointerData item = pos->item; + + if (scanGetCandidate(scan, pos) == false || + !ItemPointerEquals(&pos->item, &item)) + elog(ERROR, "could not find additional pending pages for same heap tuple"); + } + } + + /* + * All scan keys except excludeOnly require at least one entry to match. + * excludeOnly keys are an exception, because their implied + * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all + * non-excludeOnly scan keys have at least one match. + */ + for (i = 0; i < so->nkeys; i++) + { + if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly) + return false; + } + + return true; +} + +/* + * Collect all matched rows from pending list into bitmap. + */ +static void +scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + MemoryContext oldCtx; + bool recheck, + match; + int i; + pendingPosition pos; + Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO); + Page page; + BlockNumber blkno; + + *ntids = 0; + + /* + * Acquire predicate lock on the metapage, to conflict with any fastupdate + * insertions. + */ + PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot); + + LockBuffer(metabuffer, GIN_SHARE); + page = BufferGetPage(metabuffer); + TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page); + blkno = GinPageGetMeta(page)->head; + + /* + * fetch head of list before unlocking metapage. head page must be pinned + * to prevent deletion by vacuum process + */ + if (blkno == InvalidBlockNumber) + { + /* No pending list, so proceed with normal scan */ + UnlockReleaseBuffer(metabuffer); + return; + } + + pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno); + LockBuffer(pos.pendingBuffer, GIN_SHARE); + pos.firstOffset = FirstOffsetNumber; + UnlockReleaseBuffer(metabuffer); + pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys); + + /* + * loop for each heap row. scanGetCandidate returns full row or row's + * tuples from first page. + */ + while (scanGetCandidate(scan, &pos)) + { + /* + * Check entries in tuple and set up entryRes array. + * + * If pending tuples belonging to the current heap row are spread + * across several pages, collectMatchesForHeapRow will read all of + * those pages. + */ + if (!collectMatchesForHeapRow(scan, &pos)) + continue; + + /* + * Matching of entries of one row is finished, so check row using + * consistent functions. 
+ */ + oldCtx = MemoryContextSwitchTo(so->tempCtx); + recheck = false; + match = true; + + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + if (!key->boolConsistentFn(key)) + { + match = false; + break; + } + recheck |= key->recheckCurItem; + } + + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(so->tempCtx); + + if (match) + { + tbm_add_tuples(tbm, &pos.item, 1, recheck); + (*ntids)++; + } + } + + pfree(pos.hasMatchKey); +} + + +#define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes ) + +int64 +gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + int64 ntids; + ItemPointerData iptr; + bool recheck; + + /* + * Set up the scan keys, and check for unsatisfiable query. + */ + ginFreeScanKeys(so); /* there should be no keys yet, but just to be + * sure */ + ginNewScanKey(scan); + + if (GinIsVoidRes(scan)) + return 0; + + ntids = 0; + + /* + * First, scan the pending list and collect any matching entries into the + * bitmap. After we scan a pending item, some other backend could post it + * into the main index, and so we might visit it a second time during the + * main scan. This is okay because we'll just re-set the same bit in the + * bitmap. (The possibility of duplicate visits is a major reason why GIN + * can't support the amgettuple API, however.) Note that it would not do + * to scan the main index before the pending list, since concurrent + * cleanup could then make us miss entries entirely. + */ + scanPendingInsert(scan, tbm, &ntids); + + /* + * Now scan the main index. + */ + startScan(scan); + + ItemPointerSetMin(&iptr); + + for (;;) + { + CHECK_FOR_INTERRUPTS(); + + if (!scanGetItem(scan, iptr, &iptr, &recheck)) + break; + + if (ItemPointerIsLossyPage(&iptr)) + tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr)); + else + tbm_add_tuples(tbm, &iptr, 1, recheck); + ntids++; + } + + return ntids; +} diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c new file mode 100644 index 0000000..0e8672c --- /dev/null +++ b/src/backend/access/gin/gininsert.c @@ -0,0 +1,541 @@ +/*------------------------------------------------------------------------- + * + * gininsert.c + * insert routines for the postgres inverted index access method. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/gininsert.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/tableam.h" +#include "access/xloginsert.h" +#include "catalog/index.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/indexfsm.h" +#include "storage/predicate.h" +#include "storage/smgr.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +typedef struct +{ + GinState ginstate; + double indtuples; + GinStatsData buildStats; + MemoryContext tmpCtx; + MemoryContext funcCtx; + BuildAccumulator accum; +} GinBuildState; + + +/* + * Adds array of item pointers to tuple's posting list, or + * creates posting tree and tuple pointing to tree in case + * of not enough space. Max size of tuple is defined in + * GinFormTuple(). Returns a new, modified index tuple. + * items[] must be in sorted order with no duplicates. 
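The old and new posting lists are combined with an ordinary sorted merge (ginMergeItemPointers, called just below). A minimal stand-alone sketch of that merge, using plain ints in place of (block, offset) item pointers and collapsing duplicates to a single copy as the "no duplicates" requirement above implies:

    /*
     * Illustrative merge of two sorted, duplicate-free TID arrays.
     * Simplified types; not the backend implementation.
     */
    #include <stdio.h>

    /* merge a[] and b[] into out[]; returns the merged length */
    static int
    merge_tids(const int *a, int na, const int *b, int nb, int *out)
    {
        int i = 0, j = 0, n = 0;

        while (i < na && j < nb)
        {
            if (a[i] < b[j])
                out[n++] = a[i++];
            else if (a[i] > b[j])
                out[n++] = b[j++];
            else
            {
                out[n++] = a[i++];  /* equal: keep one copy */
                j++;
            }
        }
        while (i < na)
            out[n++] = a[i++];
        while (j < nb)
            out[n++] = b[j++];
        return n;
    }

    int
    main(void)
    {
        int olditems[] = {2, 5, 9};
        int newitems[] = {1, 5, 12};
        int merged[6];
        int n = merge_tids(olditems, 3, newitems, 3, merged);

        for (int k = 0; k < n; k++)
            printf("%d ", merged[k]);   /* 1 2 5 9 12 */
        printf("\n");
        return 0;
    }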
+ */ +static IndexTuple +addItemPointersToLeafTuple(GinState *ginstate, + IndexTuple old, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats, Buffer buffer) +{ + OffsetNumber attnum; + Datum key; + GinNullCategory category; + IndexTuple res; + ItemPointerData *newItems, + *oldItems; + int oldNPosting, + newNPosting; + GinPostingList *compressedList; + + Assert(!GinIsPostingTree(old)); + + attnum = gintuple_get_attrnum(ginstate, old); + key = gintuple_get_key(ginstate, old, &category); + + /* merge the old and new posting lists */ + oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting); + + newItems = ginMergeItemPointers(items, nitem, + oldItems, oldNPosting, + &newNPosting); + + /* Compress the posting list, and try to a build tuple with room for it */ + res = NULL; + compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, + NULL); + pfree(newItems); + if (compressedList) + { + res = GinFormTuple(ginstate, attnum, key, category, + (char *) compressedList, + SizeOfGinPostingList(compressedList), + newNPosting, + false); + pfree(compressedList); + } + if (!res) + { + /* posting list would be too big, convert to posting tree */ + BlockNumber postingRoot; + + /* + * Initialize posting tree with the old tuple's posting list. It's + * surely small enough to fit on one posting-tree page, and should + * already be in order with no duplicates. + */ + postingRoot = createPostingTree(ginstate->index, + oldItems, + oldNPosting, + buildStats, + buffer); + + /* Now insert the TIDs-to-be-added into the posting tree */ + ginInsertItemPointers(ginstate->index, postingRoot, + items, nitem, + buildStats); + + /* And build a new posting-tree-only result tuple */ + res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); + GinSetPostingTree(res, postingRoot); + } + pfree(oldItems); + + return res; +} + +/* + * Build a fresh leaf tuple, either posting-list or posting-tree format + * depending on whether the given items list will fit. + * items[] must be in sorted order with no duplicates. + * + * This is basically the same logic as in addItemPointersToLeafTuple, + * but working from slightly different input. + */ +static IndexTuple +buildFreshLeafTuple(GinState *ginstate, + OffsetNumber attnum, Datum key, GinNullCategory category, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats, Buffer buffer) +{ + IndexTuple res = NULL; + GinPostingList *compressedList; + + /* try to build a posting list tuple with all the items */ + compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL); + if (compressedList) + { + res = GinFormTuple(ginstate, attnum, key, category, + (char *) compressedList, + SizeOfGinPostingList(compressedList), + nitem, false); + pfree(compressedList); + } + if (!res) + { + /* posting list would be too big, build posting tree */ + BlockNumber postingRoot; + + /* + * Build posting-tree-only result tuple. We do this first so as to + * fail quickly if the key is too big. + */ + res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); + + /* + * Initialize a new posting tree with the TIDs. + */ + postingRoot = createPostingTree(ginstate->index, items, nitem, + buildStats, buffer); + + /* And save the root link in the result tuple */ + GinSetPostingTree(res, postingRoot); + } + + return res; +} + +/* + * Insert one or more heap TIDs associated with the given key value. + * This will either add a single key entry, or enlarge a pre-existing entry. 
+ * + * During an index build, buildStats is non-null and the counters + * it contains should be incremented as needed. + */ +void +ginEntryInsert(GinState *ginstate, + OffsetNumber attnum, Datum key, GinNullCategory category, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats) +{ + GinBtreeData btree; + GinBtreeEntryInsertData insertdata; + GinBtreeStack *stack; + IndexTuple itup; + Page page; + + insertdata.isDelete = false; + + ginPrepareEntryScan(&btree, attnum, key, category, ginstate); + btree.isBuild = (buildStats != NULL); + + stack = ginFindLeafPage(&btree, false, false, NULL); + page = BufferGetPage(stack->buffer); + + if (btree.findItem(&btree, stack)) + { + /* found pre-existing entry */ + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + + if (GinIsPostingTree(itup)) + { + /* add entries to existing posting tree */ + BlockNumber rootPostingTree = GinGetPostingTree(itup); + + /* release all stack */ + LockBuffer(stack->buffer, GIN_UNLOCK); + freeGinBtreeStack(stack); + + /* insert into posting tree */ + ginInsertItemPointers(ginstate->index, rootPostingTree, + items, nitem, + buildStats); + return; + } + + CheckForSerializableConflictIn(ginstate->index, NULL, + BufferGetBlockNumber(stack->buffer)); + /* modify an existing leaf entry */ + itup = addItemPointersToLeafTuple(ginstate, itup, + items, nitem, buildStats, stack->buffer); + + insertdata.isDelete = true; + } + else + { + CheckForSerializableConflictIn(ginstate->index, NULL, + BufferGetBlockNumber(stack->buffer)); + /* no match, so construct a new leaf entry */ + itup = buildFreshLeafTuple(ginstate, attnum, key, category, + items, nitem, buildStats, stack->buffer); + + /* + * nEntries counts leaf tuples, so increment it only when we make a + * new one. + */ + if (buildStats) + buildStats->nEntries++; + } + + /* Insert the new or modified leaf tuple */ + insertdata.entry = itup; + ginInsertValue(&btree, stack, &insertdata, buildStats); + pfree(itup); +} + +/* + * Extract index entries for a single indexable item, and add them to the + * BuildAccumulator's state. + * + * This function is used only during initial index creation. 
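ginBuildCallback below follows an accumulate-and-flush pattern: entries pile up in the BuildAccumulator until the memory used crosses the maintenance_work_mem budget, and only then is everything dumped into the entry tree. A toy model of just that control flow (the budget and sizes are made up, and flush_accumulator stands in for the ginGetBAEntry/ginEntryInsert loop):

    /*
     * Toy model of build-time accumulate-and-flush.  Not backend code.
     */
    #include <stdio.h>

    #define BUDGET 100              /* pretend memory budget, in "units" */

    static int accumulated = 0;
    static int flushes = 0;

    static void
    flush_accumulator(void)
    {
        flushes++;                  /* real code: dump entries to the index */
        accumulated = 0;
    }

    static void
    add_entries(int size)
    {
        accumulated += size;
        if (accumulated >= BUDGET)
            flush_accumulator();
    }

    int
    main(void)
    {
        /* simulate indexing a stream of heap tuples of varying entry sizes */
        int sizes[] = {30, 40, 50, 10, 80, 20, 90};

        for (int i = 0; i < 7; i++)
            add_entries(sizes[i]);
        flush_accumulator();        /* final dump, as the build does at the end */
        printf("flushed %d times\n", flushes);
        return 0;
    }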
+ */ +static void +ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, + Datum value, bool isNull, + ItemPointer heapptr) +{ + Datum *entries; + GinNullCategory *categories; + int32 nentries; + MemoryContext oldCtx; + + oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); + entries = ginExtractEntries(buildstate->accum.ginstate, attnum, + value, isNull, + &nentries, &categories); + MemoryContextSwitchTo(oldCtx); + + ginInsertBAEntries(&buildstate->accum, heapptr, attnum, + entries, categories, nentries); + + buildstate->indtuples += nentries; + + MemoryContextReset(buildstate->funcCtx); +} + +static void +ginBuildCallback(Relation index, ItemPointer tid, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + GinBuildState *buildstate = (GinBuildState *) state; + MemoryContext oldCtx; + int i; + + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) + ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), + values[i], isnull[i], tid); + + /* If we've maxed out our available memory, dump everything to the index */ + if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L) + { + ItemPointerData *list; + Datum key; + GinNullCategory category; + uint32 nlist; + OffsetNumber attnum; + + ginBeginBAScan(&buildstate->accum); + while ((list = ginGetBAEntry(&buildstate->accum, + &attnum, &key, &category, &nlist)) != NULL) + { + /* there could be many entries, so be willing to abort here */ + CHECK_FOR_INTERRUPTS(); + ginEntryInsert(&buildstate->ginstate, attnum, key, category, + list, nlist, &buildstate->buildStats); + } + + MemoryContextReset(buildstate->tmpCtx); + ginInitBA(&buildstate->accum); + } + + MemoryContextSwitchTo(oldCtx); +} + +IndexBuildResult * +ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + double reltuples; + GinBuildState buildstate; + Buffer RootBuffer, + MetaBuffer; + ItemPointerData *list; + Datum key; + GinNullCategory category; + uint32 nlist; + MemoryContext oldCtx; + OffsetNumber attnum; + + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "index \"%s\" already contains data", + RelationGetRelationName(index)); + + initGinState(&buildstate.ginstate, index); + buildstate.indtuples = 0; + memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); + + /* initialize the meta page */ + MetaBuffer = GinNewBuffer(index); + + /* initialize the root page */ + RootBuffer = GinNewBuffer(index); + + START_CRIT_SECTION(); + GinInitMetabuffer(MetaBuffer); + MarkBufferDirty(MetaBuffer); + GinInitBuffer(RootBuffer, GIN_LEAF); + MarkBufferDirty(RootBuffer); + + + UnlockReleaseBuffer(MetaBuffer); + UnlockReleaseBuffer(RootBuffer); + END_CRIT_SECTION(); + + /* count the root as first entry page */ + buildstate.buildStats.nEntryPages++; + + /* + * create a temporary memory context that is used to hold data not yet + * dumped out to the index + */ + buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin build temporary context", + ALLOCSET_DEFAULT_SIZES); + + /* + * create a temporary memory context that is used for calling + * ginExtractEntries(), and can be reset after each tuple + */ + buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin build temporary context for user-defined function", + ALLOCSET_DEFAULT_SIZES); + + buildstate.accum.ginstate = &buildstate.ginstate; + ginInitBA(&buildstate.accum); + + /* + * Do the heap scan. 
We disallow sync scan here because dataPlaceToPage + * prefers to receive tuples in TID order. + */ + reltuples = table_index_build_scan(heap, index, indexInfo, false, true, + ginBuildCallback, (void *) &buildstate, + NULL); + + /* dump remaining entries to the index */ + oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); + ginBeginBAScan(&buildstate.accum); + while ((list = ginGetBAEntry(&buildstate.accum, + &attnum, &key, &category, &nlist)) != NULL) + { + /* there could be many entries, so be willing to abort here */ + CHECK_FOR_INTERRUPTS(); + ginEntryInsert(&buildstate.ginstate, attnum, key, category, + list, nlist, &buildstate.buildStats); + } + MemoryContextSwitchTo(oldCtx); + + MemoryContextDelete(buildstate.funcCtx); + MemoryContextDelete(buildstate.tmpCtx); + + /* + * Update metapage stats + */ + buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); + ginUpdateStats(index, &buildstate.buildStats, true); + + /* + * We didn't write WAL records as we built the index, so if WAL-logging is + * required, write all pages to the WAL now. + */ + if (RelationNeedsWAL(index)) + { + log_newpage_range(index, MAIN_FORKNUM, + 0, RelationGetNumberOfBlocks(index), + true); + } + + /* + * Return statistics + */ + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + + result->heap_tuples = reltuples; + result->index_tuples = buildstate.indtuples; + + return result; +} + +/* + * ginbuildempty() -- build an empty gin index in the initialization fork + */ +void +ginbuildempty(Relation index) +{ + Buffer RootBuffer, + MetaBuffer; + + /* An empty GIN index has two pages. */ + MetaBuffer = + ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); + LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE); + RootBuffer = + ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); + LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE); + + /* Initialize and xlog metabuffer and root buffer. */ + START_CRIT_SECTION(); + GinInitMetabuffer(MetaBuffer); + MarkBufferDirty(MetaBuffer); + log_newpage_buffer(MetaBuffer, true); + GinInitBuffer(RootBuffer, GIN_LEAF); + MarkBufferDirty(RootBuffer); + log_newpage_buffer(RootBuffer, false); + END_CRIT_SECTION(); + + /* Unlock and release the buffers. 
*/ + UnlockReleaseBuffer(MetaBuffer); + UnlockReleaseBuffer(RootBuffer); +} + +/* + * Insert index entries for a single indexable item during "normal" + * (non-fast-update) insertion + */ +static void +ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, + Datum value, bool isNull, + ItemPointer item) +{ + Datum *entries; + GinNullCategory *categories; + int32 i, + nentries; + + entries = ginExtractEntries(ginstate, attnum, value, isNull, + &nentries, &categories); + + for (i = 0; i < nentries; i++) + ginEntryInsert(ginstate, attnum, entries[i], categories[i], + item, 1, NULL); +} + +bool +gininsert(Relation index, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + IndexInfo *indexInfo) +{ + GinState *ginstate = (GinState *) indexInfo->ii_AmCache; + MemoryContext oldCtx; + MemoryContext insertCtx; + int i; + + /* Initialize GinState cache if first call in this statement */ + if (ginstate == NULL) + { + oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context); + ginstate = (GinState *) palloc(sizeof(GinState)); + initGinState(ginstate, index); + indexInfo->ii_AmCache = (void *) ginstate; + MemoryContextSwitchTo(oldCtx); + } + + insertCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin insert temporary context", + ALLOCSET_DEFAULT_SIZES); + + oldCtx = MemoryContextSwitchTo(insertCtx); + + if (GinGetUseFastUpdate(index)) + { + GinTupleCollector collector; + + memset(&collector, 0, sizeof(GinTupleCollector)); + + for (i = 0; i < ginstate->origTupdesc->natts; i++) + ginHeapTupleFastCollect(ginstate, &collector, + (OffsetNumber) (i + 1), + values[i], isnull[i], + ht_ctid); + + ginHeapTupleFastInsert(ginstate, &collector); + } + else + { + for (i = 0; i < ginstate->origTupdesc->natts; i++) + ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1), + values[i], isnull[i], + ht_ctid); + } + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + return false; +} diff --git a/src/backend/access/gin/ginlogic.c b/src/backend/access/gin/ginlogic.c new file mode 100644 index 0000000..6bf3288 --- /dev/null +++ b/src/backend/access/gin/ginlogic.c @@ -0,0 +1,246 @@ +/*------------------------------------------------------------------------- + * + * ginlogic.c + * routines for performing binary- and ternary-logic consistent checks. + * + * A GIN operator class can provide a boolean or ternary consistent + * function, or both. This file provides both boolean and ternary + * interfaces to the rest of the GIN code, even if only one of them is + * implemented by the opclass. + * + * Providing a boolean interface when the opclass implements only the + * ternary function is straightforward - just call the ternary function + * with the check-array as is, and map the GIN_TRUE, GIN_FALSE, GIN_MAYBE + * return codes to TRUE, FALSE and TRUE+recheck, respectively. Providing + * a ternary interface when the opclass only implements a boolean function + * is implemented by calling the boolean function many times, with all the + * MAYBE arguments set to all combinations of TRUE and FALSE (up to a + * certain number of MAYBE arguments). + * + * (A boolean function is enough to determine if an item matches, but a + * GIN scan can apply various optimizations if it can determine that an + * item matches or doesn't match, even if it doesn't know if some of the + * keys are present or not. That's what the ternary consistent function + * is used for.) 
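/*
 * Standalone sketch (not GIN code) of the mapping described above: a ternary
 * consistent result is reduced to a boolean match plus a recheck flag, with
 * MAYBE treated as "matches, but recheck against the heap tuple".  The enum
 * and function names are local to this sketch.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum
{
	TRI_FALSE, TRI_TRUE, TRI_MAYBE
} TriValue;

static bool
tri_to_bool(TriValue v, bool *recheck)
{
	if (v == TRI_MAYBE)
	{
		*recheck = true;
		return true;
	}
	*recheck = false;
	return (v == TRI_TRUE);
}

int
main(void)
{
	bool		recheck;
	bool		match = tri_to_bool(TRI_MAYBE, &recheck);

	printf("match=%d recheck=%d\n", match, recheck);	/* match=1 recheck=1 */
	return 0;
}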
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginlogic.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/reloptions.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" + + +/* + * Maximum number of MAYBE inputs that shimTriConsistentFn will try to + * resolve by calling all combinations. + */ +#define MAX_MAYBE_ENTRIES 4 + +/* + * Dummy consistent functions for an EVERYTHING key. Just claim it matches. + */ +static bool +trueConsistentFn(GinScanKey key) +{ + key->recheckCurItem = false; + return true; +} +static GinTernaryValue +trueTriConsistentFn(GinScanKey key) +{ + return GIN_TRUE; +} + +/* + * A helper function for calling a regular, binary logic, consistent function. + */ +static bool +directBoolConsistentFn(GinScanKey key) +{ + /* + * Initialize recheckCurItem in case the consistentFn doesn't know it + * should set it. The safe assumption in that case is to force recheck. + */ + key->recheckCurItem = true; + + return DatumGetBool(FunctionCall8Coll(key->consistentFmgrInfo, + key->collation, + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nuserentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(&key->recheckCurItem), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); +} + +/* + * A helper function for calling a native ternary logic consistent function. + */ +static GinTernaryValue +directTriConsistentFn(GinScanKey key) +{ + return DatumGetGinTernaryValue(FunctionCall7Coll(key->triConsistentFmgrInfo, + key->collation, + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nuserentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); +} + +/* + * This function implements a binary logic consistency check, using a ternary + * logic consistent function provided by the opclass. GIN_MAYBE return value + * is interpreted as true with recheck flag. + */ +static bool +shimBoolConsistentFn(GinScanKey key) +{ + GinTernaryValue result; + + result = DatumGetGinTernaryValue(FunctionCall7Coll(key->triConsistentFmgrInfo, + key->collation, + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nuserentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); + if (result == GIN_MAYBE) + { + key->recheckCurItem = true; + return true; + } + else + { + key->recheckCurItem = false; + return result; + } +} + +/* + * This function implements a tri-state consistency check, using a boolean + * consistent function provided by the opclass. + * + * Our strategy is to call consistentFn with MAYBE inputs replaced with every + * combination of TRUE/FALSE. If consistentFn returns the same value for every + * combination, that's the overall result. Otherwise, return MAYBE. Testing + * every combination is O(n^2), so this is only feasible for a small number of + * MAYBE inputs. + * + * NB: This function modifies the key->entryRes array! 
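/*
 * Standalone sketch (not the implementation that follows) of the strategy
 * just described: with only a few MAYBE inputs, evaluate a boolean predicate
 * for every TRUE/FALSE assignment of those inputs; if all assignments agree
 * the answer is definite, otherwise it is MAYBE.  Everything here is invented
 * for the sketch.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum
{
	TRI_FALSE, TRI_TRUE, TRI_MAYBE
} TriValue;

static TriValue
resolve_maybes(bool (*pred) (const bool *inputs, int n),
			   bool *inputs, int n,
			   const int *maybe_pos, int nmaybe)
{
	bool		first = false;

	/* 2^nmaybe assignments, encoded in the bits of "mask" */
	for (unsigned int mask = 0; mask < (1u << nmaybe); mask++)
	{
		bool		res;

		for (int i = 0; i < nmaybe; i++)
			inputs[maybe_pos[i]] = ((mask >> i) & 1) != 0;

		res = pred(inputs, n);
		if (mask == 0)
			first = res;
		else if (res != first)
			return TRI_MAYBE;	/* the assignments disagree */
	}
	return first ? TRI_TRUE : TRI_FALSE;
}

/* example predicate: "input 0 AND input 1" */
static bool
and01(const bool *inputs, int n)
{
	(void) n;
	return inputs[0] && inputs[1];
}

int
main(void)
{
	bool		inputs[2] = {true, false};	/* input 1 is really MAYBE */
	int			maybes[1] = {1};

	/* prints 2, i.e. TRI_MAYBE: the result depends on the unknown input */
	printf("%d\n", (int) resolve_maybes(and01, inputs, 2, maybes, 1));
	return 0;
}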
+ */ +static GinTernaryValue +shimTriConsistentFn(GinScanKey key) +{ + int nmaybe; + int maybeEntries[MAX_MAYBE_ENTRIES]; + int i; + bool boolResult; + bool recheck = false; + GinTernaryValue curResult; + + /* + * Count how many MAYBE inputs there are, and store their indexes in + * maybeEntries. If there are too many MAYBE inputs, it's not feasible to + * test all combinations, so give up and return MAYBE. + */ + nmaybe = 0; + for (i = 0; i < key->nentries; i++) + { + if (key->entryRes[i] == GIN_MAYBE) + { + if (nmaybe >= MAX_MAYBE_ENTRIES) + return GIN_MAYBE; + maybeEntries[nmaybe++] = i; + } + } + + /* + * If none of the inputs were MAYBE, so we can just call consistent + * function as is. + */ + if (nmaybe == 0) + return directBoolConsistentFn(key); + + /* First call consistent function with all the maybe-inputs set FALSE */ + for (i = 0; i < nmaybe; i++) + key->entryRes[maybeEntries[i]] = GIN_FALSE; + curResult = directBoolConsistentFn(key); + + for (;;) + { + /* Twiddle the entries for next combination. */ + for (i = 0; i < nmaybe; i++) + { + if (key->entryRes[maybeEntries[i]] == GIN_FALSE) + { + key->entryRes[maybeEntries[i]] = GIN_TRUE; + break; + } + else + key->entryRes[maybeEntries[i]] = GIN_FALSE; + } + if (i == nmaybe) + break; + + boolResult = directBoolConsistentFn(key); + recheck |= key->recheckCurItem; + + if (curResult != boolResult) + return GIN_MAYBE; + } + + /* TRUE with recheck is taken to mean MAYBE */ + if (curResult == GIN_TRUE && recheck) + curResult = GIN_MAYBE; + + return curResult; +} + +/* + * Set up the implementation of the consistent functions for a scan key. + */ +void +ginInitConsistentFunction(GinState *ginstate, GinScanKey key) +{ + if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING) + { + key->boolConsistentFn = trueConsistentFn; + key->triConsistentFn = trueTriConsistentFn; + } + else + { + key->consistentFmgrInfo = &ginstate->consistentFn[key->attnum - 1]; + key->triConsistentFmgrInfo = &ginstate->triConsistentFn[key->attnum - 1]; + key->collation = ginstate->supportCollation[key->attnum - 1]; + + if (OidIsValid(ginstate->consistentFn[key->attnum - 1].fn_oid)) + key->boolConsistentFn = directBoolConsistentFn; + else + key->boolConsistentFn = shimBoolConsistentFn; + + if (OidIsValid(ginstate->triConsistentFn[key->attnum - 1].fn_oid)) + key->triConsistentFn = directTriConsistentFn; + else + key->triConsistentFn = shimTriConsistentFn; + } +} diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c new file mode 100644 index 0000000..216b2b9 --- /dev/null +++ b/src/backend/access/gin/ginpostinglist.c @@ -0,0 +1,434 @@ +/*------------------------------------------------------------------------- + * + * ginpostinglist.c + * routines for dealing with posting lists. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginpostinglist.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" + +#ifdef USE_ASSERT_CHECKING +#define CHECK_ENCODING_ROUNDTRIP +#endif + +/* + * For encoding purposes, item pointers are represented as 64-bit unsigned + * integers. The lowest 11 bits represent the offset number, and the next + * lowest 32 bits are the block number. That leaves 21 bits unused, i.e. + * only 43 low bits are used. 
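/*
 * Standalone sketch (not GIN code) of the item pointer layout just described:
 * an 11-bit offset number in the low bits with the 32-bit block number above
 * it, 43 significant bits in total.  Plain integers stand in for
 * ItemPointerData here.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define OFFSET_BITS 11			/* cf. MaxHeapTuplesPerPageBits below */

static uint64_t
pack_tid(uint32_t block, uint16_t offset)
{
	assert(offset < (1u << OFFSET_BITS));
	return ((uint64_t) block << OFFSET_BITS) | offset;
}

static void
unpack_tid(uint64_t val, uint32_t *block, uint16_t *offset)
{
	*offset = (uint16_t) (val & ((1u << OFFSET_BITS) - 1));
	*block = (uint32_t) (val >> OFFSET_BITS);
}

int
main(void)
{
	uint32_t	block;
	uint16_t	offset;

	unpack_tid(pack_tid(123456, 17), &block, &offset);
	printf("%u %u\n", (unsigned int) block, (unsigned int) offset);	/* 123456 17 */
	return 0;
}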
+ * + * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage < + * 2^11 on all supported block sizes. We are frugal with the bits, because + * smaller integers use fewer bytes in the varbyte encoding, saving disk + * space. (If we get a new table AM in the future that wants to use the full + * range of possible offset numbers, we'll need to change this.) + * + * These 43-bit integers are encoded using varbyte encoding. In each byte, + * the 7 low bits contain data, while the highest bit is a continuation bit. + * When the continuation bit is set, the next byte is part of the same + * integer, otherwise this is the last byte of this integer. 43 bits need + * at most 7 bytes in this encoding: + * + * 0XXXXXXX + * 1XXXXXXX 0XXXXYYY + * 1XXXXXXX 1XXXXYYY 0YYYYYYY + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY + * + * X = bits used for offset number + * Y = bits used for block number + * u = unused bit + * + * The bytes are in stored in little-endian order. + * + * An important property of this encoding is that removing an item from list + * never increases the size of the resulting compressed posting list. Proof: + * + * Removing number is actually replacement of two numbers with their sum. We + * have to prove that varbyte encoding of a sum can't be longer than varbyte + * encoding of its summands. Sum of two numbers is at most one bit wider than + * the larger of the summands. Widening a number by one bit enlarges its length + * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum + * is at most one byte longer than varbyte encoding of larger summand. Lesser + * summand is at least one byte, so the sum cannot take more space than the + * summands, Q.E.D. + * + * This property greatly simplifies VACUUM, which can assume that posting + * lists always fit on the same page after vacuuming. Note that even though + * that holds for removing items from a posting list, you must also be + * careful to not cause expansion e.g. when merging uncompressed items on the + * page into the compressed lists, when vacuuming. + */ + +/* + * How many bits do you need to encode offset number? OffsetNumber is a 16-bit + * integer, but you can't fit that many items on a page. 11 ought to be more + * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and + * use the minimum number of bits, but that would require changing the on-disk + * format if MaxHeapTuplesPerPage changes. Better to leave some slack. + */ +#define MaxHeapTuplesPerPageBits 11 + +/* Max. number of bytes needed to encode the largest supported integer. */ +#define MaxBytesPerInteger 7 + +static inline uint64 +itemptr_to_uint64(const ItemPointer iptr) +{ + uint64 val; + + Assert(ItemPointerIsValid(iptr)); + Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits)); + + val = GinItemPointerGetBlockNumber(iptr); + val <<= MaxHeapTuplesPerPageBits; + val |= GinItemPointerGetOffsetNumber(iptr); + + return val; +} + +static inline void +uint64_to_itemptr(uint64 val, ItemPointer iptr) +{ + GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1)); + val = val >> MaxHeapTuplesPerPageBits; + GinItemPointerSetBlockNumber(iptr, val); + + Assert(ItemPointerIsValid(iptr)); +} + +/* + * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer. 
+ */ +static void +encode_varbyte(uint64 val, unsigned char **ptr) +{ + unsigned char *p = *ptr; + + while (val > 0x7F) + { + *(p++) = 0x80 | (val & 0x7F); + val >>= 7; + } + *(p++) = (unsigned char) val; + + *ptr = p; +} + +/* + * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer. + */ +static uint64 +decode_varbyte(unsigned char **ptr) +{ + uint64 val; + unsigned char *p = *ptr; + uint64 c; + + /* 1st byte */ + c = *(p++); + val = c & 0x7F; + if (c & 0x80) + { + /* 2nd byte */ + c = *(p++); + val |= (c & 0x7F) << 7; + if (c & 0x80) + { + /* 3rd byte */ + c = *(p++); + val |= (c & 0x7F) << 14; + if (c & 0x80) + { + /* 4th byte */ + c = *(p++); + val |= (c & 0x7F) << 21; + if (c & 0x80) + { + /* 5th byte */ + c = *(p++); + val |= (c & 0x7F) << 28; + if (c & 0x80) + { + /* 6th byte */ + c = *(p++); + val |= (c & 0x7F) << 35; + if (c & 0x80) + { + /* 7th byte, should not have continuation bit */ + c = *(p++); + val |= c << 42; + Assert((c & 0x80) == 0); + } + } + } + } + } + } + + *ptr = p; + + return val; +} + +/* + * Encode a posting list. + * + * The encoded list is returned in a palloc'd struct, which will be at most + * 'maxsize' bytes in size. The number items in the returned segment is + * returned in *nwritten. If it's not equal to nipd, not all the items fit + * in 'maxsize', and only the first *nwritten were encoded. + * + * The allocated size of the returned struct is short-aligned, and the padding + * byte at the end, if any, is zero. + */ +GinPostingList * +ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize, + int *nwritten) +{ + uint64 prev; + int totalpacked = 0; + int maxbytes; + GinPostingList *result; + unsigned char *ptr; + unsigned char *endptr; + + maxsize = SHORTALIGN_DOWN(maxsize); + + result = palloc(maxsize); + + maxbytes = maxsize - offsetof(GinPostingList, bytes); + Assert(maxbytes > 0); + + /* Store the first special item */ + result->first = ipd[0]; + + prev = itemptr_to_uint64(&result->first); + + ptr = result->bytes; + endptr = result->bytes + maxbytes; + for (totalpacked = 1; totalpacked < nipd; totalpacked++) + { + uint64 val = itemptr_to_uint64(&ipd[totalpacked]); + uint64 delta = val - prev; + + Assert(val > prev); + + if (endptr - ptr >= MaxBytesPerInteger) + encode_varbyte(delta, &ptr); + else + { + /* + * There are less than 7 bytes left. Have to check if the next + * item fits in that space before writing it out. + */ + unsigned char buf[MaxBytesPerInteger]; + unsigned char *p = buf; + + encode_varbyte(delta, &p); + if (p - buf > (endptr - ptr)) + break; /* output is full */ + + memcpy(ptr, buf, p - buf); + ptr += (p - buf); + } + prev = val; + } + result->nbytes = ptr - result->bytes; + + /* + * If we wrote an odd number of bytes, zero out the padding byte at the + * end. + */ + if (result->nbytes != SHORTALIGN(result->nbytes)) + result->bytes[result->nbytes] = 0; + + if (nwritten) + *nwritten = totalpacked; + + Assert(SizeOfGinPostingList(result) <= maxsize); + + /* + * Check that the encoded segment decodes back to the original items. + */ +#if defined (CHECK_ENCODING_ROUNDTRIP) + { + int ndecoded; + ItemPointer tmp = ginPostingListDecode(result, &ndecoded); + + Assert(ndecoded == totalpacked); + Assert(memcmp(tmp, ipd, ndecoded * sizeof(ItemPointerData)) == 0); + pfree(tmp); + } +#endif + + return result; +} + +/* + * Decode a compressed posting list into an array of item pointers. + * The number of items is returned in *ndecoded. 
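/*
 * Standalone sketch (not GIN code) of the varbyte delta coding implemented by
 * encode_varbyte()/decode_varbyte() above: ascending values are stored as
 * deltas, seven data bits per byte with the high bit as a continuation flag,
 * so nearby TIDs usually cost a single byte each.  All names are local to the
 * sketch.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned char *
put_varbyte(uint64_t val, unsigned char *p)
{
	while (val > 0x7F)
	{
		*p++ = 0x80 | (val & 0x7F);
		val >>= 7;
	}
	*p++ = (unsigned char) val;
	return p;
}

static const unsigned char *
get_varbyte(const unsigned char *p, uint64_t *val)
{
	uint64_t	result = 0;
	int			shift = 0;
	unsigned char c;

	do
	{
		c = *p++;
		result |= (uint64_t) (c & 0x7F) << shift;
		shift += 7;
	} while (c & 0x80);

	*val = result;
	return p;
}

int
main(void)
{
	uint64_t	values[] = {1000, 1003, 1004, 5000};
	unsigned char buf[64];
	unsigned char *end = buf;
	const unsigned char *rp = buf;
	uint64_t	prev = 0;

	for (int i = 0; i < 4; i++)
	{
		end = put_varbyte(values[i] - prev, end);	/* store the delta */
		prev = values[i];
	}
	printf("encoded 4 values into %d bytes\n", (int) (end - buf));	/* 6 bytes */

	prev = 0;
	while (rp < end)
	{
		uint64_t	delta;

		rp = get_varbyte(rp, &delta);
		prev += delta;
		printf("%llu\n", (unsigned long long) prev);
	}
	return 0;
}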
+ */ +ItemPointer +ginPostingListDecode(GinPostingList *plist, int *ndecoded) +{ + return ginPostingListDecodeAllSegments(plist, + SizeOfGinPostingList(plist), + ndecoded); +} + +/* + * Decode multiple posting list segments into an array of item pointers. + * The number of items is returned in *ndecoded_out. The segments are stored + * one after each other, with total size 'len' bytes. + */ +ItemPointer +ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out) +{ + ItemPointer result; + int nallocated; + uint64 val; + char *endseg = ((char *) segment) + len; + int ndecoded; + unsigned char *ptr; + unsigned char *endptr; + + /* + * Guess an initial size of the array. + */ + nallocated = segment->nbytes * 2 + 1; + result = palloc(nallocated * sizeof(ItemPointerData)); + + ndecoded = 0; + while ((char *) segment < endseg) + { + /* enlarge output array if needed */ + if (ndecoded >= nallocated) + { + nallocated *= 2; + result = repalloc(result, nallocated * sizeof(ItemPointerData)); + } + + /* copy the first item */ + Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first))); + Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0); + result[ndecoded] = segment->first; + ndecoded++; + + val = itemptr_to_uint64(&segment->first); + ptr = segment->bytes; + endptr = segment->bytes + segment->nbytes; + while (ptr < endptr) + { + /* enlarge output array if needed */ + if (ndecoded >= nallocated) + { + nallocated *= 2; + result = repalloc(result, nallocated * sizeof(ItemPointerData)); + } + + val += decode_varbyte(&ptr); + + uint64_to_itemptr(val, &result[ndecoded]); + ndecoded++; + } + segment = GinNextPostingListSegment(segment); + } + + if (ndecoded_out) + *ndecoded_out = ndecoded; + return result; +} + +/* + * Add all item pointers from a bunch of posting lists to a TIDBitmap. + */ +int +ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len, + TIDBitmap *tbm) +{ + int ndecoded; + ItemPointer items; + + items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded); + tbm_add_tuples(tbm, items, ndecoded, false); + pfree(items); + + return ndecoded; +} + +/* + * Merge two ordered arrays of itempointers, eliminating any duplicates. + * + * Returns a palloc'd array, and *nmerged is set to the number of items in + * the result, after eliminating duplicates. + */ +ItemPointer +ginMergeItemPointers(ItemPointerData *a, uint32 na, + ItemPointerData *b, uint32 nb, + int *nmerged) +{ + ItemPointerData *dst; + + dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData)); + + /* + * If the argument arrays don't overlap, we can just append them to each + * other. 
+ */ + if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0) + { + memcpy(dst, a, na * sizeof(ItemPointerData)); + memcpy(&dst[na], b, nb * sizeof(ItemPointerData)); + *nmerged = na + nb; + } + else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0) + { + memcpy(dst, b, nb * sizeof(ItemPointerData)); + memcpy(&dst[nb], a, na * sizeof(ItemPointerData)); + *nmerged = na + nb; + } + else + { + ItemPointerData *dptr = dst; + ItemPointerData *aptr = a; + ItemPointerData *bptr = b; + + while (aptr - a < na && bptr - b < nb) + { + int cmp = ginCompareItemPointers(aptr, bptr); + + if (cmp > 0) + *dptr++ = *bptr++; + else if (cmp == 0) + { + /* only keep one copy of the identical items */ + *dptr++ = *bptr++; + aptr++; + } + else + *dptr++ = *aptr++; + } + + while (aptr - a < na) + *dptr++ = *aptr++; + + while (bptr - b < nb) + *dptr++ = *bptr++; + + *nmerged = dptr - dst; + } + + return dst; +} diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c new file mode 100644 index 0000000..55e2d49 --- /dev/null +++ b/src/backend/access/gin/ginscan.c @@ -0,0 +1,468 @@ +/*------------------------------------------------------------------------- + * + * ginscan.c + * routines to manage scans of inverted index relations + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginscan.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/relscan.h" +#include "pgstat.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +IndexScanDesc +ginbeginscan(Relation rel, int nkeys, int norderbys) +{ + IndexScanDesc scan; + GinScanOpaque so; + + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(rel, nkeys, norderbys); + + /* allocate private workspace */ + so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData)); + so->keys = NULL; + so->nkeys = 0; + so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin scan temporary context", + ALLOCSET_DEFAULT_SIZES); + so->keyCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin scan key context", + ALLOCSET_DEFAULT_SIZES); + initGinState(&so->ginstate, scan->indexRelation); + + scan->opaque = so; + + return scan; +} + +/* + * Create a new GinScanEntry, unless an equivalent one already exists, + * in which case just return it + */ +static GinScanEntry +ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, + StrategyNumber strategy, int32 searchMode, + Datum queryKey, GinNullCategory queryCategory, + bool isPartialMatch, Pointer extra_data) +{ + GinState *ginstate = &so->ginstate; + GinScanEntry scanEntry; + uint32 i; + + /* + * Look for an existing equivalent entry. + * + * Entries with non-null extra_data are never considered identical, since + * we can't know exactly what the opclass might be doing with that. 
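/*
 * Standalone sketch (not GIN code) of the duplicate-eliminating merge done by
 * ginMergeItemPointers above, using plain integers in place of item pointers.
 * Both inputs must already be sorted in ascending order.
 */
#include <stdio.h>

static int
merge_unique(const int *a, int na, const int *b, int nb, int *dst)
{
	int			i = 0,
				j = 0,
				n = 0;

	while (i < na && j < nb)
	{
		if (a[i] < b[j])
			dst[n++] = a[i++];
		else if (a[i] > b[j])
			dst[n++] = b[j++];
		else
		{
			dst[n++] = a[i++];	/* equal: keep only one copy */
			j++;
		}
	}
	while (i < na)
		dst[n++] = a[i++];
	while (j < nb)
		dst[n++] = b[j++];
	return n;
}

int
main(void)
{
	int			a[] = {1, 3, 5, 7};
	int			b[] = {3, 4, 7, 9};
	int			out[8];
	int			n = merge_unique(a, 4, b, 4, out);

	for (int k = 0; k < n; k++)
		printf("%d ", out[k]);	/* 1 3 4 5 7 9 */
	printf("\n");
	return 0;
}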
+ */ + if (extra_data == NULL) + { + for (i = 0; i < so->totalentries; i++) + { + GinScanEntry prevEntry = so->entries[i]; + + if (prevEntry->extra_data == NULL && + prevEntry->isPartialMatch == isPartialMatch && + prevEntry->strategy == strategy && + prevEntry->searchMode == searchMode && + prevEntry->attnum == attnum && + ginCompareEntries(ginstate, attnum, + prevEntry->queryKey, + prevEntry->queryCategory, + queryKey, + queryCategory) == 0) + { + /* Successful match */ + return prevEntry; + } + } + } + + /* Nope, create a new entry */ + scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData)); + scanEntry->queryKey = queryKey; + scanEntry->queryCategory = queryCategory; + scanEntry->isPartialMatch = isPartialMatch; + scanEntry->extra_data = extra_data; + scanEntry->strategy = strategy; + scanEntry->searchMode = searchMode; + scanEntry->attnum = attnum; + + scanEntry->buffer = InvalidBuffer; + ItemPointerSetMin(&scanEntry->curItem); + scanEntry->matchBitmap = NULL; + scanEntry->matchIterator = NULL; + scanEntry->matchResult = NULL; + scanEntry->list = NULL; + scanEntry->nlist = 0; + scanEntry->offset = InvalidOffsetNumber; + scanEntry->isFinished = false; + scanEntry->reduceResult = false; + + /* Add it to so's array */ + if (so->totalentries >= so->allocentries) + { + so->allocentries *= 2; + so->entries = (GinScanEntry *) + repalloc(so->entries, so->allocentries * sizeof(GinScanEntry)); + } + so->entries[so->totalentries++] = scanEntry; + + return scanEntry; +} + +/* + * Append hidden scan entry of given category to the scan key. + * + * NB: this had better be called at most once per scan key, since + * ginFillScanKey leaves room for only one hidden entry. Currently, + * it seems sufficiently clear that this is true that we don't bother + * with any cross-check logic. + */ +static void +ginScanKeyAddHiddenEntry(GinScanOpaque so, GinScanKey key, + GinNullCategory queryCategory) +{ + int i = key->nentries++; + + /* strategy is of no interest because this is not a partial-match item */ + key->scanEntry[i] = ginFillScanEntry(so, key->attnum, + InvalidStrategy, key->searchMode, + (Datum) 0, queryCategory, + false, NULL); +} + +/* + * Initialize the next GinScanKey using the output from the extractQueryFn + */ +static void +ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, + StrategyNumber strategy, int32 searchMode, + Datum query, uint32 nQueryValues, + Datum *queryValues, GinNullCategory *queryCategories, + bool *partial_matches, Pointer *extra_data) +{ + GinScanKey key = &(so->keys[so->nkeys++]); + GinState *ginstate = &so->ginstate; + uint32 i; + + key->nentries = nQueryValues; + key->nuserentries = nQueryValues; + + /* Allocate one extra array slot for possible "hidden" entry */ + key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) * + (nQueryValues + 1)); + key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) * + (nQueryValues + 1)); + + key->query = query; + key->queryValues = queryValues; + key->queryCategories = queryCategories; + key->extra_data = extra_data; + key->strategy = strategy; + key->searchMode = searchMode; + key->attnum = attnum; + + /* + * Initially, scan keys of GIN_SEARCH_MODE_ALL mode are marked + * excludeOnly. This might get changed later. 
+ */ + key->excludeOnly = (searchMode == GIN_SEARCH_MODE_ALL); + + ItemPointerSetMin(&key->curItem); + key->curItemMatches = false; + key->recheckCurItem = false; + key->isFinished = false; + key->nrequired = 0; + key->nadditional = 0; + key->requiredEntries = NULL; + key->additionalEntries = NULL; + + ginInitConsistentFunction(ginstate, key); + + /* Set up normal scan entries using extractQueryFn's outputs */ + for (i = 0; i < nQueryValues; i++) + { + Datum queryKey; + GinNullCategory queryCategory; + bool isPartialMatch; + Pointer this_extra; + + queryKey = queryValues[i]; + queryCategory = queryCategories[i]; + isPartialMatch = + (ginstate->canPartialMatch[attnum - 1] && partial_matches) + ? partial_matches[i] : false; + this_extra = (extra_data) ? extra_data[i] : NULL; + + key->scanEntry[i] = ginFillScanEntry(so, attnum, + strategy, searchMode, + queryKey, queryCategory, + isPartialMatch, this_extra); + } + + /* + * For GIN_SEARCH_MODE_INCLUDE_EMPTY and GIN_SEARCH_MODE_EVERYTHING search + * modes, we add the "hidden" entry immediately. GIN_SEARCH_MODE_ALL is + * handled later, since we might be able to omit the hidden entry for it. + */ + if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_ITEM); + else if (searchMode == GIN_SEARCH_MODE_EVERYTHING) + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); +} + +/* + * Release current scan keys, if any. + */ +void +ginFreeScanKeys(GinScanOpaque so) +{ + uint32 i; + + if (so->keys == NULL) + return; + + for (i = 0; i < so->totalentries; i++) + { + GinScanEntry entry = so->entries[i]; + + if (entry->buffer != InvalidBuffer) + ReleaseBuffer(entry->buffer); + if (entry->list) + pfree(entry->list); + if (entry->matchIterator) + tbm_end_iterate(entry->matchIterator); + if (entry->matchBitmap) + tbm_free(entry->matchBitmap); + } + + MemoryContextResetAndDeleteChildren(so->keyCtx); + + so->keys = NULL; + so->nkeys = 0; + so->entries = NULL; + so->totalentries = 0; +} + +void +ginNewScanKey(IndexScanDesc scan) +{ + ScanKey scankey = scan->keyData; + GinScanOpaque so = (GinScanOpaque) scan->opaque; + int i; + bool hasNullQuery = false; + bool attrHasNormalScan[INDEX_MAX_KEYS] = {false}; + MemoryContext oldCtx; + + /* + * Allocate all the scan key information in the key context. (If + * extractQuery leaks anything there, it won't be reset until the end of + * scan or rescan, but that's OK.) + */ + oldCtx = MemoryContextSwitchTo(so->keyCtx); + + /* if no scan keys provided, allocate extra EVERYTHING GinScanKey */ + so->keys = (GinScanKey) + palloc(Max(scan->numberOfKeys, 1) * sizeof(GinScanKeyData)); + so->nkeys = 0; + + /* initialize expansible array of GinScanEntry pointers */ + so->totalentries = 0; + so->allocentries = 32; + so->entries = (GinScanEntry *) + palloc(so->allocentries * sizeof(GinScanEntry)); + + so->isVoidRes = false; + + for (i = 0; i < scan->numberOfKeys; i++) + { + ScanKey skey = &scankey[i]; + Datum *queryValues; + int32 nQueryValues = 0; + bool *partial_matches = NULL; + Pointer *extra_data = NULL; + bool *nullFlags = NULL; + GinNullCategory *categories; + int32 searchMode = GIN_SEARCH_MODE_DEFAULT; + + /* + * We assume that GIN-indexable operators are strict, so a null query + * argument means an unsatisfiable query. 
+ */ + if (skey->sk_flags & SK_ISNULL) + { + so->isVoidRes = true; + break; + } + + /* OK to call the extractQueryFn */ + queryValues = (Datum *) + DatumGetPointer(FunctionCall7Coll(&so->ginstate.extractQueryFn[skey->sk_attno - 1], + so->ginstate.supportCollation[skey->sk_attno - 1], + skey->sk_argument, + PointerGetDatum(&nQueryValues), + UInt16GetDatum(skey->sk_strategy), + PointerGetDatum(&partial_matches), + PointerGetDatum(&extra_data), + PointerGetDatum(&nullFlags), + PointerGetDatum(&searchMode))); + + /* + * If bogus searchMode is returned, treat as GIN_SEARCH_MODE_ALL; note + * in particular we don't allow extractQueryFn to select + * GIN_SEARCH_MODE_EVERYTHING. + */ + if (searchMode < GIN_SEARCH_MODE_DEFAULT || + searchMode > GIN_SEARCH_MODE_ALL) + searchMode = GIN_SEARCH_MODE_ALL; + + /* Non-default modes require the index to have placeholders */ + if (searchMode != GIN_SEARCH_MODE_DEFAULT) + hasNullQuery = true; + + /* + * In default mode, no keys means an unsatisfiable query. + */ + if (queryValues == NULL || nQueryValues <= 0) + { + if (searchMode == GIN_SEARCH_MODE_DEFAULT) + { + so->isVoidRes = true; + break; + } + nQueryValues = 0; /* ensure sane value */ + } + + /* + * Create GinNullCategory representation. If the extractQueryFn + * didn't create a nullFlags array, we assume everything is non-null. + * While at it, detect whether any null keys are present. + */ + categories = (GinNullCategory *) palloc0(nQueryValues * sizeof(GinNullCategory)); + if (nullFlags) + { + int32 j; + + for (j = 0; j < nQueryValues; j++) + { + if (nullFlags[j]) + { + categories[j] = GIN_CAT_NULL_KEY; + hasNullQuery = true; + } + } + } + + ginFillScanKey(so, skey->sk_attno, + skey->sk_strategy, searchMode, + skey->sk_argument, nQueryValues, + queryValues, categories, + partial_matches, extra_data); + + /* Remember if we had any non-excludeOnly keys */ + if (searchMode != GIN_SEARCH_MODE_ALL) + attrHasNormalScan[skey->sk_attno - 1] = true; + } + + /* + * Processing GIN_SEARCH_MODE_ALL scan keys requires us to make a second + * pass over the scan keys. Above we marked each such scan key as + * excludeOnly. If the involved column has any normal (not excludeOnly) + * scan key as well, then we can leave it like that. Otherwise, one + * excludeOnly scan key must receive a GIN_CAT_EMPTY_QUERY hidden entry + * and be set to normal (excludeOnly = false). + */ + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = &so->keys[i]; + + if (key->searchMode != GIN_SEARCH_MODE_ALL) + continue; + + if (!attrHasNormalScan[key->attnum - 1]) + { + key->excludeOnly = false; + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); + attrHasNormalScan[key->attnum - 1] = true; + } + } + + /* + * If there are no regular scan keys, generate an EVERYTHING scankey to + * drive a full-index scan. + */ + if (so->nkeys == 0 && !so->isVoidRes) + { + hasNullQuery = true; + ginFillScanKey(so, FirstOffsetNumber, + InvalidStrategy, GIN_SEARCH_MODE_EVERYTHING, + (Datum) 0, 0, + NULL, NULL, NULL, NULL); + } + + /* + * If the index is version 0, it may be missing null and placeholder + * entries, which would render searches for nulls and full-index scans + * unreliable. Throw an error if so. 
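/*
 * Standalone sketch (not GIN code) of the two passes over the scan keys shown
 * above: keys in "match everything in this column" mode start out as
 * exclude-only, and one such key per column is promoted back to a normal key
 * if the column has no other normal key to drive the scan.  The types and
 * names here are invented for the sketch.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_ATTRS 4

typedef struct
{
	int			attnum;			/* 1-based column number */
	bool		mode_all;		/* corresponds to GIN_SEARCH_MODE_ALL */
	bool		exclude_only;
} SketchKey;

static void
assign_exclude_only(SketchKey *keys, int nkeys)
{
	bool		has_normal[MAX_ATTRS + 1] = {false};

	/* first pass: ALL-mode keys start as exclude-only */
	for (int i = 0; i < nkeys; i++)
	{
		keys[i].exclude_only = keys[i].mode_all;
		if (!keys[i].mode_all)
			has_normal[keys[i].attnum] = true;
	}

	/* second pass: promote one ALL-mode key per column lacking a normal key */
	for (int i = 0; i < nkeys; i++)
	{
		if (keys[i].mode_all && !has_normal[keys[i].attnum])
		{
			keys[i].exclude_only = false;
			has_normal[keys[i].attnum] = true;
		}
	}
}

int
main(void)
{
	SketchKey	keys[] = {
		{1, false, false},		/* normal key on column 1 */
		{1, true, false},		/* ALL-mode key on column 1: stays exclude-only */
		{2, true, false},		/* only key on column 2: gets promoted */
	};

	assign_exclude_only(keys, 3);
	for (int i = 0; i < 3; i++)
		printf("key %d excludeOnly=%d\n", i, (int) keys[i].exclude_only);
	return 0;
}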
+ */ + if (hasNullQuery && !so->isVoidRes) + { + GinStatsData ginStats; + + ginGetStats(scan->indexRelation, &ginStats); + if (ginStats.ginVersion < 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("old GIN indexes do not support whole-index scans nor searches for nulls"), + errhint("To fix this, do REINDEX INDEX \"%s\".", + RelationGetRelationName(scan->indexRelation)))); + } + + MemoryContextSwitchTo(oldCtx); + + pgstat_count_index_scan(scan->indexRelation); +} + +void +ginrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + + ginFreeScanKeys(so); + + if (scankey && scan->numberOfKeys > 0) + { + memmove(scan->keyData, scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + } +} + + +void +ginendscan(IndexScanDesc scan) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + + ginFreeScanKeys(so); + + MemoryContextDelete(so->tempCtx); + MemoryContextDelete(so->keyCtx); + + pfree(so); +} diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c new file mode 100644 index 0000000..cdd626f --- /dev/null +++ b/src/backend/access/gin/ginutil.c @@ -0,0 +1,707 @@ +/*------------------------------------------------------------------------- + * + * ginutil.c + * Utility routines for the Postgres inverted index access method. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginutil.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/reloptions.h" +#include "access/xloginsert.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/builtins.h" +#include "utils/index_selfuncs.h" +#include "utils/typcache.h" + + +/* + * GIN handler function: return IndexAmRoutine with access method parameters + * and callbacks. 
+ */ +Datum +ginhandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = GINNProcs; + amroutine->amoptsprocnum = GIN_OPTIONS_PROC; + amroutine->amcanorder = false; + amroutine->amcanorderbyop = false; + amroutine->amcanbackward = false; + amroutine->amcanunique = false; + amroutine->amcanmulticol = true; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = true; + amroutine->amclusterable = false; + amroutine->ampredlocks = true; + amroutine->amcanparallel = false; + amroutine->amcaninclude = false; + amroutine->amusemaintenanceworkmem = true; + amroutine->amparallelvacuumoptions = + VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP; + amroutine->amkeytype = InvalidOid; + + amroutine->ambuild = ginbuild; + amroutine->ambuildempty = ginbuildempty; + amroutine->aminsert = gininsert; + amroutine->ambulkdelete = ginbulkdelete; + amroutine->amvacuumcleanup = ginvacuumcleanup; + amroutine->amcanreturn = NULL; + amroutine->amcostestimate = gincostestimate; + amroutine->amoptions = ginoptions; + amroutine->amproperty = NULL; + amroutine->ambuildphasename = NULL; + amroutine->amvalidate = ginvalidate; + amroutine->amadjustmembers = ginadjustmembers; + amroutine->ambeginscan = ginbeginscan; + amroutine->amrescan = ginrescan; + amroutine->amgettuple = NULL; + amroutine->amgetbitmap = gingetbitmap; + amroutine->amendscan = ginendscan; + amroutine->ammarkpos = NULL; + amroutine->amrestrpos = NULL; + amroutine->amestimateparallelscan = NULL; + amroutine->aminitparallelscan = NULL; + amroutine->amparallelrescan = NULL; + + PG_RETURN_POINTER(amroutine); +} + +/* + * initGinState: fill in an empty GinState struct to describe the index + * + * Note: assorted subsidiary data is allocated in the CurrentMemoryContext. + */ +void +initGinState(GinState *state, Relation index) +{ + TupleDesc origTupdesc = RelationGetDescr(index); + int i; + + MemSet(state, 0, sizeof(GinState)); + + state->index = index; + state->oneCol = (origTupdesc->natts == 1) ? true : false; + state->origTupdesc = origTupdesc; + + for (i = 0; i < origTupdesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(origTupdesc, i); + + if (state->oneCol) + state->tupdesc[i] = state->origTupdesc; + else + { + state->tupdesc[i] = CreateTemplateTupleDesc(2); + + TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL, + INT2OID, -1, 0); + TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL, + attr->atttypid, + attr->atttypmod, + attr->attndims); + TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2, + attr->attcollation); + } + + /* + * If the compare proc isn't specified in the opclass definition, look + * up the index key type's default btree comparator. 
+ */ + if (index_getprocid(index, i + 1, GIN_COMPARE_PROC) != InvalidOid) + { + fmgr_info_copy(&(state->compareFn[i]), + index_getprocinfo(index, i + 1, GIN_COMPARE_PROC), + CurrentMemoryContext); + } + else + { + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(attr->atttypid, + TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(attr->atttypid)))); + fmgr_info_copy(&(state->compareFn[i]), + &(typentry->cmp_proc_finfo), + CurrentMemoryContext); + } + + /* Opclass must always provide extract procs */ + fmgr_info_copy(&(state->extractValueFn[i]), + index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC), + CurrentMemoryContext); + fmgr_info_copy(&(state->extractQueryFn[i]), + index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC), + CurrentMemoryContext); + + /* + * Check opclass capability to do tri-state or binary logic consistent + * check. + */ + if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid) + { + fmgr_info_copy(&(state->triConsistentFn[i]), + index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC), + CurrentMemoryContext); + } + + if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid) + { + fmgr_info_copy(&(state->consistentFn[i]), + index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC), + CurrentMemoryContext); + } + + if (state->consistentFn[i].fn_oid == InvalidOid && + state->triConsistentFn[i].fn_oid == InvalidOid) + { + elog(ERROR, "missing GIN support function (%d or %d) for attribute %d of index \"%s\"", + GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC, + i + 1, RelationGetRelationName(index)); + } + + /* + * Check opclass capability to do partial match. + */ + if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid) + { + fmgr_info_copy(&(state->comparePartialFn[i]), + index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC), + CurrentMemoryContext); + state->canPartialMatch[i] = true; + } + else + { + state->canPartialMatch[i] = false; + } + + /* + * If the index column has a specified collation, we should honor that + * while doing comparisons. However, we may have a collatable storage + * type for a noncollatable indexed data type (for instance, hstore + * uses text index entries). If there's no index collation then + * specify default collation in case the support functions need + * collation. This is harmless if the support functions don't care + * about collation, so we just do it unconditionally. (We could + * alternatively call get_typcollation, but that seems like expensive + * overkill --- there aren't going to be any cases where a GIN storage + * type has a nondefault collation.) 
+ */ + if (OidIsValid(index->rd_indcollation[i])) + state->supportCollation[i] = index->rd_indcollation[i]; + else + state->supportCollation[i] = DEFAULT_COLLATION_OID; + } +} + +/* + * Extract attribute (column) number of stored entry from GIN tuple + */ +OffsetNumber +gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple) +{ + OffsetNumber colN; + + if (ginstate->oneCol) + { + /* column number is not stored explicitly */ + colN = FirstOffsetNumber; + } + else + { + Datum res; + bool isnull; + + /* + * First attribute is always int16, so we can safely use any tuple + * descriptor to obtain first attribute of tuple + */ + res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0], + &isnull); + Assert(!isnull); + + colN = DatumGetUInt16(res); + Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts); + } + + return colN; +} + +/* + * Extract stored datum (and possible null category) from GIN tuple + */ +Datum +gintuple_get_key(GinState *ginstate, IndexTuple tuple, + GinNullCategory *category) +{ + Datum res; + bool isnull; + + if (ginstate->oneCol) + { + /* + * Single column index doesn't store attribute numbers in tuples + */ + res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc, + &isnull); + } + else + { + /* + * Since the datum type depends on which index column it's from, we + * must be careful to use the right tuple descriptor here. + */ + OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple); + + res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber), + ginstate->tupdesc[colN - 1], + &isnull); + } + + if (isnull) + *category = GinGetNullCategory(tuple, ginstate); + else + *category = GIN_CAT_NORM_KEY; + + return res; +} + +/* + * Allocate a new page (either by recycling, or by extending the index file) + * The returned buffer is already pinned and exclusive-locked + * Caller is responsible for initializing the page by calling GinInitBuffer + */ +Buffer +GinNewBuffer(Relation index) +{ + Buffer buffer; + bool needLock; + + /* First, try to get a page from FSM */ + for (;;) + { + BlockNumber blkno = GetFreeIndexPage(index); + + if (blkno == InvalidBlockNumber) + break; + + buffer = ReadBuffer(index, blkno); + + /* + * We have to guard against the possibility that someone else already + * recycled this page; the buffer may be locked if so. 
+ */ + if (ConditionalLockBuffer(buffer)) + { + if (GinPageIsRecyclable(BufferGetPage(buffer))) + return buffer; /* OK to use */ + + LockBuffer(buffer, GIN_UNLOCK); + } + + /* Can't use it, so release buffer and try again */ + ReleaseBuffer(buffer); + } + + /* Must extend the file */ + needLock = !RELATION_IS_LOCAL(index); + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + + buffer = ReadBuffer(index, P_NEW); + LockBuffer(buffer, GIN_EXCLUSIVE); + + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + return buffer; +} + +void +GinInitPage(Page page, uint32 f, Size pageSize) +{ + GinPageOpaque opaque; + + PageInit(page, pageSize, sizeof(GinPageOpaqueData)); + + opaque = GinPageGetOpaque(page); + opaque->flags = f; + opaque->rightlink = InvalidBlockNumber; +} + +void +GinInitBuffer(Buffer b, uint32 f) +{ + GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b)); +} + +void +GinInitMetabuffer(Buffer b) +{ + GinMetaPageData *metadata; + Page page = BufferGetPage(b); + + GinInitPage(page, GIN_META, BufferGetPageSize(b)); + + metadata = GinPageGetMeta(page); + + metadata->head = metadata->tail = InvalidBlockNumber; + metadata->tailFreeSize = 0; + metadata->nPendingPages = 0; + metadata->nPendingHeapTuples = 0; + metadata->nTotalPages = 0; + metadata->nEntryPages = 0; + metadata->nDataPages = 0; + metadata->nEntries = 0; + metadata->ginVersion = GIN_CURRENT_VERSION; + + /* + * Set pd_lower just past the end of the metadata. This is essential, + * because without doing so, metadata will be lost if xlog.c compresses + * the page. + */ + ((PageHeader) page)->pd_lower = + ((char *) metadata + sizeof(GinMetaPageData)) - (char *) page; +} + +/* + * Compare two keys of the same index column + */ +int +ginCompareEntries(GinState *ginstate, OffsetNumber attnum, + Datum a, GinNullCategory categorya, + Datum b, GinNullCategory categoryb) +{ + /* if not of same null category, sort by that first */ + if (categorya != categoryb) + return (categorya < categoryb) ? -1 : 1; + + /* all null items in same category are equal */ + if (categorya != GIN_CAT_NORM_KEY) + return 0; + + /* both not null, so safe to call the compareFn */ + return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1], + ginstate->supportCollation[attnum - 1], + a, b)); +} + +/* + * Compare two keys of possibly different index columns + */ +int +ginCompareAttEntries(GinState *ginstate, + OffsetNumber attnuma, Datum a, GinNullCategory categorya, + OffsetNumber attnumb, Datum b, GinNullCategory categoryb) +{ + /* attribute number is the first sort key */ + if (attnuma != attnumb) + return (attnuma < attnumb) ? -1 : 1; + + return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb); +} + + +/* + * Support for sorting key datums in ginExtractEntries + * + * Note: we only have to worry about null and not-null keys here; + * ginExtractEntries never generates more than one placeholder null, + * so it doesn't have to sort those. 
+ */ +typedef struct +{ + Datum datum; + bool isnull; +} keyEntryData; + +typedef struct +{ + FmgrInfo *cmpDatumFunc; + Oid collation; + bool haveDups; +} cmpEntriesArg; + +static int +cmpEntries(const void *a, const void *b, void *arg) +{ + const keyEntryData *aa = (const keyEntryData *) a; + const keyEntryData *bb = (const keyEntryData *) b; + cmpEntriesArg *data = (cmpEntriesArg *) arg; + int res; + + if (aa->isnull) + { + if (bb->isnull) + res = 0; /* NULL "=" NULL */ + else + res = 1; /* NULL ">" not-NULL */ + } + else if (bb->isnull) + res = -1; /* not-NULL "<" NULL */ + else + res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc, + data->collation, + aa->datum, bb->datum)); + + /* + * Detect if we have any duplicates. If there are equal keys, qsort must + * compare them at some point, else it wouldn't know whether one should go + * before or after the other. + */ + if (res == 0) + data->haveDups = true; + + return res; +} + + +/* + * Extract the index key values from an indexable item + * + * The resulting key values are sorted, and any duplicates are removed. + * This avoids generating redundant index entries. + */ +Datum * +ginExtractEntries(GinState *ginstate, OffsetNumber attnum, + Datum value, bool isNull, + int32 *nentries, GinNullCategory **categories) +{ + Datum *entries; + bool *nullFlags; + int32 i; + + /* + * We don't call the extractValueFn on a null item. Instead generate a + * placeholder. + */ + if (isNull) + { + *nentries = 1; + entries = (Datum *) palloc(sizeof(Datum)); + entries[0] = (Datum) 0; + *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); + (*categories)[0] = GIN_CAT_NULL_ITEM; + return entries; + } + + /* OK, call the opclass's extractValueFn */ + nullFlags = NULL; /* in case extractValue doesn't set it */ + entries = (Datum *) + DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1], + ginstate->supportCollation[attnum - 1], + value, + PointerGetDatum(nentries), + PointerGetDatum(&nullFlags))); + + /* + * Generate a placeholder if the item contained no keys. + */ + if (entries == NULL || *nentries <= 0) + { + *nentries = 1; + entries = (Datum *) palloc(sizeof(Datum)); + entries[0] = (Datum) 0; + *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); + (*categories)[0] = GIN_CAT_EMPTY_ITEM; + return entries; + } + + /* + * If the extractValueFn didn't create a nullFlags array, create one, + * assuming that everything's non-null. + */ + if (nullFlags == NULL) + nullFlags = (bool *) palloc0(*nentries * sizeof(bool)); + + /* + * If there's more than one key, sort and unique-ify. + * + * XXX Using qsort here is notationally painful, and the overhead is + * pretty bad too. For small numbers of keys it'd likely be better to use + * a simple insertion sort. 
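/*
 * Standalone sketch (not GIN code) of the trick used by cmpEntries above and
 * the deduplication pass that follows in ginExtractEntries(): the sort
 * comparator remembers whether it ever saw two equal keys, so the
 * duplicate-removal pass only runs when it is actually needed.  A file-scope
 * flag replaces the qsort_arg() argument used by the real code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool have_dups = false;

static int
cmp_int(const void *a, const void *b)
{
	int			ia = *(const int *) a;
	int			ib = *(const int *) b;

	if (ia == ib)
	{
		have_dups = true;
		return 0;
	}
	return (ia < ib) ? -1 : 1;
}

int
main(void)
{
	int			keys[] = {5, 2, 5, 9, 2};
	int			n = 5;

	qsort(keys, n, sizeof(int), cmp_int);

	if (have_dups)
	{
		int			j = 1;

		for (int i = 1; i < n; i++)
			if (keys[i] != keys[i - 1])
				keys[j++] = keys[i];
		n = j;
	}

	for (int i = 0; i < n; i++)
		printf("%d ", keys[i]);	/* 2 5 9 */
	printf("\n");
	return 0;
}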
+ */ + if (*nentries > 1) + { + keyEntryData *keydata; + cmpEntriesArg arg; + + keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData)); + for (i = 0; i < *nentries; i++) + { + keydata[i].datum = entries[i]; + keydata[i].isnull = nullFlags[i]; + } + + arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1]; + arg.collation = ginstate->supportCollation[attnum - 1]; + arg.haveDups = false; + qsort_arg(keydata, *nentries, sizeof(keyEntryData), + cmpEntries, (void *) &arg); + + if (arg.haveDups) + { + /* there are duplicates, must get rid of 'em */ + int32 j; + + entries[0] = keydata[0].datum; + nullFlags[0] = keydata[0].isnull; + j = 1; + for (i = 1; i < *nentries; i++) + { + if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0) + { + entries[j] = keydata[i].datum; + nullFlags[j] = keydata[i].isnull; + j++; + } + } + *nentries = j; + } + else + { + /* easy, no duplicates */ + for (i = 0; i < *nentries; i++) + { + entries[i] = keydata[i].datum; + nullFlags[i] = keydata[i].isnull; + } + } + + pfree(keydata); + } + + /* + * Create GinNullCategory representation from nullFlags. + */ + *categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory)); + for (i = 0; i < *nentries; i++) + (*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY); + + return entries; +} + +bytea * +ginoptions(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}, + {"gin_pending_list_limit", RELOPT_TYPE_INT, offsetof(GinOptions, + pendingListCleanupSize)} + }; + + return (bytea *) build_reloptions(reloptions, validate, + RELOPT_KIND_GIN, + sizeof(GinOptions), + tab, lengthof(tab)); +} + +/* + * Fetch index's statistical data into *stats + * + * Note: in the result, nPendingPages can be trusted to be up-to-date, + * as can ginVersion; but the other fields are as of the last VACUUM. + */ +void +ginGetStats(Relation index, GinStatsData *stats) +{ + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_SHARE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + stats->nPendingPages = metadata->nPendingPages; + stats->nTotalPages = metadata->nTotalPages; + stats->nEntryPages = metadata->nEntryPages; + stats->nDataPages = metadata->nDataPages; + stats->nEntries = metadata->nEntries; + stats->ginVersion = metadata->ginVersion; + + UnlockReleaseBuffer(metabuffer); +} + +/* + * Write the given statistics to the index's metapage + * + * Note: nPendingPages and ginVersion are *not* copied over + */ +void +ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build) +{ + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + START_CRIT_SECTION(); + + metadata->nTotalPages = stats->nTotalPages; + metadata->nEntryPages = stats->nEntryPages; + metadata->nDataPages = stats->nDataPages; + metadata->nEntries = stats->nEntries; + + /* + * Set pd_lower just past the end of the metadata. This is essential, + * because without doing so, metadata will be lost if xlog.c compresses + * the page. (We must do this here because pre-v11 versions of PG did not + * set the metapage's pd_lower correctly, so a pg_upgraded index might + * contain the wrong value.) 
+ */ + ((PageHeader) metapage)->pd_lower = + ((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage; + + MarkBufferDirty(metabuffer); + + if (RelationNeedsWAL(index) && !is_build) + { + XLogRecPtr recptr; + ginxlogUpdateMeta data; + + data.node = index->rd_node; + data.ntuples = 0; + data.newRightlink = data.prevTail = InvalidBlockNumber; + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData)); + + XLogBeginInsert(); + XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta)); + XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE); + PageSetLSN(metapage, recptr); + } + + UnlockReleaseBuffer(metabuffer); + + END_CRIT_SECTION(); +} diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c new file mode 100644 index 0000000..a276eb0 --- /dev/null +++ b/src/backend/access/gin/ginvacuum.c @@ -0,0 +1,822 @@ +/*------------------------------------------------------------------------- + * + * ginvacuum.c + * delete & vacuum routines for the postgres GIN + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginvacuum.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/xloginsert.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "postmaster/autovacuum.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/memutils.h" + +struct GinVacuumState +{ + Relation index; + IndexBulkDeleteResult *result; + IndexBulkDeleteCallback callback; + void *callback_state; + GinState ginstate; + BufferAccessStrategy strategy; + MemoryContext tmpCxt; +}; + +/* + * Vacuums an uncompressed posting list. The size of the must can be specified + * in number of items (nitems). + * + * If none of the items need to be removed, returns NULL. Otherwise returns + * a new palloc'd array with the remaining items. The number of remaining + * items is returned in *nremaining. + */ +ItemPointer +ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items, + int nitem, int *nremaining) +{ + int i, + remaining = 0; + ItemPointer tmpitems = NULL; + + /* + * Iterate over TIDs array + */ + for (i = 0; i < nitem; i++) + { + if (gvs->callback(items + i, gvs->callback_state)) + { + gvs->result->tuples_removed += 1; + if (!tmpitems) + { + /* + * First TID to be deleted: allocate memory to hold the + * remaining items. + */ + tmpitems = palloc(sizeof(ItemPointerData) * nitem); + memcpy(tmpitems, items, sizeof(ItemPointerData) * i); + } + } + else + { + gvs->result->num_index_tuples += 1; + if (tmpitems) + tmpitems[remaining] = items[i]; + remaining++; + } + } + + *nremaining = remaining; + return tmpitems; +} + +/* + * Create a WAL record for vacuuming entry tree leaf page. + */ +static void +xlogVacuumPage(Relation index, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + XLogRecPtr recptr; + + /* This is only used for entry tree leaf pages. */ + Assert(!GinPageIsData(page)); + Assert(GinPageIsLeaf(page)); + + if (!RelationNeedsWAL(index)) + return; + + /* + * Always create a full image, we don't track the changes on the page at + * any more fine-grained level. This could obviously be improved... 
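/*
 * Standalone sketch (not GIN code) of the lazy-copy filtering done by
 * ginVacuumItemPointers above: the output array is allocated only when the
 * first item is actually removed, so a posting list with nothing to vacuum
 * costs no copy at all and NULL is returned.  Plain integers stand in for
 * item pointers, and is_dead() stands in for the bulk-delete callback.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static bool
is_dead(int item)
{
	return (item % 2) == 0;
}

static int *
filter_items(const int *items, int nitems, int *nremaining)
{
	int		   *out = NULL;
	int			kept = 0;

	for (int i = 0; i < nitems; i++)
	{
		if (is_dead(items[i]))
		{
			if (out == NULL)
			{
				/* first removal: copy only the items kept so far */
				out = malloc(sizeof(int) * nitems);
				memcpy(out, items, sizeof(int) * i);
			}
		}
		else
		{
			if (out != NULL)
				out[kept] = items[i];
			kept++;
		}
	}
	*nremaining = kept;
	return out;					/* NULL means "nothing was removed" */
}

int
main(void)
{
	int			items[] = {1, 2, 3, 4, 5};
	int			nremaining;
	int		   *out = filter_items(items, 5, &nremaining);

	printf("%d items remain\n", nremaining);	/* 3: the odd ones */
	free(out);
	return 0;
}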
+ */ + XLogBeginInsert(); + XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE); + PageSetLSN(page, recptr); +} + + +typedef struct DataPageDeleteStack +{ + struct DataPageDeleteStack *child; + struct DataPageDeleteStack *parent; + + BlockNumber blkno; /* current block number */ + Buffer leftBuffer; /* pinned and locked rightest non-deleted page + * on left */ + bool isRoot; +} DataPageDeleteStack; + + +/* + * Delete a posting tree page. + */ +static void +ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno, + BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot) +{ + Buffer dBuffer; + Buffer lBuffer; + Buffer pBuffer; + Page page, + parentPage; + BlockNumber rightlink; + + /* + * This function MUST be called only if someone of parent pages hold + * exclusive cleanup lock. This guarantees that no insertions currently + * happen in this subtree. Caller also acquires Exclusive locks on + * deletable, parent and left pages. + */ + lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno, + RBM_NORMAL, gvs->strategy); + dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno, + RBM_NORMAL, gvs->strategy); + pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno, + RBM_NORMAL, gvs->strategy); + + page = BufferGetPage(dBuffer); + rightlink = GinPageGetOpaque(page)->rightlink; + + /* + * Any insert which would have gone on the leaf block will now go to its + * right sibling. + */ + PredicateLockPageCombine(gvs->index, deleteBlkno, rightlink); + + START_CRIT_SECTION(); + + /* Unlink the page by changing left sibling's rightlink */ + page = BufferGetPage(lBuffer); + GinPageGetOpaque(page)->rightlink = rightlink; + + /* Delete downlink from parent */ + parentPage = BufferGetPage(pBuffer); +#ifdef USE_ASSERT_CHECKING + do + { + PostingItem *tod = GinDataPageGetPostingItem(parentPage, myoff); + + Assert(PostingItemGetBlockNumber(tod) == deleteBlkno); + } while (0); +#endif + GinPageDeletePostingItem(parentPage, myoff); + + page = BufferGetPage(dBuffer); + + /* + * we shouldn't change rightlink field to save workability of running + * search scan + */ + + /* + * Mark page as deleted, and remember last xid which could know its + * address. + */ + GinPageSetDeleted(page); + GinPageSetDeleteXid(page, ReadNextTransactionId()); + + MarkBufferDirty(pBuffer); + MarkBufferDirty(lBuffer); + MarkBufferDirty(dBuffer); + + if (RelationNeedsWAL(gvs->index)) + { + XLogRecPtr recptr; + ginxlogDeletePage data; + + /* + * We can't pass REGBUF_STANDARD for the deleted page, because we + * didn't set pd_lower on pre-9.4 versions. The page might've been + * binary-upgraded from an older version, and hence not have pd_lower + * set correctly. Ditto for the left page, but removing the item from + * the parent updated its pd_lower, so we know that's OK at this + * point. 
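+ *
+ * Without REGBUF_STANDARD, any full-page image of those buffers is taken
+ * without excluding the pd_lower..pd_upper hole.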
+ */ + XLogBeginInsert(); + XLogRegisterBuffer(0, dBuffer, 0); + XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD); + XLogRegisterBuffer(2, lBuffer, 0); + + data.parentOffset = myoff; + data.rightLink = GinPageGetOpaque(page)->rightlink; + data.deleteXid = GinPageGetDeleteXid(page); + + XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE); + PageSetLSN(page, recptr); + PageSetLSN(parentPage, recptr); + PageSetLSN(BufferGetPage(lBuffer), recptr); + } + + ReleaseBuffer(pBuffer); + ReleaseBuffer(lBuffer); + ReleaseBuffer(dBuffer); + + END_CRIT_SECTION(); + + gvs->result->pages_newly_deleted++; + gvs->result->pages_deleted++; +} + + +/* + * Scans posting tree and deletes empty pages. Caller must lock root page for + * cleanup. During scan path from root to current page is kept exclusively + * locked. Also keep left page exclusively locked, because ginDeletePage() + * needs it. If we try to relock left page later, it could deadlock with + * ginStepRight(). + */ +static bool +ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, + DataPageDeleteStack *parent, OffsetNumber myoff) +{ + DataPageDeleteStack *me; + Buffer buffer; + Page page; + bool meDelete = false; + bool isempty; + + if (isRoot) + { + me = parent; + } + else + { + if (!parent->child) + { + me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); + me->parent = parent; + parent->child = me; + me->leftBuffer = InvalidBuffer; + } + else + me = parent->child; + } + + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, + RBM_NORMAL, gvs->strategy); + + if (!isRoot) + LockBuffer(buffer, GIN_EXCLUSIVE); + + page = BufferGetPage(buffer); + + Assert(GinPageIsData(page)); + + if (!GinPageIsLeaf(page)) + { + OffsetNumber i; + + me->blkno = blkno; + for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) + { + PostingItem *pitem = GinDataPageGetPostingItem(page, i); + + if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), false, me, i)) + i--; + } + + if (GinPageRightMost(page) && BufferIsValid(me->child->leftBuffer)) + { + UnlockReleaseBuffer(me->child->leftBuffer); + me->child->leftBuffer = InvalidBuffer; + } + } + + if (GinPageIsLeaf(page)) + isempty = GinDataLeafPageIsEmpty(page); + else + isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber; + + if (isempty) + { + /* we never delete the left- or rightmost branch */ + if (BufferIsValid(me->leftBuffer) && !GinPageRightMost(page)) + { + Assert(!isRoot); + ginDeletePage(gvs, blkno, BufferGetBlockNumber(me->leftBuffer), + me->parent->blkno, myoff, me->parent->isRoot); + meDelete = true; + } + } + + if (!meDelete) + { + if (BufferIsValid(me->leftBuffer)) + UnlockReleaseBuffer(me->leftBuffer); + me->leftBuffer = buffer; + } + else + { + if (!isRoot) + LockBuffer(buffer, GIN_UNLOCK); + + ReleaseBuffer(buffer); + } + + if (isRoot) + ReleaseBuffer(buffer); + + return meDelete; +} + + +/* + * Scan through posting tree leafs, delete empty tuples. Returns true if there + * is at least one empty page. 
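+ *
+ * The function first descends to the leftmost leaf of the posting tree and
+ * then walks the leaf level via rightlinks, vacuuming each leaf page in turn.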
+ */ +static bool +ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno) +{ + Buffer buffer; + Page page; + bool hasVoidPage = false; + MemoryContext oldCxt; + + /* Find leftmost leaf page of posting tree and lock it in exclusive mode */ + while (true) + { + PostingItem *pitem; + + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, + RBM_NORMAL, gvs->strategy); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + + Assert(GinPageIsData(page)); + + if (GinPageIsLeaf(page)) + { + LockBuffer(buffer, GIN_UNLOCK); + LockBuffer(buffer, GIN_EXCLUSIVE); + break; + } + + Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); + + pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber); + blkno = PostingItemGetBlockNumber(pitem); + Assert(blkno != InvalidBlockNumber); + + UnlockReleaseBuffer(buffer); + } + + /* Iterate all posting tree leaves using rightlinks and vacuum them */ + while (true) + { + oldCxt = MemoryContextSwitchTo(gvs->tmpCxt); + ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs); + MemoryContextSwitchTo(oldCxt); + MemoryContextReset(gvs->tmpCxt); + + if (GinDataLeafPageIsEmpty(page)) + hasVoidPage = true; + + blkno = GinPageGetOpaque(page)->rightlink; + + UnlockReleaseBuffer(buffer); + + if (blkno == InvalidBlockNumber) + break; + + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, + RBM_NORMAL, gvs->strategy); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + } + + return hasVoidPage; +} + +static void +ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno) +{ + if (ginVacuumPostingTreeLeaves(gvs, rootBlkno)) + { + /* + * There is at least one empty page. So we have to rescan the tree + * deleting empty pages. + */ + Buffer buffer; + DataPageDeleteStack root, + *ptr, + *tmp; + + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, rootBlkno, + RBM_NORMAL, gvs->strategy); + + /* + * Lock posting tree root for cleanup to ensure there are no + * concurrent inserts. + */ + LockBufferForCleanup(buffer); + + memset(&root, 0, sizeof(DataPageDeleteStack)); + root.leftBuffer = InvalidBuffer; + root.isRoot = true; + + ginScanToDelete(gvs, rootBlkno, true, &root, InvalidOffsetNumber); + + ptr = root.child; + + while (ptr) + { + tmp = ptr->child; + pfree(ptr); + ptr = tmp; + } + + UnlockReleaseBuffer(buffer); + } +} + +/* + * returns modified page or NULL if page isn't modified. + * Function works with original page until first change is occurred, + * then page is copied into temporary one. + */ +static Page +ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) +{ + Page origpage = BufferGetPage(buffer), + tmppage; + OffsetNumber i, + maxoff = PageGetMaxOffsetNumber(origpage); + + tmppage = origpage; + + *nroot = 0; + + for (i = FirstOffsetNumber; i <= maxoff; i++) + { + IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); + + if (GinIsPostingTree(itup)) + { + /* + * store posting tree's roots for further processing, we can't + * vacuum it just now due to risk of deadlocks with scans/inserts + */ + roots[*nroot] = GinGetDownlink(itup); + (*nroot)++; + } + else if (GinGetNPosting(itup) > 0) + { + int nitems; + ItemPointer items_orig; + bool free_items_orig; + ItemPointer items; + + /* Get list of item pointers from the tuple. 
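+ * The tuple holds either a compressed GinPostingList, which is decoded here
+ * into a palloc'd ItemPointer array, or an uncompressed ItemPointer array
+ * (the older on-disk format) that can be used directly.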
*/ + if (GinItupIsCompressed(itup)) + { + items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems); + free_items_orig = true; + } + else + { + items_orig = (ItemPointer) GinGetPosting(itup); + nitems = GinGetNPosting(itup); + free_items_orig = false; + } + + /* Remove any items from the list that need to be vacuumed. */ + items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems); + + if (free_items_orig) + pfree(items_orig); + + /* If any item pointers were removed, recreate the tuple. */ + if (items) + { + OffsetNumber attnum; + Datum key; + GinNullCategory category; + GinPostingList *plist; + int plistsize; + + if (nitems > 0) + { + plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL); + plistsize = SizeOfGinPostingList(plist); + } + else + { + plist = NULL; + plistsize = 0; + } + + /* + * if we already created a temporary page, make changes in + * place + */ + if (tmppage == origpage) + { + /* + * On first difference, create a temporary copy of the + * page and copy the tuple's posting list to it. + */ + tmppage = PageGetTempPageCopy(origpage); + + /* set itup pointer to new page */ + itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); + } + + attnum = gintuple_get_attrnum(&gvs->ginstate, itup); + key = gintuple_get_key(&gvs->ginstate, itup, &category); + itup = GinFormTuple(&gvs->ginstate, attnum, key, category, + (char *) plist, plistsize, + nitems, true); + if (plist) + pfree(plist); + PageIndexTupleDelete(tmppage, i); + + if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(gvs->index)); + + pfree(itup); + pfree(items); + } + } + } + + return (tmppage == origpage) ? NULL : tmppage; +} + +IndexBulkDeleteResult * +ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state) +{ + Relation index = info->index; + BlockNumber blkno = GIN_ROOT_BLKNO; + GinVacuumState gvs; + Buffer buffer; + BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; + uint32 nRoot; + + gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext, + "Gin vacuum temporary context", + ALLOCSET_DEFAULT_SIZES); + gvs.index = index; + gvs.callback = callback; + gvs.callback_state = callback_state; + gvs.strategy = info->strategy; + initGinState(&gvs.ginstate, index); + + /* first time through? 
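+ * (stats == NULL means this is the first ginbulkdelete() call of this
+ * VACUUM, so allocate the result struct and flush pending-list insertions.)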
*/ + if (stats == NULL) + { + /* Yes, so initialize stats to zeroes */ + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + + /* + * and cleanup any pending inserts + */ + ginInsertCleanup(&gvs.ginstate, !IsAutoVacuumWorkerProcess(), + false, true, stats); + } + + /* we'll re-count the tuples each time */ + stats->num_index_tuples = 0; + gvs.result = stats; + + buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, + RBM_NORMAL, info->strategy); + + /* find leaf page */ + for (;;) + { + Page page = BufferGetPage(buffer); + IndexTuple itup; + + LockBuffer(buffer, GIN_SHARE); + + Assert(!GinPageIsData(page)); + + if (GinPageIsLeaf(page)) + { + LockBuffer(buffer, GIN_UNLOCK); + LockBuffer(buffer, GIN_EXCLUSIVE); + + if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page)) + { + LockBuffer(buffer, GIN_UNLOCK); + continue; /* check it one more */ + } + break; + } + + Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); + blkno = GinGetDownlink(itup); + Assert(blkno != InvalidBlockNumber); + + UnlockReleaseBuffer(buffer); + buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, + RBM_NORMAL, info->strategy); + } + + /* right now we found leftmost page in entry's BTree */ + + for (;;) + { + Page page = BufferGetPage(buffer); + Page resPage; + uint32 i; + + Assert(!GinPageIsData(page)); + + resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot); + + blkno = GinPageGetOpaque(page)->rightlink; + + if (resPage) + { + START_CRIT_SECTION(); + PageRestoreTempPage(resPage, page); + MarkBufferDirty(buffer); + xlogVacuumPage(gvs.index, buffer); + UnlockReleaseBuffer(buffer); + END_CRIT_SECTION(); + } + else + { + UnlockReleaseBuffer(buffer); + } + + vacuum_delay_point(); + + for (i = 0; i < nRoot; i++) + { + ginVacuumPostingTree(&gvs, rootOfPostingTree[i]); + vacuum_delay_point(); + } + + if (blkno == InvalidBlockNumber) /* rightmost page */ + break; + + buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, + RBM_NORMAL, info->strategy); + LockBuffer(buffer, GIN_EXCLUSIVE); + } + + MemoryContextDelete(gvs.tmpCxt); + + return gvs.result; +} + +IndexBulkDeleteResult * +ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation index = info->index; + bool needLock; + BlockNumber npages, + blkno; + BlockNumber totFreePages; + GinState ginstate; + GinStatsData idxStat; + + /* + * In an autovacuum analyze, we want to clean up pending insertions. + * Otherwise, an ANALYZE-only call is a no-op. + */ + if (info->analyze_only) + { + if (IsAutoVacuumWorkerProcess()) + { + initGinState(&ginstate, index); + ginInsertCleanup(&ginstate, false, true, true, stats); + } + return stats; + } + + /* + * Set up all-zero stats and cleanup pending inserts if ginbulkdelete + * wasn't called + */ + if (stats == NULL) + { + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + initGinState(&ginstate, index); + ginInsertCleanup(&ginstate, !IsAutoVacuumWorkerProcess(), + false, true, stats); + } + + memset(&idxStat, 0, sizeof(idxStat)); + + /* + * XXX we always report the heap tuple count as the number of index + * entries. This is bogus if the index is partial, but it's real hard to + * tell how many distinct heap entries are referenced by a GIN index. + */ + stats->num_index_tuples = Max(info->num_heap_tuples, 0); + stats->estimated_count = info->estimated_count; + + /* + * Need lock unless it's local to this backend. 
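+ * (Holding the extension lock keeps the relation from growing while we read
+ * the block count just below.)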
+ */ + needLock = !RELATION_IS_LOCAL(index); + + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + npages = RelationGetNumberOfBlocks(index); + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + totFreePages = 0; + + for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++) + { + Buffer buffer; + Page page; + + vacuum_delay_point(); + + buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, + RBM_NORMAL, info->strategy); + LockBuffer(buffer, GIN_SHARE); + page = (Page) BufferGetPage(buffer); + + if (GinPageIsRecyclable(page)) + { + Assert(blkno != GIN_ROOT_BLKNO); + RecordFreeIndexPage(index, blkno); + totFreePages++; + } + else if (GinPageIsData(page)) + { + idxStat.nDataPages++; + } + else if (!GinPageIsList(page)) + { + idxStat.nEntryPages++; + + if (GinPageIsLeaf(page)) + idxStat.nEntries += PageGetMaxOffsetNumber(page); + } + + UnlockReleaseBuffer(buffer); + } + + /* Update the metapage with accurate page and entry counts */ + idxStat.nTotalPages = npages; + ginUpdateStats(info->index, &idxStat, false); + + /* Finally, vacuum the FSM */ + IndexFreeSpaceMapVacuum(info->index); + + stats->pages_free = totFreePages; + + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + stats->num_pages = RelationGetNumberOfBlocks(index); + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + return stats; +} + +/* + * Return whether Page can safely be recycled. + */ +bool +GinPageIsRecyclable(Page page) +{ + TransactionId delete_xid; + + if (PageIsNew(page)) + return true; + + if (!GinPageIsDeleted(page)) + return false; + + delete_xid = GinPageGetDeleteXid(page); + + if (!TransactionIdIsValid(delete_xid)) + return true; + + /* + * If no backend still could view delete_xid as in running, all scans + * concurrent with ginDeletePage() must have finished. + */ + return GlobalVisCheckRemovableXid(NULL, delete_xid); +} diff --git a/src/backend/access/gin/ginvalidate.c b/src/backend/access/gin/ginvalidate.c new file mode 100644 index 0000000..d2510da --- /dev/null +++ b/src/backend/access/gin/ginvalidate.c @@ -0,0 +1,338 @@ +/*------------------------------------------------------------------------- + * + * ginvalidate.c + * Opclass validator for GIN. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginvalidate.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amvalidate.h" +#include "access/gin_private.h" +#include "access/htup_details.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/regproc.h" +#include "utils/syscache.h" + +/* + * Validator for a GIN opclass. 
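+ *
+ * Checks that every support function and operator registered in the opclass's
+ * family has a sane support/strategy number and the expected signature, and
+ * that the opclass itself supplies all required support functions.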
+ */ +bool +ginvalidate(Oid opclassoid) +{ + bool result = true; + HeapTuple classtup; + Form_pg_opclass classform; + Oid opfamilyoid; + Oid opcintype; + Oid opckeytype; + char *opclassname; + HeapTuple familytup; + Form_pg_opfamily familyform; + char *opfamilyname; + CatCList *proclist, + *oprlist; + List *grouplist; + OpFamilyOpFuncGroup *opclassgroup; + int i; + ListCell *lc; + + /* Fetch opclass information */ + classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid)); + if (!HeapTupleIsValid(classtup)) + elog(ERROR, "cache lookup failed for operator class %u", opclassoid); + classform = (Form_pg_opclass) GETSTRUCT(classtup); + + opfamilyoid = classform->opcfamily; + opcintype = classform->opcintype; + opckeytype = classform->opckeytype; + if (!OidIsValid(opckeytype)) + opckeytype = opcintype; + opclassname = NameStr(classform->opcname); + + /* Fetch opfamily information */ + familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid)); + if (!HeapTupleIsValid(familytup)) + elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid); + familyform = (Form_pg_opfamily) GETSTRUCT(familytup); + + opfamilyname = NameStr(familyform->opfname); + + /* Fetch all operators and support functions of the opfamily */ + oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid)); + proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid)); + + /* Check individual support functions */ + for (i = 0; i < proclist->n_members; i++) + { + HeapTuple proctup = &proclist->members[i]->tuple; + Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup); + bool ok; + + /* + * All GIN support functions should be registered with matching + * left/right types + */ + if (procform->amproclefttype != procform->amprocrighttype) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains support function %s with different left and right input types", + opfamilyname, "gin", + format_procedure(procform->amproc)))); + result = false; + } + + /* + * We can't check signatures except within the specific opclass, since + * we need to know the associated opckeytype in many cases. 
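+ * Support functions whose left input type differs from opcintype are
+ * therefore skipped here.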
+ */ + if (procform->amproclefttype != opcintype) + continue; + + /* Check procedure numbers and function signatures */ + switch (procform->amprocnum) + { + case GIN_COMPARE_PROC: + ok = check_amproc_signature(procform->amproc, INT4OID, false, + 2, 2, opckeytype, opckeytype); + break; + case GIN_EXTRACTVALUE_PROC: + /* Some opclasses omit nullFlags */ + ok = check_amproc_signature(procform->amproc, INTERNALOID, false, + 2, 3, opcintype, INTERNALOID, + INTERNALOID); + break; + case GIN_EXTRACTQUERY_PROC: + /* Some opclasses omit nullFlags and searchMode */ + ok = check_amproc_signature(procform->amproc, INTERNALOID, false, + 5, 7, opcintype, INTERNALOID, + INT2OID, INTERNALOID, INTERNALOID, + INTERNALOID, INTERNALOID); + break; + case GIN_CONSISTENT_PROC: + /* Some opclasses omit queryKeys and nullFlags */ + ok = check_amproc_signature(procform->amproc, BOOLOID, false, + 6, 8, INTERNALOID, INT2OID, + opcintype, INT4OID, + INTERNALOID, INTERNALOID, + INTERNALOID, INTERNALOID); + break; + case GIN_COMPARE_PARTIAL_PROC: + ok = check_amproc_signature(procform->amproc, INT4OID, false, + 4, 4, opckeytype, opckeytype, + INT2OID, INTERNALOID); + break; + case GIN_TRICONSISTENT_PROC: + ok = check_amproc_signature(procform->amproc, CHAROID, false, + 7, 7, INTERNALOID, INT2OID, + opcintype, INT4OID, + INTERNALOID, INTERNALOID, + INTERNALOID); + break; + case GIN_OPTIONS_PROC: + ok = check_amoptsproc_signature(procform->amproc); + break; + default: + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains function %s with invalid support number %d", + opfamilyname, "gin", + format_procedure(procform->amproc), + procform->amprocnum))); + result = false; + continue; /* don't want additional message */ + } + + if (!ok) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d", + opfamilyname, "gin", + format_procedure(procform->amproc), + procform->amprocnum))); + result = false; + } + } + + /* Check individual operators */ + for (i = 0; i < oprlist->n_members; i++) + { + HeapTuple oprtup = &oprlist->members[i]->tuple; + Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup); + + /* TODO: Check that only allowed strategy numbers exist */ + if (oprform->amopstrategy < 1 || oprform->amopstrategy > 63) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains operator %s with invalid strategy number %d", + opfamilyname, "gin", + format_operator(oprform->amopopr), + oprform->amopstrategy))); + result = false; + } + + /* gin doesn't support ORDER BY operators */ + if (oprform->amoppurpose != AMOP_SEARCH || + OidIsValid(oprform->amopsortfamily)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s", + opfamilyname, "gin", + format_operator(oprform->amopopr)))); + result = false; + } + + /* Check operator signature --- same for all gin strategies */ + if (!check_amop_signature(oprform->amopopr, BOOLOID, + oprform->amoplefttype, + oprform->amoprighttype)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains operator %s with wrong signature", + opfamilyname, "gin", + format_operator(oprform->amopopr)))); + result = false; + } + } + + /* Now check for 
inconsistent groups of operators/functions */ + grouplist = identify_opfamily_groups(oprlist, proclist); + opclassgroup = NULL; + foreach(lc, grouplist) + { + OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc); + + /* Remember the group exactly matching the test opclass */ + if (thisgroup->lefttype == opcintype && + thisgroup->righttype == opcintype) + opclassgroup = thisgroup; + + /* + * There is not a lot we can do to check the operator sets, since each + * GIN opclass is more or less a law unto itself, and some contain + * only operators that are binary-compatible with the opclass datatype + * (meaning that empty operator sets can be OK). That case also means + * that we shouldn't insist on nonempty function sets except for the + * opclass's own group. + */ + } + + /* Check that the originally-named opclass is complete */ + for (i = 1; i <= GINNProcs; i++) + { + if (opclassgroup && + (opclassgroup->functionset & (((uint64) 1) << i)) != 0) + continue; /* got it */ + if (i == GIN_COMPARE_PROC || i == GIN_COMPARE_PARTIAL_PROC || + i == GIN_OPTIONS_PROC) + continue; /* optional method */ + if (i == GIN_CONSISTENT_PROC || i == GIN_TRICONSISTENT_PROC) + continue; /* don't need both, see check below loop */ + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator class \"%s\" of access method %s is missing support function %d", + opclassname, "gin", i))); + result = false; + } + if (!opclassgroup || + ((opclassgroup->functionset & (1 << GIN_CONSISTENT_PROC)) == 0 && + (opclassgroup->functionset & (1 << GIN_TRICONSISTENT_PROC)) == 0)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator class \"%s\" of access method %s is missing support function %d or %d", + opclassname, "gin", + GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC))); + result = false; + } + + + ReleaseCatCacheList(proclist); + ReleaseCatCacheList(oprlist); + ReleaseSysCache(familytup); + ReleaseSysCache(classtup); + + return result; +} + +/* + * Prechecking function for adding operators/functions to a GIN opfamily. + */ +void +ginadjustmembers(Oid opfamilyoid, + Oid opclassoid, + List *operators, + List *functions) +{ + ListCell *lc; + + /* + * Operator members of a GIN opfamily should never have hard dependencies, + * since their connection to the opfamily depends only on what the support + * functions think, and that can be altered. For consistency, we make all + * soft dependencies point to the opfamily, though a soft dependency on + * the opclass would work as well in the CREATE OPERATOR CLASS case. + */ + foreach(lc, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(lc); + + op->ref_is_hard = false; + op->ref_is_family = true; + op->refobjid = opfamilyoid; + } + + /* + * Required support functions should have hard dependencies. Preferably + * those are just dependencies on the opclass, but if we're in ALTER + * OPERATOR FAMILY, we leave the dependency pointing at the whole + * opfamily. (Given that GIN opclasses generally don't share opfamilies, + * it seems unlikely to be worth working harder.) 
+ */ + foreach(lc, functions) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(lc); + + switch (op->number) + { + case GIN_EXTRACTVALUE_PROC: + case GIN_EXTRACTQUERY_PROC: + /* Required support function */ + op->ref_is_hard = true; + break; + case GIN_COMPARE_PROC: + case GIN_CONSISTENT_PROC: + case GIN_COMPARE_PARTIAL_PROC: + case GIN_TRICONSISTENT_PROC: + case GIN_OPTIONS_PROC: + /* Optional, so force it to be a soft family dependency */ + op->ref_is_hard = false; + op->ref_is_family = true; + op->refobjid = opfamilyoid; + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("support function number %d is invalid for access method %s", + op->number, "gin"))); + break; + } + } +} diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c new file mode 100644 index 0000000..09ce4d6 --- /dev/null +++ b/src/backend/access/gin/ginxlog.c @@ -0,0 +1,813 @@ +/*------------------------------------------------------------------------- + * + * ginxlog.c + * WAL replay logic for inverted index. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginxlog.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/bufmask.h" +#include "access/gin_private.h" +#include "access/ginxlog.h" +#include "access/xlogutils.h" +#include "utils/memutils.h" + +static MemoryContext opCtx; /* working memory for operations */ + +static void +ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id) +{ + XLogRecPtr lsn = record->EndRecPtr; + Buffer buffer; + Page page; + + if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO) + { + page = (Page) BufferGetPage(buffer); + GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoCreatePTree(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record); + char *ptr; + Buffer buffer; + Page page; + + buffer = XLogInitBufferForRedo(record, 0); + page = (Page) BufferGetPage(buffer); + + GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED); + + ptr = XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree); + + /* Place page data */ + memcpy(GinDataLeafPageGetPostingList(page), ptr, data->size); + + GinDataPageSetDataSize(page, data->size); + + PageSetLSN(page, lsn); + + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata) +{ + Page page = BufferGetPage(buffer); + ginxlogInsertEntry *data = (ginxlogInsertEntry *) rdata; + OffsetNumber offset = data->offset; + IndexTuple itup; + + if (rightblkno != InvalidBlockNumber) + { + /* update link to right page after split */ + Assert(!GinPageIsLeaf(page)); + Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page)); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset)); + GinSetDownlink(itup, rightblkno); + } + + if (data->isDelete) + { + Assert(GinPageIsLeaf(page)); + Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page)); + PageIndexTupleDelete(page, offset); + } + + itup = &data->tuple; + + if (PageAddItem(page, (Item) itup, 
IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber) + { + RelFileNode node; + ForkNumber forknum; + BlockNumber blknum; + + BufferGetTag(buffer, &node, &forknum, &blknum); + elog(ERROR, "failed to add item to index page in %u/%u/%u", + node.spcNode, node.dbNode, node.relNode); + } +} + +/* + * Redo recompression of posting list. Doing all the changes in-place is not + * always possible, because it might require more space than we've on the page. + * Instead, once modification is required we copy unprocessed tail of the page + * into separately allocated chunk of memory for further reading original + * versions of segments. Thanks to that we don't bother about moving page data + * in-place. + */ +static void +ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data) +{ + int actionno; + int segno; + GinPostingList *oldseg; + Pointer segmentend; + char *walbuf; + int totalsize; + Pointer tailCopy = NULL; + Pointer writePtr; + Pointer segptr; + + /* + * If the page is in pre-9.4 format, convert to new format first. + */ + if (!GinPageIsCompressed(page)) + { + ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page); + int nuncompressed = GinPageGetOpaque(page)->maxoff; + int npacked; + + /* + * Empty leaf pages are deleted as part of vacuum, but leftmost and + * rightmost pages are never deleted. So, pg_upgrade'd from pre-9.4 + * instances might contain empty leaf pages, and we need to handle + * them correctly. + */ + if (nuncompressed > 0) + { + GinPostingList *plist; + + plist = ginCompressPostingList(uncompressed, nuncompressed, + BLCKSZ, &npacked); + totalsize = SizeOfGinPostingList(plist); + + Assert(npacked == nuncompressed); + + memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize); + } + else + { + totalsize = 0; + } + + GinDataPageSetDataSize(page, totalsize); + GinPageSetCompressed(page); + GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber; + } + + oldseg = GinDataLeafPageGetPostingList(page); + writePtr = (Pointer) oldseg; + segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page); + segno = 0; + + walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf); + for (actionno = 0; actionno < data->nactions; actionno++) + { + uint8 a_segno = *((uint8 *) (walbuf++)); + uint8 a_action = *((uint8 *) (walbuf++)); + GinPostingList *newseg = NULL; + int newsegsize = 0; + ItemPointerData *items = NULL; + uint16 nitems = 0; + ItemPointerData *olditems; + int nolditems; + ItemPointerData *newitems; + int nnewitems; + int segsize; + + /* Extract all the information we need from the WAL record */ + if (a_action == GIN_SEGMENT_INSERT || + a_action == GIN_SEGMENT_REPLACE) + { + newseg = (GinPostingList *) walbuf; + newsegsize = SizeOfGinPostingList(newseg); + walbuf += SHORTALIGN(newsegsize); + } + + if (a_action == GIN_SEGMENT_ADDITEMS) + { + memcpy(&nitems, walbuf, sizeof(uint16)); + walbuf += sizeof(uint16); + items = (ItemPointerData *) walbuf; + walbuf += nitems * sizeof(ItemPointerData); + } + + /* Skip to the segment that this action concerns */ + Assert(segno <= a_segno); + while (segno < a_segno) + { + /* + * Once modification is started and page tail is copied, we've to + * copy unmodified segments. 
+ */ + segsize = SizeOfGinPostingList(oldseg); + if (tailCopy) + { + Assert(writePtr + segsize < PageGetSpecialPointer(page)); + memcpy(writePtr, (Pointer) oldseg, segsize); + } + writePtr += segsize; + oldseg = GinNextPostingListSegment(oldseg); + segno++; + } + + /* + * ADDITEMS action is handled like REPLACE, but the new segment to + * replace the old one is reconstructed using the old segment from + * disk and the new items from the WAL record. + */ + if (a_action == GIN_SEGMENT_ADDITEMS) + { + int npacked; + + olditems = ginPostingListDecode(oldseg, &nolditems); + + newitems = ginMergeItemPointers(items, nitems, + olditems, nolditems, + &nnewitems); + Assert(nnewitems == nolditems + nitems); + + newseg = ginCompressPostingList(newitems, nnewitems, + BLCKSZ, &npacked); + Assert(npacked == nnewitems); + + newsegsize = SizeOfGinPostingList(newseg); + a_action = GIN_SEGMENT_REPLACE; + } + + segptr = (Pointer) oldseg; + if (segptr != segmentend) + segsize = SizeOfGinPostingList(oldseg); + else + { + /* + * Positioned after the last existing segment. Only INSERTs + * expected here. + */ + Assert(a_action == GIN_SEGMENT_INSERT); + segsize = 0; + } + + /* + * We're about to start modification of the page. So, copy tail of + * the page if it's not done already. + */ + if (!tailCopy && segptr != segmentend) + { + int tailSize = segmentend - segptr; + + tailCopy = (Pointer) palloc(tailSize); + memcpy(tailCopy, segptr, tailSize); + segptr = tailCopy; + oldseg = (GinPostingList *) segptr; + segmentend = segptr + tailSize; + } + + switch (a_action) + { + case GIN_SEGMENT_DELETE: + segptr += segsize; + segno++; + break; + + case GIN_SEGMENT_INSERT: + /* copy the new segment in place */ + Assert(writePtr + newsegsize <= PageGetSpecialPointer(page)); + memcpy(writePtr, newseg, newsegsize); + writePtr += newsegsize; + break; + + case GIN_SEGMENT_REPLACE: + /* copy the new version of segment in place */ + Assert(writePtr + newsegsize <= PageGetSpecialPointer(page)); + memcpy(writePtr, newseg, newsegsize); + writePtr += newsegsize; + segptr += segsize; + segno++; + break; + + default: + elog(ERROR, "unexpected GIN leaf action: %u", a_action); + } + oldseg = (GinPostingList *) segptr; + } + + /* Copy the rest of unmodified segments if any. 
*/ + segptr = (Pointer) oldseg; + if (segptr != segmentend && tailCopy) + { + int restSize = segmentend - segptr; + + Assert(writePtr + restSize <= PageGetSpecialPointer(page)); + memcpy(writePtr, segptr, restSize); + writePtr += restSize; + } + + totalsize = writePtr - (Pointer) GinDataLeafPageGetPostingList(page); + GinDataPageSetDataSize(page, totalsize); +} + +static void +ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata) +{ + Page page = BufferGetPage(buffer); + + if (isLeaf) + { + ginxlogRecompressDataLeaf *data = (ginxlogRecompressDataLeaf *) rdata; + + Assert(GinPageIsLeaf(page)); + + ginRedoRecompress(page, data); + } + else + { + ginxlogInsertDataInternal *data = (ginxlogInsertDataInternal *) rdata; + PostingItem *oldpitem; + + Assert(!GinPageIsLeaf(page)); + + /* update link to right page after split */ + oldpitem = GinDataPageGetPostingItem(page, data->offset); + PostingItemSetBlockNumber(oldpitem, rightblkno); + + GinDataPageAddPostingItem(page, &data->newitem, data->offset); + } +} + +static void +ginRedoInsert(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record); + Buffer buffer; +#ifdef NOT_USED + BlockNumber leftChildBlkno = InvalidBlockNumber; +#endif + BlockNumber rightChildBlkno = InvalidBlockNumber; + bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; + + /* + * First clear incomplete-split flag on child page if this finishes a + * split. + */ + if (!isLeaf) + { + char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert); + +#ifdef NOT_USED + leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload); +#endif + payload += sizeof(BlockIdData); + rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload); + payload += sizeof(BlockIdData); + + ginRedoClearIncompleteSplit(record, 1); + } + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + Page page = BufferGetPage(buffer); + Size len; + char *payload = XLogRecGetBlockData(record, 0, &len); + + /* How to insert the payload is tree-type specific */ + if (data->flags & GIN_INSERT_ISDATA) + { + Assert(GinPageIsData(page)); + ginRedoInsertData(buffer, isLeaf, rightChildBlkno, payload); + } + else + { + Assert(!GinPageIsData(page)); + ginRedoInsertEntry(buffer, isLeaf, rightChildBlkno, payload); + } + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoSplit(XLogReaderState *record) +{ + ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record); + Buffer lbuffer, + rbuffer, + rootbuf; + bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; + bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0; + + /* + * First clear incomplete-split flag on child page if this finishes a + * split + */ + if (!isLeaf) + ginRedoClearIncompleteSplit(record, 3); + + if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED) + elog(ERROR, "GIN split record did not contain a full-page image of left page"); + + if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED) + elog(ERROR, "GIN split record did not contain a full-page image of right page"); + + if (isRoot) + { + if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED) + elog(ERROR, "GIN split record did not contain a full-page image of root page"); + UnlockReleaseBuffer(rootbuf); + } + + UnlockReleaseBuffer(rbuffer); + UnlockReleaseBuffer(lbuffer); +} + +/* + * VACUUM_PAGE record contains simply a full image of the page, similar to + * an XLOG_FPI 
record. + */ +static void +ginRedoVacuumPage(XLogReaderState *record) +{ + Buffer buffer; + + if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED) + { + elog(ERROR, "replay of gin entry tree page vacuum did not restore the page"); + } + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoVacuumDataLeafPage(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + Buffer buffer; + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + Page page = BufferGetPage(buffer); + Size len; + ginxlogVacuumDataLeafPage *xlrec; + + xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len); + + Assert(GinPageIsLeaf(page)); + Assert(GinPageIsData(page)); + + ginRedoRecompress(page, &xlrec->data); + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoDeletePage(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record); + Buffer dbuffer; + Buffer pbuffer; + Buffer lbuffer; + Page page; + + /* + * Lock left page first in order to prevent possible deadlock with + * ginStepRight(). + */ + if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(lbuffer); + Assert(GinPageIsData(page)); + GinPageGetOpaque(page)->rightlink = data->rightLink; + PageSetLSN(page, lsn); + MarkBufferDirty(lbuffer); + } + + if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(dbuffer); + Assert(GinPageIsData(page)); + GinPageSetDeleted(page); + GinPageSetDeleteXid(page, data->deleteXid); + PageSetLSN(page, lsn); + MarkBufferDirty(dbuffer); + } + + if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(pbuffer); + Assert(GinPageIsData(page)); + Assert(!GinPageIsLeaf(page)); + GinPageDeletePostingItem(page, data->parentOffset); + PageSetLSN(page, lsn); + MarkBufferDirty(pbuffer); + } + + if (BufferIsValid(lbuffer)) + UnlockReleaseBuffer(lbuffer); + if (BufferIsValid(pbuffer)) + UnlockReleaseBuffer(pbuffer); + if (BufferIsValid(dbuffer)) + UnlockReleaseBuffer(dbuffer); +} + +static void +ginRedoUpdateMetapage(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record); + Buffer metabuffer; + Page metapage; + Buffer buffer; + + /* + * Restore the metapage. This is essentially the same as a full-page + * image, so restore the metapage unconditionally without looking at the + * LSN, to avoid torn page hazards. 
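+ *
+ * (XLogInitBufferForRedo returns the buffer without reading the old page
+ * contents from disk, so a torn on-disk page cannot hurt us here.)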
+ */ + metabuffer = XLogInitBufferForRedo(record, 0); + Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); + metapage = BufferGetPage(metabuffer); + + GinInitMetabuffer(metabuffer); + memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); + PageSetLSN(metapage, lsn); + MarkBufferDirty(metabuffer); + + if (data->ntuples > 0) + { + /* + * insert into tail page + */ + if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) + { + Page page = BufferGetPage(buffer); + OffsetNumber off; + int i; + Size tupsize; + char *payload; + IndexTuple tuples; + Size totaltupsize; + + payload = XLogRecGetBlockData(record, 1, &totaltupsize); + tuples = (IndexTuple) payload; + + if (PageIsEmpty(page)) + off = FirstOffsetNumber; + else + off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + for (i = 0; i < data->ntuples; i++) + { + tupsize = IndexTupleSize(tuples); + + if (PageAddItem(page, (Item) tuples, tupsize, off, + false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page"); + + tuples = (IndexTuple) (((char *) tuples) + tupsize); + + off++; + } + Assert(payload + totaltupsize == (char *) tuples); + + /* + * Increase counter of heap tuples + */ + GinPageGetOpaque(page)->maxoff++; + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); + } + else if (data->prevTail != InvalidBlockNumber) + { + /* + * New tail + */ + if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) + { + Page page = BufferGetPage(buffer); + + GinPageGetOpaque(page)->rightlink = data->newRightlink; + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); + } + + UnlockReleaseBuffer(metabuffer); +} + +static void +ginRedoInsertListPage(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record); + Buffer buffer; + Page page; + OffsetNumber l, + off = FirstOffsetNumber; + int i, + tupsize; + char *payload; + IndexTuple tuples; + Size totaltupsize; + + /* We always re-initialize the page. 
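+ * GinInitBuffer below resets it with the GIN_LIST flag before the tuples
+ * carried in the WAL record are re-added.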
*/ + buffer = XLogInitBufferForRedo(record, 0); + page = BufferGetPage(buffer); + + GinInitBuffer(buffer, GIN_LIST); + GinPageGetOpaque(page)->rightlink = data->rightlink; + if (data->rightlink == InvalidBlockNumber) + { + /* tail of sublist */ + GinPageSetFullRow(page); + GinPageGetOpaque(page)->maxoff = 1; + } + else + { + GinPageGetOpaque(page)->maxoff = 0; + } + + payload = XLogRecGetBlockData(record, 0, &totaltupsize); + + tuples = (IndexTuple) payload; + for (i = 0; i < data->ntuples; i++) + { + tupsize = IndexTupleSize(tuples); + + l = PageAddItem(page, (Item) tuples, tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page"); + + tuples = (IndexTuple) (((char *) tuples) + tupsize); + off++; + } + Assert((char *) tuples == payload + totaltupsize); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoDeleteListPages(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record); + Buffer metabuffer; + Page metapage; + int i; + + metabuffer = XLogInitBufferForRedo(record, 0); + Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); + metapage = BufferGetPage(metabuffer); + + GinInitMetabuffer(metabuffer); + + memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); + PageSetLSN(metapage, lsn); + MarkBufferDirty(metabuffer); + + /* + * In normal operation, shiftList() takes exclusive lock on all the + * pages-to-be-deleted simultaneously. During replay, however, it should + * be all right to lock them one at a time. This is dependent on the fact + * that we are deleting pages from the head of the list, and that readers + * share-lock the next page before releasing the one they are on. So we + * cannot get past a reader that is on, or due to visit, any page we are + * going to delete. New incoming readers will block behind our metapage + * lock and then see a fully updated page list. + * + * No full-page images are taken of the deleted pages. Instead, they are + * re-initialized as empty, deleted pages. Their right-links don't need to + * be preserved, because no new readers can see the pages, as explained + * above. + */ + for (i = 0; i < data->ndeleted; i++) + { + Buffer buffer; + Page page; + + buffer = XLogInitBufferForRedo(record, i + 1); + page = BufferGetPage(buffer); + GinInitBuffer(buffer, GIN_DELETED); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + + UnlockReleaseBuffer(buffer); + } + UnlockReleaseBuffer(metabuffer); +} + +void +gin_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + MemoryContext oldCtx; + + /* + * GIN indexes do not require any conflict processing. NB: If we ever + * implement a similar optimization as we have in b-tree, and remove + * killed tuples outside VACUUM, we'll need to handle that here. 
+ */ + + oldCtx = MemoryContextSwitchTo(opCtx); + switch (info) + { + case XLOG_GIN_CREATE_PTREE: + ginRedoCreatePTree(record); + break; + case XLOG_GIN_INSERT: + ginRedoInsert(record); + break; + case XLOG_GIN_SPLIT: + ginRedoSplit(record); + break; + case XLOG_GIN_VACUUM_PAGE: + ginRedoVacuumPage(record); + break; + case XLOG_GIN_VACUUM_DATA_LEAF_PAGE: + ginRedoVacuumDataLeafPage(record); + break; + case XLOG_GIN_DELETE_PAGE: + ginRedoDeletePage(record); + break; + case XLOG_GIN_UPDATE_META_PAGE: + ginRedoUpdateMetapage(record); + break; + case XLOG_GIN_INSERT_LISTPAGE: + ginRedoInsertListPage(record); + break; + case XLOG_GIN_DELETE_LISTPAGE: + ginRedoDeleteListPages(record); + break; + default: + elog(PANIC, "gin_redo: unknown op code %u", info); + } + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(opCtx); +} + +void +gin_xlog_startup(void) +{ + opCtx = AllocSetContextCreate(CurrentMemoryContext, + "GIN recovery temporary context", + ALLOCSET_DEFAULT_SIZES); +} + +void +gin_xlog_cleanup(void) +{ + MemoryContextDelete(opCtx); + opCtx = NULL; +} + +/* + * Mask a GIN page before running consistency checks on it. + */ +void +gin_mask(char *pagedata, BlockNumber blkno) +{ + Page page = (Page) pagedata; + PageHeader pagehdr = (PageHeader) page; + GinPageOpaque opaque; + + mask_page_lsn_and_checksum(page); + opaque = GinPageGetOpaque(page); + + mask_page_hint_bits(page); + + /* + * For a GIN_DELETED page, the page is initialized to empty. Hence, mask + * the whole page content. For other pages, mask the hole if pd_lower + * appears to have been set correctly. + */ + if (opaque->flags & GIN_DELETED) + mask_page_content(page); + else if (pagehdr->pd_lower > SizeOfPageHeaderData) + mask_unused_space(page); +} |
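A minimal usage sketch of the statistics interface defined above (a hypothetical helper, not part of this commit; it assumes the caller already has the GIN index open with at least AccessShareLock):

#include "postgres.h"

#include "access/gin.h"
#include "storage/block.h"
#include "utils/rel.h"

/*
 * Hypothetical helper: report the current size of a GIN index's pending
 * list, in pages.  nPendingPages is kept up to date in the metapage; the
 * other GinStatsData fields are only as of the last VACUUM.
 */
static BlockNumber
gin_pending_list_pages(Relation ginindex)
{
	GinStatsData	stats;

	ginGetStats(ginindex, &stats);

	return stats.nPendingPages;
}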