From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sat, 4 May 2024 14:17:33 +0200
Subject: Adding upstream version 15.5.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 doc/src/sgml/gin.sgml | 717 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 717 insertions(+)
 create mode 100644 doc/src/sgml/gin.sgml

(limited to 'doc/src/sgml/gin.sgml')
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml
new file mode 100644
index 0000000..d68d12d
--- /dev/null
+++ b/doc/src/sgml/gin.sgml
@@ -0,0 +1,717 @@
+<!-- doc/src/sgml/gin.sgml -->
+
+<chapter id="gin">
+<title>GIN Indexes</title>
+
+   <indexterm>
+    <primary>index</primary>
+    <secondary>GIN</secondary>
+   </indexterm>
+
+<sect1 id="gin-intro">
+ <title>Introduction</title>
+
+ <para>
+  <acronym>GIN</acronym> stands for Generalized Inverted Index.
+  <acronym>GIN</acronym> is designed for handling cases where the items
+  to be indexed are composite values, and the queries to be handled by
+  the index need to search for element values that appear within
+  the composite items.  For example, the items could be documents,
+  and the queries could be searches for documents containing specific words.
+ </para>
+
+ <para>
+  We use the word <firstterm>item</firstterm> to refer to a composite value that
+  is to be indexed, and the word <firstterm>key</firstterm> to refer to an element
+  value.  <acronym>GIN</acronym> always stores and searches for keys,
+  not item values per se.
+ </para>
+
+ <para>
+  A <acronym>GIN</acronym> index stores a set of (key, posting list) pairs,
+  where a <firstterm>posting list</firstterm> is a set of row IDs in which the key
+  occurs.  The same row ID can appear in multiple posting lists, since
+  an item can contain more than one key.  Each key value is stored only
+  once, so a <acronym>GIN</acronym> index is very compact for cases
+  where the same key appears many times.
+ </para>
+
+ <para>
+  <acronym>GIN</acronym> is generalized in the sense that the
+  <acronym>GIN</acronym> access method code does not need to know the
+  specific operations that it accelerates.
+  Instead, it uses custom strategies defined for particular data types.
+  The strategy defines how keys are extracted from indexed items and
+  query conditions, and how to determine whether a row that contains
+  some of the key values in a query actually satisfies the query.
+ </para>
+
+ <para>
+  One advantage of <acronym>GIN</acronym> is that it allows the development
+  of custom data types with the appropriate access methods, by
+  an expert in the domain of the data type, rather than a database expert.
+  This is much the same advantage as using <acronym>GiST</acronym>.
+ </para>
+
+ <para>
+  The <acronym>GIN</acronym>
+  implementation in <productname>PostgreSQL</productname> is primarily
+  maintained by Teodor Sigaev and Oleg Bartunov. There is more
+  information about <acronym>GIN</acronym> on their
+  <ulink url="http://www.sai.msu.su/~megera/wiki/Gin">website</ulink>.
+ </para>
+</sect1>
+
+<sect1 id="gin-builtin-opclasses">
+ <title>Built-in Operator Classes</title>
+
+ <para>
+  The core <productname>PostgreSQL</productname> distribution
+  includes the <acronym>GIN</acronym> operator classes shown in
+  <xref linkend="gin-builtin-opclasses-table"/>.
+  (Some of the optional modules described in <xref linkend="contrib"/>
+  provide additional <acronym>GIN</acronym> operator classes.)
+ </para>
+
+  <table id="gin-builtin-opclasses-table">
+   <title>Built-in <acronym>GIN</acronym> Operator Classes</title>
+   <tgroup cols="2">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Indexable Operators</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry morerows="3" valign="middle"><literal>array_ops</literal></entry>
+      <entry><literal>&amp;&amp; (anyarray,anyarray)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>@&gt; (anyarray,anyarray)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>&lt;@ (anyarray,anyarray)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>= (anyarray,anyarray)</literal></entry>
+     </row>
+     <row>
+      <entry morerows="5" valign="middle"><literal>jsonb_ops</literal></entry>
+      <entry><literal>@&gt; (jsonb,jsonb)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>@? (jsonb,jsonpath)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>@@ (jsonb,jsonpath)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>? (jsonb,text)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>?| (jsonb,text[])</literal></entry>
+     </row>
+     <row>
+      <entry><literal>?&amp; (jsonb,text[])</literal></entry>
+     </row>
+     <row>
+      <entry morerows="2" valign="middle"><literal>jsonb_path_ops</literal></entry>
+      <entry><literal>@&gt; (jsonb,jsonb)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>@? (jsonb,jsonpath)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>@@ (jsonb,jsonpath)</literal></entry>
+     </row>
+     <row>
+      <entry morerows="1" valign="middle"><literal>tsvector_ops</literal></entry>
+      <entry><literal>@@ (tsvector,tsquery)</literal></entry>
+     </row>
+     <row>
+      <entry><literal>@@@ (tsvector,tsquery)</literal></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+ <para>
+  Of the two operator classes for type <type>jsonb</type>, <literal>jsonb_ops</literal>
+  is the default.  <literal>jsonb_path_ops</literal> supports fewer operators but
+  offers better performance for those operators.
+  See <xref linkend="json-indexing"/> for details.
+ </para>
+
+</sect1>
+
+<sect1 id="gin-extensibility">
+ <title>Extensibility</title>
+
+ <para>
+   The <acronym>GIN</acronym> interface has a high level of abstraction,
+   requiring the access method implementer only to implement the semantics of
+   the data type being accessed.  The <acronym>GIN</acronym> layer itself
+   takes care of concurrency, logging and searching the tree structure.
+ </para>
+
+ <para>
+   All it takes to get a <acronym>GIN</acronym> access method working is to
+   implement a few user-defined methods, which define the behavior of
+   keys in the tree and the relationships between keys, indexed items,
+   and indexable queries. In short, <acronym>GIN</acronym> combines
+   extensibility with generality, code reuse, and a clean interface.
+ </para>
+
+ <para>
+   There are two methods that an operator class for
+   <acronym>GIN</acronym> must provide:
+
+  <variablelist>
+    <varlistentry>
+     <term><function>Datum *extractValue(Datum itemValue, int32 *nkeys,
+        bool **nullFlags)</function></term>
+     <listitem>
+      <para>
+       Returns a palloc'd array of keys given an item to be indexed.  The
+       number of returned keys must be stored into <literal>*nkeys</literal>.
+       If any of the keys can be null, also palloc an array of
+       <literal>*nkeys</literal> <type>bool</type> fields, store its address at
+       <literal>*nullFlags</literal>, and set these null flags as needed.
+       <literal>*nullFlags</literal> can be left <symbol>NULL</symbol> (its initial value)
+       if all keys are non-null.
+       The return value can be <symbol>NULL</symbol> if the item contains no keys.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><function>Datum *extractQuery(Datum query, int32 *nkeys,
+        StrategyNumber n, bool **pmatch, Pointer **extra_data,
+        bool **nullFlags, int32 *searchMode)</function></term>
+     <listitem>
+      <para>
+       Returns a palloc'd array of keys given a value to be queried; that is,
+       <literal>query</literal> is the value on the right-hand side of an
+       indexable operator whose left-hand side is the indexed column.
+       <literal>n</literal> is the strategy number of the operator within the
+       operator class (see <xref linkend="xindex-strategies"/>).
+       Often, <function>extractQuery</function> will need
+       to consult <literal>n</literal> to determine the data type of
+       <literal>query</literal> and the method it should use to extract key values.
+       The number of returned keys must be stored into <literal>*nkeys</literal>.
+       If any of the keys can be null, also palloc an array of
+       <literal>*nkeys</literal> <type>bool</type> fields, store its address at
+       <literal>*nullFlags</literal>, and set these null flags as needed.
+       <literal>*nullFlags</literal> can be left <symbol>NULL</symbol> (its initial value)
+       if all keys are non-null.
+       The return value can be <symbol>NULL</symbol> if the <literal>query</literal> contains no keys.
+      </para>
+
+      <para>
+       <literal>searchMode</literal> is an output argument that allows
+       <function>extractQuery</function> to specify details about how the search
+       will be done.
+       If <literal>*searchMode</literal> is set to
+       <literal>GIN_SEARCH_MODE_DEFAULT</literal> (which is the value it is
+       initialized to before call), only items that match at least one of
+       the returned keys are considered candidate matches.
+       If <literal>*searchMode</literal> is set to
+       <literal>GIN_SEARCH_MODE_INCLUDE_EMPTY</literal>, then in addition to items
+       containing at least one matching key, items that contain no keys at
+       all are considered candidate matches.  (This mode is useful for
+       implementing is-subset-of operators, for example.)
+       If <literal>*searchMode</literal> is set to <literal>GIN_SEARCH_MODE_ALL</literal>,
+       then all non-null items in the index are considered candidate
+       matches, whether they match any of the returned keys or not.  (This
+       mode is much slower than the other two choices, since it requires
+       scanning essentially the entire index, but it may be necessary to
+       implement corner cases correctly.  An operator that needs this mode
+       in most cases is probably not a good candidate for a GIN operator
+       class.)
+       The symbols to use for setting this mode are defined in
+       <filename>access/gin.h</filename>.
+      </para>
+
+      <para>
+       <literal>pmatch</literal> is an output argument for use when partial match
+       is supported.  To use it, <function>extractQuery</function> must allocate
+       an array of <literal>*nkeys</literal> <type>bool</type>s and store its address at
+       <literal>*pmatch</literal>.  Each element of the array should be set to true
+       if the corresponding key requires partial match, false if not.
+       If <literal>*pmatch</literal> is set to <symbol>NULL</symbol> then GIN assumes partial match
+       is not required.  The variable is initialized to <symbol>NULL</symbol> before call,
+       so this argument can simply be ignored by operator classes that do
+       not support partial match.
+      </para>
+
+      <para>
+       <literal>extra_data</literal> is an output argument that allows
+       <function>extractQuery</function> to pass additional data to the
+       <function>consistent</function> and <function>comparePartial</function> methods.
+       To use it, <function>extractQuery</function> must allocate
+       an array of <literal>*nkeys</literal> pointers and store its address at
+       <literal>*extra_data</literal>, then store whatever it wants to into the
+       individual pointers.  The variable is initialized to <symbol>NULL</symbol> before
+       call, so this argument can simply be ignored by operator classes that
+       do not require extra data.  If <literal>*extra_data</literal> is set, the
+       whole array is passed to the <function>consistent</function> method, and
+       the appropriate element to the <function>comparePartial</function> method.
+      </para>
+
+     </listitem>
+    </varlistentry>
+  </variablelist>
+
+  An operator class must also provide a function to check if an indexed item
+  matches the query. It comes in two flavors, a Boolean <function>consistent</function>
+  function, and a ternary <function>triConsistent</function> function.
+  <function>triConsistent</function> covers the functionality of both, so providing
+  <function>triConsistent</function> alone is sufficient. However, if the Boolean
+  variant is significantly cheaper to calculate, it can be advantageous to
+  provide both.  If only the Boolean variant is provided, some optimizations
+  that depend on refuting index items before fetching all the keys are
+  disabled.
+
+  <variablelist>
+    <varlistentry>
+     <term><function>bool consistent(bool check[], StrategyNumber n, Datum query,
+        int32 nkeys, Pointer extra_data[], bool *recheck,
+        Datum queryKeys[], bool nullFlags[])</function></term>
+     <listitem>
+      <para>
+       Returns true if an indexed item satisfies the query operator with
+       strategy number <literal>n</literal> (or might satisfy it, if the recheck
+       indication is returned).  This function does not have direct access
+       to the indexed item's value, since <acronym>GIN</acronym> does not
+       store items explicitly.  Rather, what is available is knowledge
+       about which key values extracted from the query appear in a given
+       indexed item.  The <literal>check</literal> array has length
+       <literal>nkeys</literal>, which is the same as the number of keys previously
+       returned by <function>extractQuery</function> for this <literal>query</literal> datum.
+       Each element of the
+       <literal>check</literal> array is true if the indexed item contains the
+       corresponding query key, i.e., if (check[i] == true) the i-th key of the
+       <function>extractQuery</function> result array is present in the indexed item.
+       The original <literal>query</literal> datum is
+       passed in case the <function>consistent</function> method needs to consult it,
+       and so are the <literal>queryKeys[]</literal> and <literal>nullFlags[]</literal>
+       arrays previously returned by <function>extractQuery</function>.
+       <literal>extra_data</literal> is the extra-data array returned by
+       <function>extractQuery</function>, or <symbol>NULL</symbol> if none.
+      </para>
+
+      <para>
+       When <function>extractQuery</function> returns a null key in
+       <literal>queryKeys[]</literal>, the corresponding <literal>check[]</literal> element
+       is true if the indexed item contains a null key; that is, the
+       semantics of <literal>check[]</literal> are like <literal>IS NOT DISTINCT
+       FROM</literal>.  The <function>consistent</function> function can examine the
+       corresponding <literal>nullFlags[]</literal> element if it needs to tell
+       the difference between a regular value match and a null match.
+      </para>
+
+      <para>
+       On success, <literal>*recheck</literal> should be set to true if the heap
+       tuple needs to be rechecked against the query operator, or false if
+       the index test is exact.  That is, a false return value guarantees
+       that the heap tuple does not match the query; a true return value with
+       <literal>*recheck</literal> set to false guarantees that the heap tuple does
+       match the query; and a true return value with
+       <literal>*recheck</literal> set to true means that the heap tuple might match
+       the query, so it needs to be fetched and rechecked by evaluating the
+       query operator directly against the originally indexed item.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><function>GinTernaryValue triConsistent(GinTernaryValue check[], StrategyNumber n, Datum query,
+        int32 nkeys, Pointer extra_data[],
+        Datum queryKeys[], bool nullFlags[])</function></term>
+     <listitem>
+      <para>
+       <function>triConsistent</function> is similar to <function>consistent</function>,
+       but instead of Booleans in the <literal>check</literal> vector, there are
+       three possible values for each
+       key: <literal>GIN_TRUE</literal>, <literal>GIN_FALSE</literal> and
+       <literal>GIN_MAYBE</literal>. <literal>GIN_FALSE</literal> and <literal>GIN_TRUE</literal>
+       have the same meaning as regular Boolean values, while
+       <literal>GIN_MAYBE</literal> means that the presence of that key is not known.
+       When <literal>GIN_MAYBE</literal> values are present, the function should only
+       return <literal>GIN_TRUE</literal> if the item certainly matches whether or
+       not the index item contains the corresponding query keys. Likewise, the
+       function must return <literal>GIN_FALSE</literal> only if the item certainly
+       does not match, whether or not it contains the <literal>GIN_MAYBE</literal>
+       keys. If the result depends on the <literal>GIN_MAYBE</literal> entries, i.e.,
+       the match cannot be confirmed or refuted based on the known query keys,
+       the function must return <literal>GIN_MAYBE</literal>.
+      </para>
+      <para>
+       When there are no <literal>GIN_MAYBE</literal> values in the <literal>check</literal>
+       vector, a <literal>GIN_MAYBE</literal> return value is the equivalent of
+       setting the <literal>recheck</literal> flag in the
+       Boolean <function>consistent</function> function.
+      </para>
+     </listitem>
+    </varlistentry>
+  </variablelist>
+ </para>
+
+ <para>
+  In addition, GIN must have a way to sort the key values stored in the index.
+  The operator class can define the sort ordering by specifying a comparison
+  method:
+
+  <variablelist>
+    <varlistentry>
+     <term><function>int compare(Datum a, Datum b)</function></term>
+     <listitem>
+      <para>
+       Compares two keys (not indexed items!) and returns an integer less than
+       zero, zero, or greater than zero, indicating whether the first key is
+       less than, equal to, or greater than the second.  Null keys are never
+       passed to this function.
+      </para>
+     </listitem>
+    </varlistentry>
+  </variablelist>
+
+  Alternatively, if the operator class does not provide a <function>compare</function>
+  method, GIN will look up the default btree operator class for the index
+  key data type, and use its comparison function.  It is recommended to
+  specify the comparison function in a GIN operator class that is meant for
+  just one data type, as looking up the btree operator class costs a few
+  cycles.  However, polymorphic GIN operator classes (such
+  as <literal>array_ops</literal>) typically cannot specify a single comparison
+  function.
+ </para>
+
+ <para>
+  An operator class for <acronym>GIN</acronym> can optionally supply the
+  following methods:
+
+  <variablelist>
+    <varlistentry>
+     <term><function>int comparePartial(Datum partial_key, Datum key, StrategyNumber n,
+                              Pointer extra_data)</function></term>
+     <listitem>
+      <para>
+       Compare a partial-match query key to an index key.  Returns an integer
+       whose sign indicates the result: less than zero means the index key
+       does not match the query, but the index scan should continue; zero
+       means that the index key does match the query; greater than zero
+       indicates that the index scan should stop because no more matches
+       are possible.  The strategy number <literal>n</literal> of the operator
+       that generated the partial match query is provided, in case its
+       semantics are needed to determine when to end the scan.  Also,
+       <literal>extra_data</literal> is the corresponding element of the extra-data
+       array made by <function>extractQuery</function>, or <symbol>NULL</symbol> if none.
+       Null keys are never passed to this function.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry>
+     <term><function>void options(local_relopts *relopts)</function></term>
+     <listitem>
+      <para>
+       Defines a set of user-visible parameters that control operator class
+       behavior.
+      </para>
+
+      <para>
+       The <function>options</function> function is passed a pointer to a
+       <structname>local_relopts</structname> struct, which needs to be
+       filled with a set of operator class specific options.  The options
+       can be accessed from other support functions using the
+       <literal>PG_HAS_OPCLASS_OPTIONS()</literal> and
+       <literal>PG_GET_OPCLASS_OPTIONS()</literal> macros.
+      </para>
+
+      <para>
+       Since both key extraction of indexed values and representation of the
+       key in <acronym>GIN</acronym> are flexible, they may depend on
+       user-specified parameters.
+      </para>
+     </listitem>
+    </varlistentry>
+  </variablelist>
+ </para>
+
+ <para>
+  To support <quote>partial match</quote> queries, an operator class must
+  provide the <function>comparePartial</function> method, and its
+  <function>extractQuery</function> method must set the <literal>pmatch</literal>
+  parameter when a partial-match query is encountered.  See
+  <xref linkend="gin-partial-match"/> for details.
+ </para>
+
+ <para>
+  The actual data types of the various <literal>Datum</literal> values mentioned
+  above vary depending on the operator class.  The item values passed to
+  <function>extractValue</function> are always of the operator class's input type, and
+  all key values must be of the class's <literal>STORAGE</literal> type.  The type of
+  the <literal>query</literal> argument passed to <function>extractQuery</function>,
+  <function>consistent</function> and <function>triConsistent</function> is whatever is the
+  right-hand input type of the class member operator identified by the
+  strategy number.  This need not be the same as the indexed type, so long as
+  key values of the correct type can be extracted from it.  However, it is
+  recommended that the SQL declarations of these three support functions use
+  the opclass's indexed data type for the <literal>query</literal> argument, even
+  though the actual type might be something else depending on the operator.
+ </para>
+
+</sect1>
+
+<sect1 id="gin-implementation">
+ <title>Implementation</title>
+
+ <para>
+  Internally, a <acronym>GIN</acronym> index contains a B-tree index
+  constructed over keys, where each key is an element of one or more indexed
+  items (a member of an array, for example) and where each tuple in a leaf
+  page contains either a pointer to a B-tree of heap pointers (a
+  <quote>posting tree</quote>), or a simple list of heap pointers (a <quote>posting
+  list</quote>) when the list is small enough to fit into a single index tuple along
+  with the key value.  <xref linkend="gin-internals-figure"/> illustrates
+  these components of a GIN index.
+ </para>
+
+ <para>
+  As of <productname>PostgreSQL</productname> 9.1, null key values can be
+  included in the index.  Also, placeholder nulls are included in the index
+  for indexed items that are null or contain no keys according to
+  <function>extractValue</function>.  This allows searches that should find empty
+  items to do so.
+ </para>
+
+ <para>
+  Multicolumn <acronym>GIN</acronym> indexes are implemented by building
+  a single B-tree over composite values (column number, key value).  The
+  key values for different columns can be of different types.
+ </para>
+
+ <figure id="gin-internals-figure">
+  <title>GIN Internals</title>
+  <mediaobject>
+   <imageobject>
+    <imagedata fileref="images/gin.svg" format="SVG" width="100%"/>
+   </imageobject>
+  </mediaobject>
+ </figure>
+
+ <sect2 id="gin-fast-update">
+  <title>GIN Fast Update Technique</title>
+
+  <para>
+   Updating a <acronym>GIN</acronym> index tends to be slow because of the
+   intrinsic nature of inverted indexes: inserting or updating one heap row
+   can cause many inserts into the index (one for each key extracted
+   from the indexed item).
+   <acronym>GIN</acronym> is capable of postponing much of this work by inserting
+   new tuples into a temporary, unsorted list of pending entries.
+   When the table is vacuumed or autoanalyzed, or when
+   <function>gin_clean_pending_list</function> function is called, or if the
+   pending list becomes larger than
+   <xref linkend="guc-gin-pending-list-limit"/>, the entries are moved to the
+   main <acronym>GIN</acronym> data structure using the same bulk insert
+   techniques used during initial index creation.  This greatly improves
+   <acronym>GIN</acronym> index update speed, even counting the additional
+   vacuum overhead.  Moreover the overhead work can be done by a background
+   process instead of in foreground query processing.
+  </para>
+
+  <para>
+   The main disadvantage of this approach is that searches must scan the list
+   of pending entries in addition to searching the regular index, and so
+   a large list of pending entries will slow searches significantly.
+   Another disadvantage is that, while most updates are fast, an update
+   that causes the pending list to become <quote>too large</quote> will incur an
+   immediate cleanup cycle and thus be much slower than other updates.
+   Proper use of autovacuum can minimize both of these problems.
+  </para>
+
+  <para>
+   If consistent response time is more important than update speed,
+   use of pending entries can be disabled by turning off the
+   <literal>fastupdate</literal> storage parameter for a
+   <acronym>GIN</acronym> index.  See <xref linkend="sql-createindex"/>
+   for details.
+  </para>
+ </sect2>
+
+ <sect2 id="gin-partial-match">
+  <title>Partial Match Algorithm</title>
+
+  <para>
+   GIN can support <quote>partial match</quote> queries, in which the query
+   does not determine an exact match for one or more keys, but the possible
+   matches fall within a reasonably narrow range of key values (within the
+   key sorting order determined by the <function>compare</function> support method).
+   The <function>extractQuery</function> method, instead of returning a key value
+   to be matched exactly, returns a key value that is the lower bound of
+   the range to be searched, and sets the <literal>pmatch</literal> flag true.
+   The key range is then scanned using the <function>comparePartial</function>
+   method.  <function>comparePartial</function> must return zero for a matching
+   index key, less than zero for a non-match that is still within the range
+   to be searched, or greater than zero if the index key is past the range
+   that could match.
+  </para>
+ </sect2>
+
+</sect1>
+
+<sect1 id="gin-tips">
+<title>GIN Tips and Tricks</title>
+
+ <variablelist>
+  <varlistentry>
+   <term>Create vs. insert</term>
+   <listitem>
+    <para>
+     Insertion into a <acronym>GIN</acronym> index can be slow
+     due to the likelihood of many keys being inserted for each item.
+     So, for bulk insertions into a table it is advisable to drop the GIN
+     index and recreate it after finishing bulk insertion.
+    </para>
+
+    <para>
+     When <literal>fastupdate</literal> is enabled for <acronym>GIN</acronym>
+     (see <xref linkend="gin-fast-update"/> for details), the penalty is
+     less than when it is not.  But for very large updates it may still be
+     best to drop and recreate the index.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><xref linkend="guc-maintenance-work-mem"/></term>
+   <listitem>
+    <para>
+     Build time for a <acronym>GIN</acronym> index is very sensitive to
+     the <varname>maintenance_work_mem</varname> setting; it doesn't pay to
+     skimp on work memory during index creation.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><xref linkend="guc-gin-pending-list-limit"/></term>
+   <listitem>
+    <para>
+     During a series of insertions into an existing <acronym>GIN</acronym>
+     index that has <literal>fastupdate</literal> enabled, the system will clean up
+     the pending-entry list whenever the list grows larger than
+     <varname>gin_pending_list_limit</varname>. To avoid fluctuations in observed
+     response time, it's desirable to have pending-list cleanup occur in the
+     background (i.e., via autovacuum).  Foreground cleanup operations
+     can be avoided by increasing <varname>gin_pending_list_limit</varname>
+     or making autovacuum more aggressive.
+     However, enlarging the threshold of the cleanup operation means that
+     if a foreground cleanup does occur, it will take even longer.
+    </para>
+    <para>
+     <varname>gin_pending_list_limit</varname> can be overridden for individual
+     GIN indexes by changing storage parameters, which allows each
+     GIN index to have its own cleanup threshold.
+     For example, it's possible to increase the threshold only for the GIN
+     index which can be updated heavily, and decrease it otherwise.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><xref linkend="guc-gin-fuzzy-search-limit"/></term>
+   <listitem>
+    <para>
+     The primary goal of developing <acronym>GIN</acronym> indexes was
+     to create support for highly scalable full-text search in
+     <productname>PostgreSQL</productname>, and there are often situations when
+     a full-text search returns a very large set of results.  Moreover, this
+     often happens when the query contains very frequent words, so that the
+     large result set is not even useful.  Since reading many
+     tuples from the disk and sorting them could take a lot of time, this is
+     unacceptable for production.  (Note that the index search itself is very
+     fast.)
+    </para>
+    <para>
+     To facilitate controlled execution of such queries,
+     <acronym>GIN</acronym> has a configurable soft upper limit on the
+     number of rows returned: the
+     <varname>gin_fuzzy_search_limit</varname> configuration parameter.
+     It is set to 0 (meaning no limit) by default.
+     If a non-zero limit is set, then the returned set is a subset of
+     the whole result set, chosen at random.
+    </para>
+    <para>
+     <quote>Soft</quote> means that the actual number of returned results
+     could differ somewhat from the specified limit, depending on the query
+     and the quality of the system's random number generator.
+    </para>
+    <para>
+     From experience, values in the thousands (e.g., 5000 &mdash; 20000)
+     work well.
+    </para>
+   </listitem>
+  </varlistentry>
+ </variablelist>
+
+</sect1>
+
+<sect1 id="gin-limit">
+ <title>Limitations</title>
+
+ <para>
+  <acronym>GIN</acronym> assumes that indexable operators are strict.  This
+  means that <function>extractValue</function> will not be called at all on a null
+  item value (instead, a placeholder index entry is created automatically),
+  and <function>extractQuery</function> will not be called on a null query
+  value either (instead, the query is presumed to be unsatisfiable).  Note
+  however that null key values contained within a non-null composite item
+  or query value are supported.
+ </para>
+</sect1>
+
+<sect1 id="gin-examples">
+ <title>Examples</title>
+
+ <para>
+  The core <productname>PostgreSQL</productname> distribution
+  includes the <acronym>GIN</acronym> operator classes previously shown in
+  <xref linkend="gin-builtin-opclasses-table"/>.
+  The following <filename>contrib</filename> modules also contain
+  <acronym>GIN</acronym> operator classes:
+
+ <variablelist>
+  <varlistentry>
+   <term><filename>btree_gin</filename></term>
+   <listitem>
+    <para>B-tree equivalent functionality for several data types</para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><filename>hstore</filename></term>
+   <listitem>
+    <para>Module for storing (key, value) pairs</para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><filename>intarray</filename></term>
+   <listitem>
+    <para>Enhanced support for <type>int[]</type></para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><filename>pg_trgm</filename></term>
+   <listitem>
+    <para>Text similarity using trigram matching</para>
+   </listitem>
+  </varlistentry>
+ </variablelist>
+ </para>
+</sect1>
+
+</chapter>
-- 
cgit v1.2.3