summaryrefslogtreecommitdiffstats
path: root/doc/src/sgml/intarray.sgml
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src/sgml/intarray.sgml')
-rw-r--r--doc/src/sgml/intarray.sgml503
1 files changed, 503 insertions, 0 deletions
diff --git a/doc/src/sgml/intarray.sgml b/doc/src/sgml/intarray.sgml
new file mode 100644
index 0000000..18c6f8c
--- /dev/null
+++ b/doc/src/sgml/intarray.sgml
@@ -0,0 +1,503 @@
+<!-- doc/src/sgml/intarray.sgml -->
+
+<sect1 id="intarray" xreflabel="intarray">
+ <title>intarray</title>
+
+ <indexterm zone="intarray">
+ <primary>intarray</primary>
+ </indexterm>
+
+ <para>
+ The <filename>intarray</filename> module provides a number of useful functions
+ and operators for manipulating null-free arrays of integers.
+ There is also support for indexed searches using some of the operators.
+ </para>
+
+ <para>
+ All of these operations will throw an error if a supplied array contains any
+ NULL elements.
+ </para>
+
+ <para>
+ Many of these operations are only sensible for one-dimensional arrays.
+ Although they will accept input arrays of more dimensions, the data is
+ treated as though it were a linear array in storage order.
+ </para>
+
+ <para>
+ This module is considered <quote>trusted</quote>, that is, it can be
+ installed by non-superusers who have <literal>CREATE</literal> privilege
+ on the current database.
+ </para>
+
+ <sect2>
+ <title><filename>intarray</filename> Functions and Operators</title>
+
+ <para>
+ The functions provided by the <filename>intarray</filename> module
+ are shown in <xref linkend="intarray-func-table"/>, the operators
+ in <xref linkend="intarray-op-table"/>.
+ </para>
+
+ <table id="intarray-func-table">
+ <title><filename>intarray</filename> Functions</title>
+ <tgroup cols="1">
+ <thead>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ Function
+ </para>
+ <para>
+ Description
+ </para>
+ <para>
+ Example(s)
+ </para></entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>icount</primary></indexterm>
+ <function>icount</function> ( <type>integer[]</type> )
+ <returnvalue>integer</returnvalue>
+ </para>
+ <para>
+ Returns the number of elements in the array.
+ </para>
+ <para>
+ <literal>icount('{1,2,3}'::integer[])</literal>
+ <returnvalue>3</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>sort</primary></indexterm>
+ <function>sort</function> ( <type>integer[]</type>, <parameter>dir</parameter> <type>text</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Sorts the array in either ascending or descending order.
+ <parameter>dir</parameter> must be <literal>asc</literal>
+ or <literal>desc</literal>.
+ </para>
+ <para>
+ <literal>sort('{1,3,2}'::integer[], 'desc')</literal>
+ <returnvalue>{3,2,1}</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>sort</function> ( <type>integer[]</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para role="func_signature">
+ <indexterm><primary>sort_asc</primary></indexterm>
+ <function>sort_asc</function> ( <type>integer[]</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Sorts in ascending order.
+ </para>
+ <para>
+ <literal>sort(array[11,77,44])</literal>
+ <returnvalue>{11,44,77}</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>sort_desc</primary></indexterm>
+ <function>sort_desc</function> ( <type>integer[]</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Sorts in descending order.
+ </para>
+ <para>
+ <literal>sort_desc(array[11,77,44])</literal>
+ <returnvalue>{77,44,11}</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>uniq</primary></indexterm>
+ <function>uniq</function> ( <type>integer[]</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Removes adjacent duplicates.
+ Often used with <function>sort</function> to remove all duplicates.
+ </para>
+ <para>
+ <literal>uniq('{1,2,2,3,1,1}'::integer[])</literal>
+ <returnvalue>{1,2,3,1}</returnvalue>
+ </para>
+ <para>
+ <literal>uniq(sort('{1,2,3,2,1}'::integer[]))</literal>
+ <returnvalue>{1,2,3}</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>idx</primary></indexterm>
+ <function>idx</function> ( <type>integer[]</type>, <parameter>item</parameter> <type>integer</type> )
+ <returnvalue>integer</returnvalue>
+ </para>
+ <para>
+ Returns index of the first array element
+ matching <parameter>item</parameter>, or 0 if no match.
+ </para>
+ <para>
+ <literal>idx(array[11,22,33,22,11], 22)</literal>
+ <returnvalue>2</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>subarray</primary></indexterm>
+ <function>subarray</function> ( <type>integer[]</type>, <parameter>start</parameter> <type>integer</type>, <parameter>len</parameter> <type>integer</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Extracts the portion of the array starting at
+ position <parameter>start</parameter>, with <parameter>len</parameter>
+ elements.
+ </para>
+ <para>
+ <literal>subarray('{1,2,3,2,1}'::integer[], 2, 3)</literal>
+ <returnvalue>{2,3,2}</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>subarray</function> ( <type>integer[]</type>, <parameter>start</parameter> <type>integer</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Extracts the portion of the array starting at
+ position <parameter>start</parameter>.
+ </para>
+ <para>
+ <literal>subarray('{1,2,3,2,1}'::integer[], 2)</literal>
+ <returnvalue>{2,3,2,1}</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm><primary>intset</primary></indexterm>
+ <function>intset</function> ( <type>integer</type> )
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Makes a single-element array.
+ </para>
+ <para>
+ <literal>intset(42)</literal>
+ <returnvalue>{42}</returnvalue>
+ </para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table id="intarray-op-table">
+ <title><filename>intarray</filename> Operators</title>
+ <tgroup cols="1">
+ <thead>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ Operator
+ </para>
+ <para>
+ Description
+ </para></entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>&amp;&amp;</literal> <type>integer[]</type>
+ <returnvalue>boolean</returnvalue>
+ </para>
+ <para>
+ Do arrays overlap (have at least one element in common)?
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>@&gt;</literal> <type>integer[]</type>
+ <returnvalue>boolean</returnvalue>
+ </para>
+ <para>
+ Does left array contain right array?
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>&lt;@</literal> <type>integer[]</type>
+ <returnvalue>boolean</returnvalue>
+ </para>
+ <para>
+ Is left array contained in right array?
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type></type> <literal>#</literal> <type>integer[]</type>
+ <returnvalue>integer</returnvalue>
+ </para>
+ <para>
+ Returns the number of elements in the array.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>#</literal> <type>integer</type>
+ <returnvalue>integer</returnvalue>
+ </para>
+ <para>
+ Returns index of the first array element
+ matching the right argument, or 0 if no match.
+ (Same as <function>idx</function> function.)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>+</literal> <type>integer</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Adds element to end of array.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>+</literal> <type>integer[]</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Concatenates the arrays.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>-</literal> <type>integer</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Removes entries matching the right argument from the array.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>-</literal> <type>integer[]</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Removes elements of the right array from the left array.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>|</literal> <type>integer</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Computes the union of the arguments.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>|</literal> <type>integer[]</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Computes the union of the arguments.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>&amp;</literal> <type>integer[]</type>
+ <returnvalue>integer[]</returnvalue>
+ </para>
+ <para>
+ Computes the intersection of the arguments.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>integer[]</type> <literal>@@</literal> <type>query_int</type>
+ <returnvalue>boolean</returnvalue>
+ </para>
+ <para>
+ Does array satisfy query? (see below)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <type>query_int</type> <literal>~~</literal> <type>integer[]</type>
+ <returnvalue>boolean</returnvalue>
+ </para>
+ <para>
+ Does array satisfy query? (commutator of <literal>@@</literal>)
+ </para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ The operators <literal>&amp;&amp;</literal>, <literal>@&gt;</literal> and
+ <literal>&lt;@</literal> are equivalent to <productname>PostgreSQL</productname>'s built-in
+ operators of the same names, except that they work only on integer arrays
+ that do not contain nulls, while the built-in operators work for any array
+ type. This restriction makes them faster than the built-in operators
+ in many cases.
+ </para>
+
+ <para>
+ The <literal>@@</literal> and <literal>~~</literal> operators test whether an array
+ satisfies a <firstterm>query</firstterm>, which is expressed as a value of a
+ specialized data type <type>query_int</type>. A <firstterm>query</firstterm>
+ consists of integer values that are checked against the elements of
+ the array, possibly combined using the operators <literal>&amp;</literal>
+ (AND), <literal>|</literal> (OR), and <literal>!</literal> (NOT). Parentheses
+ can be used as needed. For example,
+ the query <literal>1&amp;(2|3)</literal> matches arrays that contain 1
+ and also contain either 2 or 3.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Index Support</title>
+
+ <para>
+ <filename>intarray</filename> provides index support for the
+ <literal>&amp;&amp;</literal>, <literal>@&gt;</literal>,
+ and <literal>@@</literal> operators, as well as regular array equality.
+ </para>
+
+ <para>
+ Two parameterized GiST index operator classes are provided:
+ <literal>gist__int_ops</literal> (used by default) is suitable for
+ small- to medium-size data sets, while
+ <literal>gist__intbig_ops</literal> uses a larger signature and is more
+ suitable for indexing large data sets (i.e., columns containing
+ a large number of distinct array values).
+ The implementation uses an RD-tree data structure with
+ built-in lossy compression.
+ </para>
+
+ <para>
+ <literal>gist__int_ops</literal> approximates an integer set as an array of
+ integer ranges. Its optional integer parameter <literal>numranges</literal>
+ determines the maximum number of ranges in
+ one index key. The default value of <literal>numranges</literal> is 100.
+ Valid values are between 1 and 253. Using larger arrays as GiST index
+ keys leads to a more precise search (scanning a smaller fraction of the index and
+ fewer heap pages), at the cost of a larger index.
+ </para>
+
+ <para>
+ <literal>gist__intbig_ops</literal> approximates an integer set as a bitmap
+ signature. Its optional integer parameter <literal>siglen</literal>
+ determines the signature length in bytes.
+ The default signature length is 16 bytes. Valid values of signature length
+ are between 1 and 2024 bytes. Longer signatures lead to a more precise
+ search (scanning a smaller fraction of the index and fewer heap pages), at
+ the cost of a larger index.
+ </para>
+
+ <para>
+ There is also a non-default GIN operator class
+ <literal>gin__int_ops</literal>, which supports these operators as well
+ as <literal>&lt;@</literal>.
+ </para>
+
+ <para>
+ The choice between GiST and GIN indexing depends on the relative
+ performance characteristics of GiST and GIN, which are discussed elsewhere.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Example</title>
+
+<programlisting>
+-- a message can be in one or more <quote>sections</quote>
+CREATE TABLE message (mid INT PRIMARY KEY, sections INT[], ...);
+
+-- create specialized index with signature length of 32 bytes
+CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__intbig_ops (siglen = 32));
+
+-- select messages in section 1 OR 2 - OVERLAP operator
+SELECT message.mid FROM message WHERE message.sections &amp;&amp; '{1,2}';
+
+-- select messages in sections 1 AND 2 - CONTAINS operator
+SELECT message.mid FROM message WHERE message.sections @&gt; '{1,2}';
+
+-- the same, using QUERY operator
+SELECT message.mid FROM message WHERE message.sections @@ '1&amp;2'::query_int;
+</programlisting>
+ </sect2>
+
+ <sect2>
+ <title>Benchmark</title>
+
+ <para>
+ The source directory <filename>contrib/intarray/bench</filename> contains a
+ benchmark test suite, which can be run against an installed
+ <productname>PostgreSQL</productname> server. (It also requires <filename>DBD::Pg</filename>
+ to be installed.) To run:
+ </para>
+
+<programlisting>
+cd .../contrib/intarray/bench
+createdb TEST
+psql -c "CREATE EXTENSION intarray" TEST
+./create_test.pl | psql TEST
+./bench.pl
+</programlisting>
+
+ <para>
+ The <filename>bench.pl</filename> script has numerous options, which
+ are displayed when it is run without any arguments.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Authors</title>
+
+ <para>
+ All work was done by Teodor Sigaev (<email>teodor@sigaev.ru</email>) and
+ Oleg Bartunov (<email>oleg@sai.msu.su</email>). See
+ <ulink url="http://www.sai.msu.su/~megera/postgres/gist/"></ulink> for
+ additional information. Andrey Oktyabrski did a great work on adding new
+ functions and operations.
+ </para>
+ </sect2>
+
+</sect1>