diff options
Diffstat (limited to 'doc/src/sgml/intarray.sgml')
-rw-r--r-- | doc/src/sgml/intarray.sgml | 505 |
1 files changed, 505 insertions, 0 deletions
diff --git a/doc/src/sgml/intarray.sgml b/doc/src/sgml/intarray.sgml new file mode 100644 index 0000000..fdeedc6 --- /dev/null +++ b/doc/src/sgml/intarray.sgml @@ -0,0 +1,505 @@ +<!-- doc/src/sgml/intarray.sgml --> + +<sect1 id="intarray" xreflabel="intarray"> + <title>intarray</title> + + <indexterm zone="intarray"> + <primary>intarray</primary> + </indexterm> + + <para> + The <filename>intarray</filename> module provides a number of useful functions + and operators for manipulating null-free arrays of integers. + There is also support for indexed searches using some of the operators. + </para> + + <para> + All of these operations will throw an error if a supplied array contains any + NULL elements. + </para> + + <para> + Many of these operations are only sensible for one-dimensional arrays. + Although they will accept input arrays of more dimensions, the data is + treated as though it were a linear array in storage order. + </para> + + <para> + This module is considered <quote>trusted</quote>, that is, it can be + installed by non-superusers who have <literal>CREATE</literal> privilege + on the current database. + </para> + + <sect2> + <title><filename>intarray</filename> Functions and Operators</title> + + <para> + The functions provided by the <filename>intarray</filename> module + are shown in <xref linkend="intarray-func-table"/>, the operators + in <xref linkend="intarray-op-table"/>. + </para> + + <table id="intarray-func-table"> + <title><filename>intarray</filename> Functions</title> + <tgroup cols="1"> + <thead> + <row> + <entry role="func_table_entry"><para role="func_signature"> + Function + </para> + <para> + Description + </para> + <para> + Example(s) + </para></entry> + </row> + </thead> + + <tbody> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>icount</primary></indexterm> + <function>icount</function> ( <type>integer[]</type> ) + <returnvalue>integer</returnvalue> + </para> + <para> + Returns the number of elements in the array. + </para> + <para> + <literal>icount('{1,2,3}'::integer[])</literal> + <returnvalue>3</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>sort</primary></indexterm> + <function>sort</function> ( <type>integer[]</type>, <parameter>dir</parameter> <type>text</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Sorts the array in either ascending or descending order. + <parameter>dir</parameter> must be <literal>asc</literal> + or <literal>desc</literal>. + </para> + <para> + <literal>sort('{1,3,2}'::integer[], 'desc')</literal> + <returnvalue>{3,2,1}</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <function>sort</function> ( <type>integer[]</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para role="func_signature"> + <indexterm><primary>sort_asc</primary></indexterm> + <function>sort_asc</function> ( <type>integer[]</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Sorts in ascending order. + </para> + <para> + <literal>sort(array[11,77,44])</literal> + <returnvalue>{11,44,77}</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>sort_desc</primary></indexterm> + <function>sort_desc</function> ( <type>integer[]</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Sorts in descending order. + </para> + <para> + <literal>sort_desc(array[11,77,44])</literal> + <returnvalue>{77,44,11}</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>uniq</primary></indexterm> + <function>uniq</function> ( <type>integer[]</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Removes adjacent duplicates. + </para> + <para> + <literal>uniq(sort('{1,2,3,2,1}'::integer[]))</literal> + <returnvalue>{1,2,3}</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>idx</primary></indexterm> + <function>idx</function> ( <type>integer[]</type>, <parameter>item</parameter> <type>integer</type> ) + <returnvalue>integer</returnvalue> + </para> + <para> + Returns index of the first array element + matching <parameter>item</parameter>, or 0 if no match. + </para> + <para> + <literal>idx(array[11,22,33,22,11], 22)</literal> + <returnvalue>2</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>subarray</primary></indexterm> + <function>subarray</function> ( <type>integer[]</type>, <parameter>start</parameter> <type>integer</type>, <parameter>len</parameter> <type>integer</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Extracts the portion of the array starting at + position <parameter>start</parameter>, with <parameter>len</parameter> + elements. + </para> + <para> + <literal>subarray('{1,2,3,2,1}'::integer[], 2, 3)</literal> + <returnvalue>{2,3,2}</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <function>subarray</function> ( <type>integer[]</type>, <parameter>start</parameter> <type>integer</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Extracts the portion of the array starting at + position <parameter>start</parameter>. + </para> + <para> + <literal>subarray('{1,2,3,2,1}'::integer[], 2)</literal> + <returnvalue>{2,3,2,1}</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm><primary>intset</primary></indexterm> + <function>intset</function> ( <type>integer</type> ) + <returnvalue>integer[]</returnvalue> + </para> + <para> + Makes a single-element array. + </para> + <para> + <literal>intset(42)</literal> + <returnvalue>{42}</returnvalue> + </para></entry> + </row> + </tbody> + </tgroup> + </table> + + <table id="intarray-op-table"> + <title><filename>intarray</filename> Operators</title> + <tgroup cols="1"> + <thead> + <row> + <entry role="func_table_entry"><para role="func_signature"> + Operator + </para> + <para> + Description + </para></entry> + </row> + </thead> + + <tbody> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>&&</literal> <type>integer[]</type> + <returnvalue>boolean</returnvalue> + </para> + <para> + Do arrays overlap (have at least one element in common)? + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>@></literal> <type>integer[]</type> + <returnvalue>boolean</returnvalue> + </para> + <para> + Does left array contain right array? + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal><@</literal> <type>integer[]</type> + <returnvalue>boolean</returnvalue> + </para> + <para> + Is left array contained in right array? + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type></type> <literal>#</literal> <type>integer[]</type> + <returnvalue>integer</returnvalue> + </para> + <para> + Returns the number of elements in the array. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>#</literal> <type>integer</type> + <returnvalue>integer</returnvalue> + </para> + <para> + Returns index of the first array element + matching the right argument, or 0 if no match. + (Same as <function>idx</function> function.) + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>+</literal> <type>integer</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Adds element to end of array. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>+</literal> <type>integer[]</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Concatenates the arrays. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>-</literal> <type>integer</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Removes entries matching the right argument from the array. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>-</literal> <type>integer[]</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Removes elements of the right array from the left array. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>|</literal> <type>integer</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Computes the union of the arguments. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>|</literal> <type>integer[]</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Computes the union of the arguments. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>&</literal> <type>integer[]</type> + <returnvalue>integer[]</returnvalue> + </para> + <para> + Computes the intersection of the arguments. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>integer[]</type> <literal>@@</literal> <type>query_int</type> + <returnvalue>boolean</returnvalue> + </para> + <para> + Does array satisfy query? (see below) + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <type>query_int</type> <literal>~~</literal> <type>integer[]</type> + <returnvalue>boolean</returnvalue> + </para> + <para> + Does array satisfy query? (commutator of <literal>@@</literal>) + </para></entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + (Before PostgreSQL 8.2, the containment operators <literal>@></literal> and + <literal><@</literal> were respectively called <literal>@</literal> and <literal>~</literal>. + These names are still available, but are deprecated and will eventually be + retired. Notice that the old names are reversed from the convention + formerly followed by the core geometric data types!) + </para> + + <para> + The operators <literal>&&</literal>, <literal>@></literal> and + <literal><@</literal> are equivalent to <productname>PostgreSQL</productname>'s built-in + operators of the same names, except that they work only on integer arrays + that do not contain nulls, while the built-in operators work for any array + type. This restriction makes them faster than the built-in operators + in many cases. + </para> + + <para> + The <literal>@@</literal> and <literal>~~</literal> operators test whether an array + satisfies a <firstterm>query</firstterm>, which is expressed as a value of a + specialized data type <type>query_int</type>. A <firstterm>query</firstterm> + consists of integer values that are checked against the elements of + the array, possibly combined using the operators <literal>&</literal> + (AND), <literal>|</literal> (OR), and <literal>!</literal> (NOT). Parentheses + can be used as needed. For example, + the query <literal>1&(2|3)</literal> matches arrays that contain 1 + and also contain either 2 or 3. + </para> + </sect2> + + <sect2> + <title>Index Support</title> + + <para> + <filename>intarray</filename> provides index support for the + <literal>&&</literal>, <literal>@></literal>, <literal><@</literal>, + and <literal>@@</literal> operators, as well as regular array equality. + </para> + + <para> + Two parameterized GiST index operator classes are provided: + <literal>gist__int_ops</literal> (used by default) is suitable for + small- to medium-size data sets, while + <literal>gist__intbig_ops</literal> uses a larger signature and is more + suitable for indexing large data sets (i.e., columns containing + a large number of distinct array values). + The implementation uses an RD-tree data structure with + built-in lossy compression. + </para> + + <para> + <literal>gist__int_ops</literal> approximates an integer set as an array of + integer ranges. Its optional integer parameter <literal>numranges</literal> + determines the maximum number of ranges in + one index key. The default value of <literal>numranges</literal> is 100. + Valid values are between 1 and 253. Using larger arrays as GiST index + keys leads to a more precise search (scanning a smaller fraction of the index and + fewer heap pages), at the cost of a larger index. + </para> + + <para> + <literal>gist__intbig_ops</literal> approximates an integer set as a bitmap + signature. Its optional integer parameter <literal>siglen</literal> + determines the signature length in bytes. + The default signature length is 16 bytes. Valid values of signature length + are between 1 and 2024 bytes. Longer signatures lead to a more precise + search (scanning a smaller fraction of the index and fewer heap pages), at + the cost of a larger index. + </para> + + <para> + There is also a non-default GIN operator class + <literal>gin__int_ops</literal> supporting the same operators. + </para> + + <para> + The choice between GiST and GIN indexing depends on the relative + performance characteristics of GiST and GIN, which are discussed elsewhere. + </para> + </sect2> + + <sect2> + <title>Example</title> + +<programlisting> +-- a message can be in one or more <quote>sections</quote> +CREATE TABLE message (mid INT PRIMARY KEY, sections INT[], ...); + +-- create specialized index with signature length of 32 bytes +CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__intbig_ops (siglen = 32)); + +-- select messages in section 1 OR 2 - OVERLAP operator +SELECT message.mid FROM message WHERE message.sections && '{1,2}'; + +-- select messages in sections 1 AND 2 - CONTAINS operator +SELECT message.mid FROM message WHERE message.sections @> '{1,2}'; + +-- the same, using QUERY operator +SELECT message.mid FROM message WHERE message.sections @@ '1&2'::query_int; +</programlisting> + </sect2> + + <sect2> + <title>Benchmark</title> + + <para> + The source directory <filename>contrib/intarray/bench</filename> contains a + benchmark test suite, which can be run against an installed + <productname>PostgreSQL</productname> server. (It also requires <filename>DBD::Pg</filename> + to be installed.) To run: + </para> + +<programlisting> +cd .../contrib/intarray/bench +createdb TEST +psql -c "CREATE EXTENSION intarray" TEST +./create_test.pl | psql TEST +./bench.pl +</programlisting> + + <para> + The <filename>bench.pl</filename> script has numerous options, which + are displayed when it is run without any arguments. + </para> + </sect2> + + <sect2> + <title>Authors</title> + + <para> + All work was done by Teodor Sigaev (<email>teodor@sigaev.ru</email>) and + Oleg Bartunov (<email>oleg@sai.msu.su</email>). See + <ulink url="http://www.sai.msu.su/~megera/postgres/gist/"></ulink> for + additional information. Andrey Oktyabrski did a great work on adding new + functions and operations. + </para> + </sect2> + +</sect1> |