1 files changed, 592 insertions, 0 deletions
diff --git a/doc/src/sgml/rangetypes.sgml b/doc/src/sgml/rangetypes.sgml
new file mode 100644
index 0000000..92ea0e8
--- /dev/null
+++ b/doc/src/sgml/rangetypes.sgml
@@ -0,0 +1,592 @@
+<!-- doc/src/sgml/rangetypes.sgml -->
+
+<sect1 id="rangetypes">
+ <title>Range Types</title>
+
+ <indexterm>
+  <primary>range type</primary>
+ </indexterm>
+
+ <indexterm>
+  <primary>multirange type</primary>
+ </indexterm>
+
+ <para>
+  Range types are data types representing a range of values of some
+  element type (called the range's <firstterm>subtype</firstterm>).
+  For instance, ranges
+  of <type>timestamp</type> might be used to represent the ranges of
+  time that a meeting room is reserved. In this case the data type
+  is <type>tsrange</type> (short for <quote>timestamp range</quote>),
+  and <type>timestamp</type> is the subtype.  The subtype must have
+  a total order so that it is well-defined whether element values are
+  within, before, or after a range of values.
+ </para>
+
+ <para>
+  Range types are useful because they represent many element values in a
+  single range value, and because concepts such as overlapping ranges can
+  be expressed clearly. The use of time and date ranges for scheduling
+  purposes is the clearest example; but price ranges, measurement
+  ranges from an instrument, and so forth can also be useful.
+ </para>
+
+ <para>
+  Every range type has a corresponding multirange type. A multirange is
+  an ordered list of non-contiguous, non-empty, non-null ranges. Most
+  range operators also work on multiranges, and they have a few functions
+  of their own.
+ </para>
+
+ <sect2 id="rangetypes-builtin">
+  <title>Built-in Range and Multirange Types</title>
+
+ <para>
+  PostgreSQL comes with the following built-in range types:
+  <itemizedlist>
+    <listitem>
+      <para>
+       <type>int4range</type> &mdash; Range of <type>integer</type>,
+       <type>int4multirange</type> &mdash; corresponding Multirange
+      </para>
+    </listitem>
+    <listitem>
+      <para>
+       <type>int8range</type> &mdash; Range of <type>bigint</type>,
+       <type>int8multirange</type> &mdash; corresponding Multirange
+      </para>
+    </listitem>
+    <listitem>
+      <para>
+       <type>numrange</type> &mdash; Range of <type>numeric</type>,
+       <type>nummultirange</type> &mdash; corresponding Multirange
+      </para>
+    </listitem>
+    <listitem>
+      <para>
+       <type>tsrange</type> &mdash; Range of <type>timestamp without time zone</type>,
+       <type>tsmultirange</type> &mdash; corresponding Multirange
+      </para>
+    </listitem>
+    <listitem>
+      <para>
+       <type>tstzrange</type> &mdash; Range of <type>timestamp with time zone</type>,
+       <type>tstzmultirange</type> &mdash; corresponding Multirange
+      </para>
+    </listitem>
+    <listitem>
+      <para>
+       <type>daterange</type> &mdash; Range of <type>date</type>,
+       <type>datemultirange</type> &mdash; corresponding Multirange
+      </para>
+    </listitem>
+  </itemizedlist>
+  In addition, you can define your own range types;
+  see <xref linkend="sql-createtype"/> for more information.
+ </para>
+ </sect2>
+
+ <sect2 id="rangetypes-examples">
+  <title>Examples</title>
+
+  <para>
+<programlisting>
+CREATE TABLE reservation (room int, during tsrange);
+INSERT INTO reservation VALUES
+    (1108, '[2010-01-01 14:30, 2010-01-01 15:30)');
+
+-- Containment
+SELECT int4range(10, 20) @&gt; 3;
+
+-- Overlaps
+SELECT numrange(11.1, 22.2) &amp;&amp; numrange(20.0, 30.0);
+
+-- Extract the upper bound
+SELECT upper(int8range(15, 25));
+
+-- Compute the intersection
+SELECT int4range(10, 20) * int4range(15, 25);
+
+-- Is the range empty?
+SELECT isempty(numrange(1, 5));
+</programlisting>
+
+   See <xref linkend="range-operators-table"/>
+   and <xref linkend="range-functions-table"/> for complete lists of
+   operators and functions on range types.
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-inclusivity">
+  <title>Inclusive and Exclusive Bounds</title>
+
+  <para>
+   Every non-empty range has two bounds, the lower bound and the upper
+   bound. All points between these values are included in the range. An
+   inclusive bound means that the boundary point itself is included in
+   the range as well, while an exclusive bound means that the boundary
+   point is not included in the range.
+  </para>
+
+  <para>
+   In the text form of a range, an inclusive lower bound is represented by
+   <quote><literal>[</literal></quote> while an exclusive lower bound is
+   represented by <quote><literal>(</literal></quote>. Likewise, an inclusive upper bound is represented by
+   <quote><literal>]</literal></quote>, while an exclusive upper bound is
+   represented by <quote><literal>)</literal></quote>.
+   (See <xref linkend="rangetypes-io"/> for more details.)
+  </para>
+
+  <para>
+   The functions <literal>lower_inc</literal>
+   and <literal>upper_inc</literal> test the inclusivity of the lower
+   and upper bounds of a range value, respectively.
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-infinite">
+  <title>Infinite (Unbounded) Ranges</title>
+
+  <para>
+   The lower bound of a range can be omitted, meaning that all
+   values less than the upper bound are included in the range, e.g.,
+   <literal>(,3]</literal>. Likewise, if the upper bound of the range
+   is omitted, then all values greater than the lower bound are included
+   in the range. If both lower and upper bounds are omitted, all values
+   of the element type are considered to be in the range.  Specifying a
+   missing bound as inclusive is automatically converted to exclusive,
+   e.g., <literal>[,]</literal> is converted to <literal>(,)</literal>.
+   You can think of these missing values as +/-infinity, but they are
+   special range type values and are considered to be beyond any range
+   element type's +/-infinity values.
+  </para>
+
+  <para>
+   Element types that have the notion of <quote>infinity</quote> can
+   use them as explicit bound values.  For example, with timestamp
+   ranges, <literal>[today,infinity)</literal> excludes the special
+   <type>timestamp</type> value <literal>infinity</literal>,
+   while <literal>[today,infinity]</literal> include it, as does
+   <literal>[today,)</literal> and <literal>[today,]</literal>.
+  </para>
+
+  <para>
+   The functions <literal>lower_inf</literal>
+   and <literal>upper_inf</literal> test for infinite lower
+   and upper bounds of a range, respectively.
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-io">
+  <title>Range Input/Output</title>
+
+  <para>
+   The input for a range value must follow one of the following patterns:
+<synopsis>
+(<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>)
+(<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>]
+[<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>)
+[<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>]
+empty
+</synopsis>
+   The parentheses or brackets indicate whether the lower and upper bounds
+   are exclusive or inclusive, as described previously.
+   Notice that the final pattern is <literal>empty</literal>, which
+   represents an empty range (a range that contains no points).
+  </para>
+
+  <para>
+   The <replaceable>lower-bound</replaceable> may be either a string
+   that is valid input for the subtype, or empty to indicate no
+   lower bound.  Likewise, <replaceable>upper-bound</replaceable> may be
+   either a string that is valid input for the subtype, or empty to
+   indicate no upper bound.
+  </para>
+
+  <para>
+   Each bound value can be quoted using <literal>"</literal> (double quote)
+   characters.  This is necessary if the bound value contains parentheses,
+   brackets, commas, double quotes, or backslashes, since these characters
+   would otherwise be taken as part of the range syntax.  To put a double
+   quote or backslash in a quoted bound value, precede it with a
+   backslash. (Also, a pair of double quotes within a double-quoted bound
+   value is taken to represent a double quote character, analogously to the
+   rules for single quotes in SQL literal strings.) Alternatively, you can
+   avoid quoting and use backslash-escaping to protect all data characters
+   that would otherwise be taken as range syntax.  Also, to write a bound
+   value that is an empty string, write <literal>""</literal>, since writing
+   nothing means an infinite bound.
+  </para>
+
+  <para>
+   Whitespace is allowed before and after the range value, but any whitespace
+   between the parentheses or brackets is taken as part of the lower or upper
+   bound value.  (Depending on the element type, it might or might not be
+   significant.)
+  </para>
+
+  <note>
+   <para>
+    These rules are very similar to those for writing field values in
+    composite-type literals.  See <xref linkend="rowtypes-io-syntax"/> for
+    additional commentary.
+   </para>
+  </note>
+
+  <para>
+  Examples:
+<programlisting>
+-- includes 3, does not include 7, and does include all points in between
+SELECT '[3,7)'::int4range;
+
+-- does not include either 3 or 7, but includes all points in between
+SELECT '(3,7)'::int4range;
+
+-- includes only the single point 4
+SELECT '[4,4]'::int4range;
+
+-- includes no points (and will be normalized to 'empty')
+SELECT '[4,4)'::int4range;
+</programlisting>
+  </para>
+
+  <para>
+   The input for a multirange is curly brackets (<literal>{</literal> and
+   <literal>}</literal>) containing zero or more valid ranges,
+   separated by commas. Whitespace is permitted around the brackets and
+   commas. This is intended to be reminiscent of array syntax, although
+   multiranges are much simpler: they have just one dimension and there is
+   no need to quote their contents. (The bounds of their ranges may be
+   quoted as above however.)
+  </para>
+
+  <para>
+  Examples:
+<programlisting>
+SELECT '{}'::int4multirange;
+SELECT '{[3,7)}'::int4multirange;
+SELECT '{[3,7), [8,9)}'::int4multirange;
+</programlisting>
+  </para>
+
+ </sect2>
+
+ <sect2 id="rangetypes-construct">
+  <title>Constructing Ranges and Multiranges</title>
+
+  <para>
+   Each range type has a constructor function with the same name as the range
+   type.  Using the constructor function is frequently more convenient than
+   writing a range literal constant, since it avoids the need for extra
+   quoting of the bound values.  The constructor function
+   accepts two or three arguments.  The two-argument form constructs a range
+   in standard form (lower bound inclusive, upper bound exclusive), while
+   the three-argument form constructs a range with bounds of the form
+   specified by the third argument.
+   The third argument must be one of the strings
+   <quote><literal>()</literal></quote>,
+   <quote><literal>(]</literal></quote>,
+   <quote><literal>[)</literal></quote>, or
+   <quote><literal>[]</literal></quote>.
+   For example:
+
+<programlisting>
+-- The full form is: lower bound, upper bound, and text argument indicating
+-- inclusivity/exclusivity of bounds.
+SELECT numrange(1.0, 14.0, '(]');
+
+-- If the third argument is omitted, '[)' is assumed.
+SELECT numrange(1.0, 14.0);
+
+-- Although '(]' is specified here, on display the value will be converted to
+-- canonical form, since int8range is a discrete range type (see below).
+SELECT int8range(1, 14, '(]');
+
+-- Using NULL for either bound causes the range to be unbounded on that side.
+SELECT numrange(NULL, 2.2);
+</programlisting>
+  </para>
+
+  <para>
+   Each range type also has a multirange constructor with the same name as the
+   multirange type.  The constructor function takes zero or more arguments
+   which are all ranges of the appropriate type.
+   For example:
+
+<programlisting>
+SELECT nummultirange();
+SELECT nummultirange(numrange(1.0, 14.0));
+SELECT nummultirange(numrange(1.0, 14.0), numrange(20.0, 25.0));
+</programlisting>
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-discrete">
+  <title>Discrete Range Types</title>
+
+  <para>
+   A discrete range is one whose element type has a well-defined
+   <quote>step</quote>, such as <type>integer</type> or <type>date</type>.
+   In these types two elements can be said to be adjacent, when there are
+   no valid values between them.  This contrasts with continuous ranges,
+   where it's always (or almost always) possible to identify other element
+   values between two given values.  For example, a range over the
+   <type>numeric</type> type is continuous, as is a range over <type>timestamp</type>.
+   (Even though <type>timestamp</type> has limited precision, and so could
+   theoretically be treated as discrete, it's better to consider it continuous
+   since the step size is normally not of interest.)
+  </para>
+
+  <para>
+   Another way to think about a discrete range type is that there is a clear
+   idea of a <quote>next</quote> or <quote>previous</quote> value for each element value.
+   Knowing that, it is possible to convert between inclusive and exclusive
+   representations of a range's bounds, by choosing the next or previous
+   element value instead of the one originally given.
+   For example, in an integer range type <literal>[4,8]</literal> and
+   <literal>(3,9)</literal> denote the same set of values; but this would not be so
+   for a range over numeric.
+  </para>
+
+  <para>
+   A discrete range type should have a <firstterm>canonicalization</firstterm>
+   function that is aware of the desired step size for the element type.
+   The canonicalization function is charged with converting equivalent values
+   of the range type to have identical representations, in particular
+   consistently inclusive or exclusive bounds.
+   If a canonicalization function is not specified, then ranges with different
+   formatting will always be treated as unequal, even though they might
+   represent the same set of values in reality.
+  </para>
+
+  <para>
+   The built-in range types <type>int4range</type>, <type>int8range</type>,
+   and <type>daterange</type> all use a canonical form that includes
+   the lower bound and excludes the upper bound; that is,
+   <literal>[)</literal>. User-defined range types can use other conventions,
+   however.
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-defining">
+  <title>Defining New Range Types</title>
+
+  <para>
+   Users can define their own range types. The most common reason to do
+   this is to use ranges over subtypes not provided among the built-in
+   range types.
+   For example, to define a new range type of subtype <type>float8</type>:
+
+<programlisting>
+CREATE TYPE floatrange AS RANGE (
+    subtype = float8,
+    subtype_diff = float8mi
+);
+
+SELECT '[1.234, 5.678]'::floatrange;
+</programlisting>
+
+   Because <type>float8</type> has no meaningful
+   <quote>step</quote>, we do not define a canonicalization
+   function in this example.
+  </para>
+
+  <para>
+   When you define your own range you automatically get a corresponding
+   multirange type.
+  </para>
+
+  <para>
+   Defining your own range type also allows you to specify a different
+   subtype B-tree operator class or collation to use, so as to change the sort
+   ordering that determines which values fall into a given range.
+  </para>
+
+  <para>
+   If the subtype is considered to have discrete rather than continuous
+   values, the <command>CREATE TYPE</command> command should specify a
+   <literal>canonical</literal> function.
+   The canonicalization function takes an input range value, and must return
+   an equivalent range value that may have different bounds and formatting.
+   The canonical output for two ranges that represent the same set of values,
+   for example the integer ranges <literal>[1, 7]</literal> and <literal>[1,
+   8)</literal>, must be identical.  It doesn't matter which representation
+   you choose to be the canonical one, so long as two equivalent values with
+   different formattings are always mapped to the same value with the same
+   formatting.  In addition to adjusting the inclusive/exclusive bounds
+   format, a canonicalization function might round off boundary values, in
+   case the desired step size is larger than what the subtype is capable of
+   storing.  For instance, a range type over <type>timestamp</type> could be
+   defined to have a step size of an hour, in which case the canonicalization
+   function would need to round off bounds that weren't a multiple of an hour,
+   or perhaps throw an error instead.
+  </para>
+
+  <para>
+   In addition, any range type that is meant to be used with GiST or SP-GiST
+   indexes should define a subtype difference, or <literal>subtype_diff</literal>,
+   function.  (The index will still work without <literal>subtype_diff</literal>,
+   but it is likely to be considerably less efficient than if a difference
+   function is provided.)  The subtype difference function takes two input
+   values of the subtype, and returns their difference
+   (i.e., <replaceable>X</replaceable> minus <replaceable>Y</replaceable>) represented as
+   a <type>float8</type> value.  In our example above, the
+   function <function>float8mi</function> that underlies the regular <type>float8</type>
+   minus operator can be used; but for any other subtype, some type
+   conversion would be necessary.  Some creative thought about how to
+   represent differences as numbers might be needed, too.  To the greatest
+   extent possible, the <literal>subtype_diff</literal> function should agree with
+   the sort ordering implied by the selected operator class and collation;
+   that is, its result should be positive whenever its first argument is
+   greater than its second according to the sort ordering.
+  </para>
+
+  <para>
+   A less-oversimplified example of a <literal>subtype_diff</literal> function is:
+  </para>
+
+<programlisting>
+CREATE FUNCTION time_subtype_diff(x time, y time) RETURNS float8 AS
+'SELECT EXTRACT(EPOCH FROM (x - y))' LANGUAGE sql STRICT IMMUTABLE;
+
+CREATE TYPE timerange AS RANGE (
+    subtype = time,
+    subtype_diff = time_subtype_diff
+);
+
+SELECT '[11:10, 23:00]'::timerange;
+</programlisting>
+
+  <para>
+   See <xref linkend="sql-createtype"/> for more information about creating
+   range types.
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-indexing">
+  <title>Indexing</title>
+
+  <indexterm>
+    <primary>range type</primary>
+    <secondary>indexes on</secondary>
+  </indexterm>
+
+  <para>
+   GiST and SP-GiST indexes can be created for table columns of range types.
+   GiST indexes can be also created for table columns of multirange types.
+   For instance, to create a GiST index:
+<programlisting>
+CREATE INDEX reservation_idx ON reservation USING GIST (during);
+</programlisting>
+   A GiST or SP-GiST index on ranges can accelerate queries involving these
+   range operators:
+   <literal>=</literal>,
+   <literal>&amp;&amp;</literal>,
+   <literal>&lt;@</literal>,
+   <literal>@&gt;</literal>,
+   <literal>&lt;&lt;</literal>,
+   <literal>&gt;&gt;</literal>,
+   <literal>-|-</literal>,
+   <literal>&amp;&lt;</literal>, and
+   <literal>&amp;&gt;</literal>.
+   A GiST index on multiranges can accelerate queries involving the same
+   set of multirange operators.
+   A GiST index on ranges and GiST index on multiranges can also accelerate
+   queries involving these cross-type range to multirange and multirange to
+   range operators correspondingly:
+   <literal>&amp;&amp;</literal>,
+   <literal>&lt;@</literal>,
+   <literal>@&gt;</literal>,
+   <literal>&lt;&lt;</literal>,
+   <literal>&gt;&gt;</literal>,
+   <literal>-|-</literal>,
+   <literal>&amp;&lt;</literal>, and
+   <literal>&amp;&gt;</literal>.
+   See <xref linkend="range-operators-table"/> for more information.
+  </para>
+
+  <para>
+   In addition, B-tree and hash indexes can be created for table columns of
+   range types.  For these index types, basically the only useful range
+   operation is equality.  There is a B-tree sort ordering defined for range
+   values, with corresponding <literal>&lt;</literal> and <literal>&gt;</literal> operators,
+   but the ordering is rather arbitrary and not usually useful in the real
+   world.  Range types' B-tree and hash support is primarily meant to
+   allow sorting and hashing internally in queries, rather than creation of
+   actual indexes.
+  </para>
+ </sect2>
+
+ <sect2 id="rangetypes-constraint">
+  <title>Constraints on Ranges</title>
+
+  <indexterm>
+    <primary>range type</primary>
+    <secondary>exclude</secondary>
+  </indexterm>
+
+  <para>
+   While <literal>UNIQUE</literal> is a natural constraint for scalar
+   values, it is usually unsuitable for range types. Instead, an
+   exclusion constraint is often more appropriate
+   (see <link linkend="sql-createtable-exclude">CREATE TABLE
+   ... CONSTRAINT ... EXCLUDE</link>). Exclusion constraints allow the
+   specification of constraints such as <quote>non-overlapping</quote> on a
+   range type. For example:
+
+<programlisting>
+CREATE TABLE reservation (
+    during tsrange,
+    EXCLUDE USING GIST (during WITH &amp;&amp;)
+);
+</programlisting>
+
+   That constraint will prevent any overlapping values from existing
+   in the table at the same time:
+
+<programlisting>
+INSERT INTO reservation VALUES
+    ('[2010-01-01 11:30, 2010-01-01 15:00)');
+INSERT 0 1
+
+INSERT INTO reservation VALUES
+    ('[2010-01-01 14:45, 2010-01-01 15:45)');
+ERROR:  conflicting key value violates exclusion constraint "reservation_during_excl"
+DETAIL:  Key (during)=(["2010-01-01 14:45:00","2010-01-01 15:45:00")) conflicts
+with existing key (during)=(["2010-01-01 11:30:00","2010-01-01 15:00:00")).
+</programlisting>
+  </para>
+
+  <para>
+   You can use the <link linkend="btree-gist"><literal>btree_gist</literal></link>
+   extension to define exclusion constraints on plain scalar data types, which
+   can then be combined with range exclusions for maximum flexibility.  For
+   example, after <literal>btree_gist</literal> is installed, the following
+   constraint will reject overlapping ranges only if the meeting room numbers
+   are equal:
+
+<programlisting>
+CREATE EXTENSION btree_gist;
+CREATE TABLE room_reservation (
+    room text,
+    during tsrange,
+    EXCLUDE USING GIST (room WITH =, during WITH &amp;&amp;)
+);
+
+INSERT INTO room_reservation VALUES
+    ('123A', '[2010-01-01 14:00, 2010-01-01 15:00)');
+INSERT 0 1
+
+INSERT INTO room_reservation VALUES
+    ('123A', '[2010-01-01 14:30, 2010-01-01 15:30)');
+ERROR:  conflicting key value violates exclusion constraint "room_reservation_room_during_excl"
+DETAIL:  Key (room, during)=(123A, ["2010-01-01 14:30:00","2010-01-01 15:30:00")) conflicts
+with existing key (room, during)=(123A, ["2010-01-01 14:00:00","2010-01-01 15:00:00")).
+
+INSERT INTO room_reservation VALUES
+    ('123B', '[2010-01-01 14:30, 2010-01-01 15:30)');
+INSERT 0 1
+</programlisting>
+  </para>
+ </sect2>
+</sect1>