Adding upstream version 16.2.upstream/16.2

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-13 13:44:03 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-13 13:44:03 +0000
commit: 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch)
tree: fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /doc/src/sgml/ref/create_aggregate.sgml
parent: Initial commit. (diff)
download: postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz
postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip
1 files changed, 805 insertions, 0 deletions
diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml
new file mode 100644
index 0000000..222e0aa
--- /dev/null
+++ b/doc/src/sgml/ref/create_aggregate.sgml
@@ -0,0 +1,805 @@
+<!--
+doc/src/sgml/ref/create_aggregate.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="sql-createaggregate">
+ <indexterm zone="sql-createaggregate">
+  <primary>CREATE AGGREGATE</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>CREATE AGGREGATE</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>CREATE AGGREGATE</refname>
+  <refpurpose>define a new aggregate function</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+CREATE [ OR REPLACE ] AGGREGATE <replaceable class="parameter">name</replaceable> ( [ <replaceable class="parameter">argmode</replaceable> ] [ <replaceable class="parameter">argname</replaceable> ] <replaceable class="parameter">arg_data_type</replaceable> [ , ... ] ) (
+    SFUNC = <replaceable class="parameter">sfunc</replaceable>,
+    STYPE = <replaceable class="parameter">state_data_type</replaceable>
+    [ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ]
+    [ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ]
+    [ , FINALFUNC_EXTRA ]
+    [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
+    [ , COMBINEFUNC = <replaceable class="parameter">combinefunc</replaceable> ]
+    [ , SERIALFUNC = <replaceable class="parameter">serialfunc</replaceable> ]
+    [ , DESERIALFUNC = <replaceable class="parameter">deserialfunc</replaceable> ]
+    [ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ]
+    [ , MSFUNC = <replaceable class="parameter">msfunc</replaceable> ]
+    [ , MINVFUNC = <replaceable class="parameter">minvfunc</replaceable> ]
+    [ , MSTYPE = <replaceable class="parameter">mstate_data_type</replaceable> ]
+    [ , MSSPACE = <replaceable class="parameter">mstate_data_size</replaceable> ]
+    [ , MFINALFUNC = <replaceable class="parameter">mffunc</replaceable> ]
+    [ , MFINALFUNC_EXTRA ]
+    [ , MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
+    [ , MINITCOND = <replaceable class="parameter">minitial_condition</replaceable> ]
+    [ , SORTOP = <replaceable class="parameter">sort_operator</replaceable> ]
+    [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ]
+)
+
+CREATE [ OR REPLACE ] AGGREGATE <replaceable class="parameter">name</replaceable> ( [ [ <replaceable class="parameter">argmode</replaceable> ] [ <replaceable class="parameter">argname</replaceable> ] <replaceable class="parameter">arg_data_type</replaceable> [ , ... ] ]
+                        ORDER BY [ <replaceable class="parameter">argmode</replaceable> ] [ <replaceable class="parameter">argname</replaceable> ] <replaceable class="parameter">arg_data_type</replaceable> [ , ... ] ) (
+    SFUNC = <replaceable class="parameter">sfunc</replaceable>,
+    STYPE = <replaceable class="parameter">state_data_type</replaceable>
+    [ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ]
+    [ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ]
+    [ , FINALFUNC_EXTRA ]
+    [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
+    [ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ]
+    [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ]
+    [ , HYPOTHETICAL ]
+)
+
+<phrase>or the old syntax</phrase>
+
+CREATE [ OR REPLACE ] AGGREGATE <replaceable class="parameter">name</replaceable> (
+    BASETYPE = <replaceable class="parameter">base_type</replaceable>,
+    SFUNC = <replaceable class="parameter">sfunc</replaceable>,
+    STYPE = <replaceable class="parameter">state_data_type</replaceable>
+    [ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ]
+    [ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ]
+    [ , FINALFUNC_EXTRA ]
+    [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
+    [ , COMBINEFUNC = <replaceable class="parameter">combinefunc</replaceable> ]
+    [ , SERIALFUNC = <replaceable class="parameter">serialfunc</replaceable> ]
+    [ , DESERIALFUNC = <replaceable class="parameter">deserialfunc</replaceable> ]
+    [ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ]
+    [ , MSFUNC = <replaceable class="parameter">msfunc</replaceable> ]
+    [ , MINVFUNC = <replaceable class="parameter">minvfunc</replaceable> ]
+    [ , MSTYPE = <replaceable class="parameter">mstate_data_type</replaceable> ]
+    [ , MSSPACE = <replaceable class="parameter">mstate_data_size</replaceable> ]
+    [ , MFINALFUNC = <replaceable class="parameter">mffunc</replaceable> ]
+    [ , MFINALFUNC_EXTRA ]
+    [ , MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
+    [ , MINITCOND = <replaceable class="parameter">minitial_condition</replaceable> ]
+    [ , SORTOP = <replaceable class="parameter">sort_operator</replaceable> ]
+)
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>CREATE AGGREGATE</command> defines a new aggregate function.
+   <command>CREATE OR REPLACE AGGREGATE</command> will either define a new
+   aggregate function or replace an existing definition. Some basic and
+   commonly-used aggregate functions are included with the distribution; they
+   are documented in <xref linkend="functions-aggregate"/>. If one defines new
+   types or needs an aggregate function not already provided, then
+   <command>CREATE AGGREGATE</command> can be used to provide the desired
+   features.
+  </para>
+
+  <para>
+   When replacing an existing definition, the argument types, result type,
+   and number of direct arguments may not be changed. Also, the new definition
+   must be of the same kind (ordinary aggregate, ordered-set aggregate, or
+   hypothetical-set aggregate) as the old one.
+  </para>
+
+  <para>
+   If a schema name is given (for example, <literal>CREATE AGGREGATE
+   myschema.myagg ...</literal>) then the aggregate function is created in the
+   specified schema.  Otherwise it is created in the current schema.
+  </para>
+
+  <para>
+   An aggregate function is identified by its name and input data type(s).
+   Two aggregates in the same schema can have the same name if they operate on
+   different input types.  The
+   name and input data type(s) of an aggregate must also be distinct from
+   the name and input data type(s) of every ordinary function in the same
+   schema.
+   This behavior is identical to overloading of ordinary function names
+   (see <xref linkend="sql-createfunction"/>).
+  </para>
+
+  <para>
+   A simple aggregate function is made from one or two ordinary
+   functions:
+   a state transition function
+   <replaceable class="parameter">sfunc</replaceable>,
+   and an optional final calculation function
+   <replaceable class="parameter">ffunc</replaceable>.
+   These are used as follows:
+<programlisting>
+<replaceable class="parameter">sfunc</replaceable>( internal-state, next-data-values ) ---> next-internal-state
+<replaceable class="parameter">ffunc</replaceable>( internal-state ) ---> aggregate-value
+</programlisting>
+  </para>
+
+  <para>
+   <productname>PostgreSQL</productname> creates a temporary variable
+   of data type <replaceable class="parameter">stype</replaceable>
+   to hold the current internal state of the aggregate.  At each input row,
+   the aggregate argument value(s) are calculated and
+   the state transition function is invoked with the current state value
+   and the new argument value(s) to calculate a new
+   internal state value.  After all the rows have been processed,
+   the final function is invoked once to calculate the aggregate's return
+   value.  If there is no final function then the ending state value
+   is returned as-is.
+  </para>
+
+  <para>
+   An aggregate function can provide an initial condition,
+   that is, an initial value for the internal state value.
+   This is specified and stored in the database as a value of type
+   <type>text</type>, but it must be a valid external representation
+   of a constant of the state value data type.  If it is not supplied
+   then the state value starts out null.
+  </para>
+
+  <para>
+   If the state transition function is declared <quote>strict</quote>,
+   then it cannot be called with null inputs.  With such a transition
+   function, aggregate execution behaves as follows.  Rows with any null input
+   values are ignored (the function is not called and the previous state value
+   is retained).  If the initial state value is null, then at the first row
+   with all-nonnull input values, the first argument value replaces the state
+   value, and the transition function is invoked at each subsequent row with
+   all-nonnull input values.
+   This is handy for implementing aggregates like <function>max</function>.
+   Note that this behavior is only available when
+   <replaceable class="parameter">state_data_type</replaceable>
+   is the same as the first
+   <replaceable class="parameter">arg_data_type</replaceable>.
+   When these types are different, you must supply a nonnull initial
+   condition or use a nonstrict transition function.
+  </para>
+
+  <para>
+   If the state transition function is not strict, then it will be called
+   unconditionally at each input row, and must deal with null inputs
+   and null state values for itself.  This allows the aggregate
+   author to have full control over the aggregate's handling of null values.
+  </para>
+
+  <para>
+   If the final function is declared <quote>strict</quote>, then it will not
+   be called when the ending state value is null; instead a null result
+   will be returned automatically.  (Of course this is just the normal
+   behavior of strict functions.)  In any case the final function has
+   the option of returning a null value.  For example, the final function for
+   <function>avg</function> returns null when it sees there were zero
+   input rows.
+  </para>
+
+  <para>
+   Sometimes it is useful to declare the final function as taking not just
+   the state value, but extra parameters corresponding to the aggregate's
+   input values.  The main reason for doing this is if the final function
+   is polymorphic and the state value's data type would be inadequate to
+   pin down the result type.  These extra parameters are always passed as
+   NULL (and so the final function must not be strict when
+   the <literal>FINALFUNC_EXTRA</literal> option is used), but nonetheless they
+   are valid parameters.  The final function could for example make use
+   of <function>get_fn_expr_argtype</function> to identify the actual argument type
+   in the current call.
+  </para>
+
+  <para>
+   An aggregate can optionally support <firstterm>moving-aggregate mode</firstterm>,
+   as described in <xref linkend="xaggr-moving-aggregates"/>.  This requires
+   specifying the <literal>MSFUNC</literal>, <literal>MINVFUNC</literal>,
+   and <literal>MSTYPE</literal> parameters, and optionally
+   the <literal>MSSPACE</literal>, <literal>MFINALFUNC</literal>,
+   <literal>MFINALFUNC_EXTRA</literal>, <literal>MFINALFUNC_MODIFY</literal>,
+   and <literal>MINITCOND</literal> parameters.  Except for <literal>MINVFUNC</literal>,
+   these parameters work like the corresponding simple-aggregate parameters
+   without <literal>M</literal>; they define a separate implementation of the
+   aggregate that includes an inverse transition function.
+  </para>
+
+  <para>
+   The syntax with <literal>ORDER BY</literal> in the parameter list creates
+   a special type of aggregate called an <firstterm>ordered-set
+   aggregate</firstterm>; or if <literal>HYPOTHETICAL</literal> is specified, then
+   a <firstterm>hypothetical-set aggregate</firstterm> is created.  These
+   aggregates operate over groups of sorted values in order-dependent ways,
+   so that specification of an input sort order is an essential part of a
+   call.  Also, they can have <firstterm>direct</firstterm> arguments, which are
+   arguments that are evaluated only once per aggregation rather than once
+   per input row.  Hypothetical-set aggregates are a subclass of ordered-set
+   aggregates in which some of the direct arguments are required to match,
+   in number and data types, the aggregated argument columns.  This allows
+   the values of those direct arguments to be added to the collection of
+   aggregate-input rows as an additional <quote>hypothetical</quote> row.
+  </para>
+
+  <para>
+   An aggregate can optionally support <firstterm>partial aggregation</firstterm>,
+   as described in <xref linkend="xaggr-partial-aggregates"/>.
+   This requires specifying the <literal>COMBINEFUNC</literal> parameter.
+   If the <replaceable class="parameter">state_data_type</replaceable>
+   is <type>internal</type>, it's usually also appropriate to provide the
+   <literal>SERIALFUNC</literal> and <literal>DESERIALFUNC</literal> parameters so that
+   parallel aggregation is possible.  Note that the aggregate must also be
+   marked <literal>PARALLEL SAFE</literal> to enable parallel aggregation.
+  </para>
+
+  <para>
+   Aggregates that behave like <function>MIN</function> or <function>MAX</function> can
+   sometimes be optimized by looking into an index instead of scanning every
+   input row.  If this aggregate can be so optimized, indicate it by
+   specifying a <firstterm>sort operator</firstterm>.  The basic requirement is that
+   the aggregate must yield the first element in the sort ordering induced by
+   the operator; in other words:
+<programlisting>
+SELECT agg(col) FROM tab;
+</programlisting>
+   must be equivalent to:
+<programlisting>
+SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
+</programlisting>
+   Further assumptions are that the aggregate ignores null inputs, and that
+   it delivers a null result if and only if there were no non-null inputs.
+   Ordinarily, a data type's <literal>&lt;</literal> operator is the proper sort
+   operator for <function>MIN</function>, and <literal>&gt;</literal> is the proper sort
+   operator for <function>MAX</function>.  Note that the optimization will never
+   actually take effect unless the specified operator is the <quote>less
+   than</quote> or <quote>greater than</quote> strategy member of a B-tree
+   index operator class.
+  </para>
+
+  <para>
+   To be able to create an aggregate function, you must
+   have <literal>USAGE</literal> privilege on the argument types, the state
+   type(s), and the return type, as well as <literal>EXECUTE</literal>
+   privilege on the supporting functions.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="parameter">name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the aggregate function
+      to create.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">argmode</replaceable></term>
+
+    <listitem>
+     <para>
+      The mode of an argument: <literal>IN</literal> or <literal>VARIADIC</literal>.
+      (Aggregate functions do not support <literal>OUT</literal> arguments.)
+      If omitted, the default is <literal>IN</literal>.  Only the last argument
+      can be marked <literal>VARIADIC</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">argname</replaceable></term>
+
+    <listitem>
+     <para>
+      The name of an argument.  This is currently only useful for
+      documentation purposes.  If omitted, the argument has no name.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">arg_data_type</replaceable></term>
+    <listitem>
+     <para>
+      An input data type on which this aggregate function operates.
+      To create a zero-argument aggregate function, write <literal>*</literal>
+      in place of the list of argument specifications.  (An example of such an
+      aggregate is <function>count(*)</function>.)
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">base_type</replaceable></term>
+    <listitem>
+     <para>
+      In the old syntax for <command>CREATE AGGREGATE</command>, the input data type
+      is specified by a <literal>basetype</literal> parameter rather than being
+      written next to the aggregate name.  Note that this syntax allows
+      only one input parameter.  To define a zero-argument aggregate function
+      with this syntax, specify the <literal>basetype</literal> as
+      <literal>"ANY"</literal> (not <literal>*</literal>).
+      Ordered-set aggregates cannot be defined with the old syntax.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">sfunc</replaceable></term>
+    <listitem>
+     <para>
+      The name of the state transition function to be called for each
+      input row.  For a normal <replaceable class="parameter">N</replaceable>-argument
+      aggregate function, the <replaceable class="parameter">sfunc</replaceable>
+      must take <replaceable class="parameter">N</replaceable>+1 arguments,
+      the first being of type <replaceable
+      class="parameter">state_data_type</replaceable> and the rest
+      matching the declared input data type(s) of the aggregate.
+      The function must return a value of type <replaceable
+      class="parameter">state_data_type</replaceable>.  This function
+      takes the current state value and the current input data value(s),
+      and returns the next state value.
+     </para>
+
+     <para>
+      For ordered-set (including hypothetical-set) aggregates, the state
+      transition function receives only the current state value and the
+      aggregated arguments, not the direct arguments.  Otherwise it is the
+      same.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">state_data_type</replaceable></term>
+    <listitem>
+     <para>
+      The data type for the aggregate's state value.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">state_data_size</replaceable></term>
+    <listitem>
+     <para>
+      The approximate average size (in bytes) of the aggregate's state value.
+      If this parameter is omitted or is zero, a default estimate is used
+      based on the <replaceable>state_data_type</replaceable>.
+      The planner uses this value to estimate the memory required for a
+      grouped aggregate query.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">ffunc</replaceable></term>
+    <listitem>
+     <para>
+      The name of the final function called to compute the aggregate's
+      result after all input rows have been traversed.
+      For a normal aggregate, this function
+      must take a single argument of type <replaceable
+      class="parameter">state_data_type</replaceable>.  The return
+      data type of the aggregate is defined as the return type of this
+      function.  If <replaceable class="parameter">ffunc</replaceable>
+      is not specified, then the ending state value is used as the
+      aggregate's result, and the return type is <replaceable
+      class="parameter">state_data_type</replaceable>.
+     </para>
+
+     <para>
+      For ordered-set (including hypothetical-set) aggregates, the
+      final function receives not only the final state value,
+      but also the values of all the direct arguments.
+     </para>
+
+     <para>
+      If <literal>FINALFUNC_EXTRA</literal> is specified, then in addition to the
+      final state value and any direct arguments, the final function
+      receives extra NULL values corresponding to the aggregate's regular
+      (aggregated) arguments.  This is mainly useful to allow correct
+      resolution of the aggregate result type when a polymorphic aggregate
+      is being defined.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>FINALFUNC_MODIFY</literal> = { <literal>READ_ONLY</literal> | <literal>SHAREABLE</literal> | <literal>READ_WRITE</literal> }</term>
+    <listitem>
+     <para>
+      This option specifies whether the final function is a pure function
+      that does not modify its arguments.  <literal>READ_ONLY</literal> indicates
+      it does not; the other two values indicate that it may change the
+      transition state value.  See <xref linkend="sql-createaggregate-notes"/>
+      below for more detail.  The
+      default is <literal>READ_ONLY</literal>, except for ordered-set aggregates,
+      for which the default is <literal>READ_WRITE</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">combinefunc</replaceable></term>
+    <listitem>
+     <para>
+      The <replaceable class="parameter">combinefunc</replaceable> function
+      may optionally be specified to allow the aggregate function to support
+      partial aggregation.  If provided,
+      the <replaceable class="parameter">combinefunc</replaceable> must
+      combine two <replaceable class="parameter">state_data_type</replaceable>
+      values, each containing the result of aggregation over some subset of
+      the input values, to produce a
+      new <replaceable class="parameter">state_data_type</replaceable> that
+      represents the result of aggregating over both sets of inputs.  This
+      function can be thought of as
+      an <replaceable class="parameter">sfunc</replaceable>, where instead of
+      acting upon an individual input row and adding it to the running
+      aggregate state, it adds another aggregate state to the running state.
+     </para>
+
+     <para>
+      The <replaceable class="parameter">combinefunc</replaceable> must be
+      declared as taking two arguments of
+      the <replaceable class="parameter">state_data_type</replaceable> and
+      returning a value of
+      the <replaceable class="parameter">state_data_type</replaceable>.
+      Optionally this function may be <quote>strict</quote>. In this case the
+      function will not be called when either of the input states are null;
+      the other state will be taken as the correct result.
+     </para>
+
+     <para>
+      For aggregate functions
+      whose <replaceable class="parameter">state_data_type</replaceable>
+      is <type>internal</type>,
+      the <replaceable class="parameter">combinefunc</replaceable> must not
+      be strict. In this case
+      the <replaceable class="parameter">combinefunc</replaceable> must
+      ensure that null states are handled correctly and that the state being
+      returned is properly stored in the aggregate memory context.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">serialfunc</replaceable></term>
+    <listitem>
+     <para>
+      An aggregate function
+      whose <replaceable class="parameter">state_data_type</replaceable>
+      is <type>internal</type> can participate in parallel aggregation only if it
+      has a <replaceable class="parameter">serialfunc</replaceable> function,
+      which must serialize the aggregate state into a <type>bytea</type> value for
+      transmission to another process.  This function must take a single
+      argument of type <type>internal</type> and return type <type>bytea</type>.  A
+      corresponding <replaceable class="parameter">deserialfunc</replaceable>
+      is also required.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">deserialfunc</replaceable></term>
+    <listitem>
+     <para>
+      Deserialize a previously serialized aggregate state back into
+      <replaceable class="parameter">state_data_type</replaceable>. This
+      function must take two arguments of types <type>bytea</type>
+      and <type>internal</type>, and produce a result of type <type>internal</type>.
+      (Note: the second, <type>internal</type> argument is unused, but is required
+      for type safety reasons.)
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">initial_condition</replaceable></term>
+    <listitem>
+     <para>
+      The initial setting for the state value.  This must be a string
+      constant in the form accepted for the data type <replaceable
+      class="parameter">state_data_type</replaceable>.  If not
+      specified, the state value starts out null.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">msfunc</replaceable></term>
+    <listitem>
+     <para>
+      The name of the forward state transition function to be called for each
+      input row in moving-aggregate mode.  This is exactly like the regular
+      transition function, except that its first argument and result are of
+      type <replaceable>mstate_data_type</replaceable>, which might be different
+      from <replaceable>state_data_type</replaceable>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">minvfunc</replaceable></term>
+    <listitem>
+     <para>
+      The name of the inverse state transition function to be used in
+      moving-aggregate mode.  This function has the same argument and
+      result types as <replaceable>msfunc</replaceable>, but it is used to remove
+      a value from the current aggregate state, rather than add a value to
+      it.  The inverse transition function must have the same strictness
+      attribute as the forward state transition function.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">mstate_data_type</replaceable></term>
+    <listitem>
+     <para>
+      The data type for the aggregate's state value, when using
+      moving-aggregate mode.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">mstate_data_size</replaceable></term>
+    <listitem>
+     <para>
+      The approximate average size (in bytes) of the aggregate's state
+      value, when using moving-aggregate mode.  This works the same as
+      <replaceable>state_data_size</replaceable>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">mffunc</replaceable></term>
+    <listitem>
+     <para>
+      The name of the final function called to compute the aggregate's
+      result after all input rows have been traversed, when using
+      moving-aggregate mode.  This works the same as <replaceable>ffunc</replaceable>,
+      except that its first argument's type
+      is <replaceable>mstate_data_type</replaceable> and extra dummy arguments are
+      specified by writing <literal>MFINALFUNC_EXTRA</literal>.
+      The aggregate result type determined by <replaceable>mffunc</replaceable>
+      or <replaceable>mstate_data_type</replaceable> must match that determined by the
+      aggregate's regular implementation.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>MFINALFUNC_MODIFY</literal> = { <literal>READ_ONLY</literal> | <literal>SHAREABLE</literal> | <literal>READ_WRITE</literal> }</term>
+    <listitem>
+     <para>
+      This option is like <literal>FINALFUNC_MODIFY</literal>, but it describes
+      the behavior of the moving-aggregate final function.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">minitial_condition</replaceable></term>
+    <listitem>
+     <para>
+      The initial setting for the state value, when using moving-aggregate
+      mode.  This works the same as <replaceable>initial_condition</replaceable>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">sort_operator</replaceable></term>
+    <listitem>
+     <para>
+      The associated sort operator for a <function>MIN</function>- or
+      <function>MAX</function>-like aggregate.
+      This is just an operator name (possibly schema-qualified).
+      The operator is assumed to have the same input data types as
+      the aggregate (which must be a single-argument normal aggregate).
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>PARALLEL =</literal> { <literal>SAFE</literal> | <literal>RESTRICTED</literal> | <literal>UNSAFE</literal> }</term>
+    <listitem>
+     <para>
+      The meanings of <literal>PARALLEL SAFE</literal>, <literal>PARALLEL
+      RESTRICTED</literal>, and <literal>PARALLEL UNSAFE</literal> are the same as
+      in <link linkend="sql-createfunction"><command>CREATE FUNCTION</command></link>.  An aggregate will not be
+      considered for parallelization if it is marked <literal>PARALLEL
+      UNSAFE</literal> (which is the default!) or <literal>PARALLEL RESTRICTED</literal>.
+      Note that the parallel-safety markings of the aggregate's support
+      functions are not consulted by the planner, only the marking of the
+      aggregate itself.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>HYPOTHETICAL</literal></term>
+    <listitem>
+     <para>
+      For ordered-set aggregates only, this flag specifies that the aggregate
+      arguments are to be processed according to the requirements for
+      hypothetical-set aggregates: that is, the last few direct arguments must
+      match the data types of the aggregated (<literal>WITHIN GROUP</literal>)
+      arguments.  The <literal>HYPOTHETICAL</literal> flag has no effect on
+      run-time behavior, only on parse-time resolution of the data types and
+      collations of the aggregate's arguments.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
+  <para>
+   The parameters of <command>CREATE AGGREGATE</command> can be
+   written in any order, not just the order illustrated above.
+  </para>
+ </refsect1>
+
+ <refsect1 id="sql-createaggregate-notes" xreflabel="Notes">
+  <title>Notes</title>
+
+   <para>
+    In parameters that specify support function names, you can write
+    a schema name if needed, for example <literal>SFUNC = public.sum</literal>.
+    Do not write argument types there, however &mdash; the argument types
+    of the support functions are determined from other parameters.
+   </para>
+
+   <para>
+    Ordinarily, PostgreSQL functions are expected to be true functions that
+    do not modify their input values.  However, an aggregate transition
+    function, <emphasis>when used in the context of an aggregate</emphasis>,
+    is allowed to cheat and modify its transition-state argument in place.
+    This can provide substantial performance benefits compared to making
+    a fresh copy of the transition state each time.
+   </para>
+
+   <para>
+    Likewise, while an aggregate final function is normally expected not to
+    modify its input values, sometimes it is impractical to avoid modifying
+    the transition-state argument.  Such behavior must be declared using
+    the <literal>FINALFUNC_MODIFY</literal> parameter.
+    The <literal>READ_WRITE</literal>
+    value indicates that the final function modifies the transition state in
+    unspecified ways.  This value prevents use of the aggregate as a window
+    function, and it also prevents merging of transition states for aggregate
+    calls that share the same input values and transition functions.
+    The <literal>SHAREABLE</literal> value indicates that the transition function
+    cannot be applied after the final function, but multiple final-function
+    calls can be performed on the ending transition state value.  This value
+    prevents use of the aggregate as a window function, but it allows merging
+    of transition states.  (That is, the optimization of interest here is not
+    applying the same final function repeatedly, but applying different final
+    functions to the same ending transition state value.  This is allowed as
+    long as none of the final functions are marked <literal>READ_WRITE</literal>.)
+   </para>
+
+   <para>
+    If an aggregate supports moving-aggregate mode, it will improve
+    calculation efficiency when the aggregate is used as a window function
+    for a window with moving frame start (that is, a frame start mode other
+    than <literal>UNBOUNDED PRECEDING</literal>).  Conceptually, the forward
+    transition function adds input values to the aggregate's state when
+    they enter the window frame from the bottom, and the inverse transition
+    function removes them again when they leave the frame at the top.  So,
+    when values are removed, they are always removed in the same order they
+    were added.  Whenever the inverse transition function is invoked, it will
+    thus receive the earliest added but not yet removed argument value(s).
+    The inverse transition function can assume that at least one row will
+    remain in the current state after it removes the oldest row.  (When this
+    would not be the case, the window function mechanism simply starts a
+    fresh aggregation, rather than using the inverse transition function.)
+   </para>
+
+   <para>
+    The forward transition function for moving-aggregate mode is not
+    allowed to return NULL as the new state value. If the inverse
+    transition function returns NULL, this is taken as an indication that
+    the inverse function cannot reverse the state calculation for this
+    particular input, and so the aggregate calculation will be redone from
+    scratch for the current frame starting position.  This convention
+    allows moving-aggregate mode to be used in situations where there are
+    some infrequent cases that are impractical to reverse out of the
+    running state value.
+   </para>
+
+   <para>
+    If no moving-aggregate implementation is supplied,
+    the aggregate can still be used with moving frames,
+    but <productname>PostgreSQL</productname> will recompute the whole
+    aggregation whenever the start of the frame moves.
+    Note that whether or not the aggregate supports moving-aggregate
+    mode, <productname>PostgreSQL</productname> can handle a moving frame
+    end without recalculation; this is done by continuing to add new values
+    to the aggregate's state.  This is why use of an aggregate as a window
+    function requires that the final function be read-only: it must
+    not damage the aggregate's state value, so that the aggregation can be
+    continued even after an aggregate result value has been obtained for
+    one set of frame boundaries.
+   </para>
+
+   <para>
+    The syntax for ordered-set aggregates allows <literal>VARIADIC</literal>
+    to be specified for both the last direct parameter and the last
+    aggregated (<literal>WITHIN GROUP</literal>) parameter.  However, the
+    current implementation restricts use of <literal>VARIADIC</literal>
+    in two ways.  First, ordered-set aggregates can only use
+    <literal>VARIADIC "any"</literal>, not other variadic array types.
+    Second, if the last direct parameter is <literal>VARIADIC "any"</literal>,
+    then there can be only one aggregated parameter and it must also
+    be <literal>VARIADIC "any"</literal>.  (In the representation used in the
+    system catalogs, these two parameters are merged into a single
+    <literal>VARIADIC "any"</literal> item, since <structname>pg_proc</structname> cannot
+    represent functions with more than one <literal>VARIADIC</literal> parameter.)
+    If the aggregate is a hypothetical-set aggregate, the direct arguments
+    that match the <literal>VARIADIC "any"</literal> parameter are the hypothetical
+    ones; any preceding parameters represent additional direct arguments
+    that are not constrained to match the aggregated arguments.
+   </para>
+
+   <para>
+    Currently, ordered-set aggregates do not need to support
+    moving-aggregate mode, since they cannot be used as window functions.
+   </para>
+
+   <para>
+    Partial (including parallel) aggregation is currently not supported for
+    ordered-set aggregates.  Also, it will never be used for aggregate calls
+    that include <literal>DISTINCT</literal> or <literal>ORDER BY</literal> clauses, since
+    those semantics cannot be supported during partial aggregation.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   See <xref linkend="xaggr"/>.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>CREATE AGGREGATE</command> is a
+   <productname>PostgreSQL</productname> language extension.  The SQL
+   standard does not provide for user-defined aggregate functions.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alteraggregate"/></member>
+   <member><xref linkend="sql-dropaggregate"/></member>
+  </simplelist>
+ </refsect1>
+</refentry>
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-13 13:44:03 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-13 13:44:03 +0000
commit	293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch)
tree	fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /doc/src/sgml/ref/create_aggregate.sgml
parent	Initial commit. (diff)
download	postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip