diff options
Diffstat (limited to 'doc/src/sgml/ref/create_aggregate.sgml')
-rw-r--r-- | doc/src/sgml/ref/create_aggregate.sgml | 805 |
1 files changed, 805 insertions, 0 deletions
diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml new file mode 100644 index 0000000..222e0aa --- /dev/null +++ b/doc/src/sgml/ref/create_aggregate.sgml @@ -0,0 +1,805 @@ +<!-- +doc/src/sgml/ref/create_aggregate.sgml +PostgreSQL documentation +--> + +<refentry id="sql-createaggregate"> + <indexterm zone="sql-createaggregate"> + <primary>CREATE AGGREGATE</primary> + </indexterm> + + <refmeta> + <refentrytitle>CREATE AGGREGATE</refentrytitle> + <manvolnum>7</manvolnum> + <refmiscinfo>SQL - Language Statements</refmiscinfo> + </refmeta> + + <refnamediv> + <refname>CREATE AGGREGATE</refname> + <refpurpose>define a new aggregate function</refpurpose> + </refnamediv> + + <refsynopsisdiv> +<synopsis> +CREATE [ OR REPLACE ] AGGREGATE <replaceable class="parameter">name</replaceable> ( [ <replaceable class="parameter">argmode</replaceable> ] [ <replaceable class="parameter">argname</replaceable> ] <replaceable class="parameter">arg_data_type</replaceable> [ , ... ] ) ( + SFUNC = <replaceable class="parameter">sfunc</replaceable>, + STYPE = <replaceable class="parameter">state_data_type</replaceable> + [ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ] + [ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ] + [ , FINALFUNC_EXTRA ] + [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ] + [ , COMBINEFUNC = <replaceable class="parameter">combinefunc</replaceable> ] + [ , SERIALFUNC = <replaceable class="parameter">serialfunc</replaceable> ] + [ , DESERIALFUNC = <replaceable class="parameter">deserialfunc</replaceable> ] + [ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ] + [ , MSFUNC = <replaceable class="parameter">msfunc</replaceable> ] + [ , MINVFUNC = <replaceable class="parameter">minvfunc</replaceable> ] + [ , MSTYPE = <replaceable class="parameter">mstate_data_type</replaceable> ] + [ , MSSPACE = <replaceable class="parameter">mstate_data_size</replaceable> ] + [ , MFINALFUNC = <replaceable class="parameter">mffunc</replaceable> ] + [ , MFINALFUNC_EXTRA ] + [ , MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ] + [ , MINITCOND = <replaceable class="parameter">minitial_condition</replaceable> ] + [ , SORTOP = <replaceable class="parameter">sort_operator</replaceable> ] + [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ] +) + +CREATE [ OR REPLACE ] AGGREGATE <replaceable class="parameter">name</replaceable> ( [ [ <replaceable class="parameter">argmode</replaceable> ] [ <replaceable class="parameter">argname</replaceable> ] <replaceable class="parameter">arg_data_type</replaceable> [ , ... ] ] + ORDER BY [ <replaceable class="parameter">argmode</replaceable> ] [ <replaceable class="parameter">argname</replaceable> ] <replaceable class="parameter">arg_data_type</replaceable> [ , ... ] ) ( + SFUNC = <replaceable class="parameter">sfunc</replaceable>, + STYPE = <replaceable class="parameter">state_data_type</replaceable> + [ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ] + [ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ] + [ , FINALFUNC_EXTRA ] + [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ] + [ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ] + [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ] + [ , HYPOTHETICAL ] +) + +<phrase>or the old syntax</phrase> + +CREATE [ OR REPLACE ] AGGREGATE <replaceable class="parameter">name</replaceable> ( + BASETYPE = <replaceable class="parameter">base_type</replaceable>, + SFUNC = <replaceable class="parameter">sfunc</replaceable>, + STYPE = <replaceable class="parameter">state_data_type</replaceable> + [ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ] + [ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ] + [ , FINALFUNC_EXTRA ] + [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ] + [ , COMBINEFUNC = <replaceable class="parameter">combinefunc</replaceable> ] + [ , SERIALFUNC = <replaceable class="parameter">serialfunc</replaceable> ] + [ , DESERIALFUNC = <replaceable class="parameter">deserialfunc</replaceable> ] + [ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ] + [ , MSFUNC = <replaceable class="parameter">msfunc</replaceable> ] + [ , MINVFUNC = <replaceable class="parameter">minvfunc</replaceable> ] + [ , MSTYPE = <replaceable class="parameter">mstate_data_type</replaceable> ] + [ , MSSPACE = <replaceable class="parameter">mstate_data_size</replaceable> ] + [ , MFINALFUNC = <replaceable class="parameter">mffunc</replaceable> ] + [ , MFINALFUNC_EXTRA ] + [ , MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ] + [ , MINITCOND = <replaceable class="parameter">minitial_condition</replaceable> ] + [ , SORTOP = <replaceable class="parameter">sort_operator</replaceable> ] +) +</synopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>CREATE AGGREGATE</command> defines a new aggregate function. + <command>CREATE OR REPLACE AGGREGATE</command> will either define a new + aggregate function or replace an existing definition. Some basic and + commonly-used aggregate functions are included with the distribution; they + are documented in <xref linkend="functions-aggregate"/>. If one defines new + types or needs an aggregate function not already provided, then + <command>CREATE AGGREGATE</command> can be used to provide the desired + features. + </para> + + <para> + When replacing an existing definition, the argument types, result type, + and number of direct arguments may not be changed. Also, the new definition + must be of the same kind (ordinary aggregate, ordered-set aggregate, or + hypothetical-set aggregate) as the old one. + </para> + + <para> + If a schema name is given (for example, <literal>CREATE AGGREGATE + myschema.myagg ...</literal>) then the aggregate function is created in the + specified schema. Otherwise it is created in the current schema. + </para> + + <para> + An aggregate function is identified by its name and input data type(s). + Two aggregates in the same schema can have the same name if they operate on + different input types. The + name and input data type(s) of an aggregate must also be distinct from + the name and input data type(s) of every ordinary function in the same + schema. + This behavior is identical to overloading of ordinary function names + (see <xref linkend="sql-createfunction"/>). + </para> + + <para> + A simple aggregate function is made from one or two ordinary + functions: + a state transition function + <replaceable class="parameter">sfunc</replaceable>, + and an optional final calculation function + <replaceable class="parameter">ffunc</replaceable>. + These are used as follows: +<programlisting> +<replaceable class="parameter">sfunc</replaceable>( internal-state, next-data-values ) ---> next-internal-state +<replaceable class="parameter">ffunc</replaceable>( internal-state ) ---> aggregate-value +</programlisting> + </para> + + <para> + <productname>PostgreSQL</productname> creates a temporary variable + of data type <replaceable class="parameter">stype</replaceable> + to hold the current internal state of the aggregate. At each input row, + the aggregate argument value(s) are calculated and + the state transition function is invoked with the current state value + and the new argument value(s) to calculate a new + internal state value. After all the rows have been processed, + the final function is invoked once to calculate the aggregate's return + value. If there is no final function then the ending state value + is returned as-is. + </para> + + <para> + An aggregate function can provide an initial condition, + that is, an initial value for the internal state value. + This is specified and stored in the database as a value of type + <type>text</type>, but it must be a valid external representation + of a constant of the state value data type. If it is not supplied + then the state value starts out null. + </para> + + <para> + If the state transition function is declared <quote>strict</quote>, + then it cannot be called with null inputs. With such a transition + function, aggregate execution behaves as follows. Rows with any null input + values are ignored (the function is not called and the previous state value + is retained). If the initial state value is null, then at the first row + with all-nonnull input values, the first argument value replaces the state + value, and the transition function is invoked at each subsequent row with + all-nonnull input values. + This is handy for implementing aggregates like <function>max</function>. + Note that this behavior is only available when + <replaceable class="parameter">state_data_type</replaceable> + is the same as the first + <replaceable class="parameter">arg_data_type</replaceable>. + When these types are different, you must supply a nonnull initial + condition or use a nonstrict transition function. + </para> + + <para> + If the state transition function is not strict, then it will be called + unconditionally at each input row, and must deal with null inputs + and null state values for itself. This allows the aggregate + author to have full control over the aggregate's handling of null values. + </para> + + <para> + If the final function is declared <quote>strict</quote>, then it will not + be called when the ending state value is null; instead a null result + will be returned automatically. (Of course this is just the normal + behavior of strict functions.) In any case the final function has + the option of returning a null value. For example, the final function for + <function>avg</function> returns null when it sees there were zero + input rows. + </para> + + <para> + Sometimes it is useful to declare the final function as taking not just + the state value, but extra parameters corresponding to the aggregate's + input values. The main reason for doing this is if the final function + is polymorphic and the state value's data type would be inadequate to + pin down the result type. These extra parameters are always passed as + NULL (and so the final function must not be strict when + the <literal>FINALFUNC_EXTRA</literal> option is used), but nonetheless they + are valid parameters. The final function could for example make use + of <function>get_fn_expr_argtype</function> to identify the actual argument type + in the current call. + </para> + + <para> + An aggregate can optionally support <firstterm>moving-aggregate mode</firstterm>, + as described in <xref linkend="xaggr-moving-aggregates"/>. This requires + specifying the <literal>MSFUNC</literal>, <literal>MINVFUNC</literal>, + and <literal>MSTYPE</literal> parameters, and optionally + the <literal>MSSPACE</literal>, <literal>MFINALFUNC</literal>, + <literal>MFINALFUNC_EXTRA</literal>, <literal>MFINALFUNC_MODIFY</literal>, + and <literal>MINITCOND</literal> parameters. Except for <literal>MINVFUNC</literal>, + these parameters work like the corresponding simple-aggregate parameters + without <literal>M</literal>; they define a separate implementation of the + aggregate that includes an inverse transition function. + </para> + + <para> + The syntax with <literal>ORDER BY</literal> in the parameter list creates + a special type of aggregate called an <firstterm>ordered-set + aggregate</firstterm>; or if <literal>HYPOTHETICAL</literal> is specified, then + a <firstterm>hypothetical-set aggregate</firstterm> is created. These + aggregates operate over groups of sorted values in order-dependent ways, + so that specification of an input sort order is an essential part of a + call. Also, they can have <firstterm>direct</firstterm> arguments, which are + arguments that are evaluated only once per aggregation rather than once + per input row. Hypothetical-set aggregates are a subclass of ordered-set + aggregates in which some of the direct arguments are required to match, + in number and data types, the aggregated argument columns. This allows + the values of those direct arguments to be added to the collection of + aggregate-input rows as an additional <quote>hypothetical</quote> row. + </para> + + <para> + An aggregate can optionally support <firstterm>partial aggregation</firstterm>, + as described in <xref linkend="xaggr-partial-aggregates"/>. + This requires specifying the <literal>COMBINEFUNC</literal> parameter. + If the <replaceable class="parameter">state_data_type</replaceable> + is <type>internal</type>, it's usually also appropriate to provide the + <literal>SERIALFUNC</literal> and <literal>DESERIALFUNC</literal> parameters so that + parallel aggregation is possible. Note that the aggregate must also be + marked <literal>PARALLEL SAFE</literal> to enable parallel aggregation. + </para> + + <para> + Aggregates that behave like <function>MIN</function> or <function>MAX</function> can + sometimes be optimized by looking into an index instead of scanning every + input row. If this aggregate can be so optimized, indicate it by + specifying a <firstterm>sort operator</firstterm>. The basic requirement is that + the aggregate must yield the first element in the sort ordering induced by + the operator; in other words: +<programlisting> +SELECT agg(col) FROM tab; +</programlisting> + must be equivalent to: +<programlisting> +SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; +</programlisting> + Further assumptions are that the aggregate ignores null inputs, and that + it delivers a null result if and only if there were no non-null inputs. + Ordinarily, a data type's <literal><</literal> operator is the proper sort + operator for <function>MIN</function>, and <literal>></literal> is the proper sort + operator for <function>MAX</function>. Note that the optimization will never + actually take effect unless the specified operator is the <quote>less + than</quote> or <quote>greater than</quote> strategy member of a B-tree + index operator class. + </para> + + <para> + To be able to create an aggregate function, you must + have <literal>USAGE</literal> privilege on the argument types, the state + type(s), and the return type, as well as <literal>EXECUTE</literal> + privilege on the supporting functions. + </para> + </refsect1> + + <refsect1> + <title>Parameters</title> + + <variablelist> + <varlistentry> + <term><replaceable class="parameter">name</replaceable></term> + <listitem> + <para> + The name (optionally schema-qualified) of the aggregate function + to create. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">argmode</replaceable></term> + + <listitem> + <para> + The mode of an argument: <literal>IN</literal> or <literal>VARIADIC</literal>. + (Aggregate functions do not support <literal>OUT</literal> arguments.) + If omitted, the default is <literal>IN</literal>. Only the last argument + can be marked <literal>VARIADIC</literal>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">argname</replaceable></term> + + <listitem> + <para> + The name of an argument. This is currently only useful for + documentation purposes. If omitted, the argument has no name. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">arg_data_type</replaceable></term> + <listitem> + <para> + An input data type on which this aggregate function operates. + To create a zero-argument aggregate function, write <literal>*</literal> + in place of the list of argument specifications. (An example of such an + aggregate is <function>count(*)</function>.) + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">base_type</replaceable></term> + <listitem> + <para> + In the old syntax for <command>CREATE AGGREGATE</command>, the input data type + is specified by a <literal>basetype</literal> parameter rather than being + written next to the aggregate name. Note that this syntax allows + only one input parameter. To define a zero-argument aggregate function + with this syntax, specify the <literal>basetype</literal> as + <literal>"ANY"</literal> (not <literal>*</literal>). + Ordered-set aggregates cannot be defined with the old syntax. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">sfunc</replaceable></term> + <listitem> + <para> + The name of the state transition function to be called for each + input row. For a normal <replaceable class="parameter">N</replaceable>-argument + aggregate function, the <replaceable class="parameter">sfunc</replaceable> + must take <replaceable class="parameter">N</replaceable>+1 arguments, + the first being of type <replaceable + class="parameter">state_data_type</replaceable> and the rest + matching the declared input data type(s) of the aggregate. + The function must return a value of type <replaceable + class="parameter">state_data_type</replaceable>. This function + takes the current state value and the current input data value(s), + and returns the next state value. + </para> + + <para> + For ordered-set (including hypothetical-set) aggregates, the state + transition function receives only the current state value and the + aggregated arguments, not the direct arguments. Otherwise it is the + same. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">state_data_type</replaceable></term> + <listitem> + <para> + The data type for the aggregate's state value. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">state_data_size</replaceable></term> + <listitem> + <para> + The approximate average size (in bytes) of the aggregate's state value. + If this parameter is omitted or is zero, a default estimate is used + based on the <replaceable>state_data_type</replaceable>. + The planner uses this value to estimate the memory required for a + grouped aggregate query. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">ffunc</replaceable></term> + <listitem> + <para> + The name of the final function called to compute the aggregate's + result after all input rows have been traversed. + For a normal aggregate, this function + must take a single argument of type <replaceable + class="parameter">state_data_type</replaceable>. The return + data type of the aggregate is defined as the return type of this + function. If <replaceable class="parameter">ffunc</replaceable> + is not specified, then the ending state value is used as the + aggregate's result, and the return type is <replaceable + class="parameter">state_data_type</replaceable>. + </para> + + <para> + For ordered-set (including hypothetical-set) aggregates, the + final function receives not only the final state value, + but also the values of all the direct arguments. + </para> + + <para> + If <literal>FINALFUNC_EXTRA</literal> is specified, then in addition to the + final state value and any direct arguments, the final function + receives extra NULL values corresponding to the aggregate's regular + (aggregated) arguments. This is mainly useful to allow correct + resolution of the aggregate result type when a polymorphic aggregate + is being defined. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>FINALFUNC_MODIFY</literal> = { <literal>READ_ONLY</literal> | <literal>SHAREABLE</literal> | <literal>READ_WRITE</literal> }</term> + <listitem> + <para> + This option specifies whether the final function is a pure function + that does not modify its arguments. <literal>READ_ONLY</literal> indicates + it does not; the other two values indicate that it may change the + transition state value. See <xref linkend="sql-createaggregate-notes"/> + below for more detail. The + default is <literal>READ_ONLY</literal>, except for ordered-set aggregates, + for which the default is <literal>READ_WRITE</literal>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">combinefunc</replaceable></term> + <listitem> + <para> + The <replaceable class="parameter">combinefunc</replaceable> function + may optionally be specified to allow the aggregate function to support + partial aggregation. If provided, + the <replaceable class="parameter">combinefunc</replaceable> must + combine two <replaceable class="parameter">state_data_type</replaceable> + values, each containing the result of aggregation over some subset of + the input values, to produce a + new <replaceable class="parameter">state_data_type</replaceable> that + represents the result of aggregating over both sets of inputs. This + function can be thought of as + an <replaceable class="parameter">sfunc</replaceable>, where instead of + acting upon an individual input row and adding it to the running + aggregate state, it adds another aggregate state to the running state. + </para> + + <para> + The <replaceable class="parameter">combinefunc</replaceable> must be + declared as taking two arguments of + the <replaceable class="parameter">state_data_type</replaceable> and + returning a value of + the <replaceable class="parameter">state_data_type</replaceable>. + Optionally this function may be <quote>strict</quote>. In this case the + function will not be called when either of the input states are null; + the other state will be taken as the correct result. + </para> + + <para> + For aggregate functions + whose <replaceable class="parameter">state_data_type</replaceable> + is <type>internal</type>, + the <replaceable class="parameter">combinefunc</replaceable> must not + be strict. In this case + the <replaceable class="parameter">combinefunc</replaceable> must + ensure that null states are handled correctly and that the state being + returned is properly stored in the aggregate memory context. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">serialfunc</replaceable></term> + <listitem> + <para> + An aggregate function + whose <replaceable class="parameter">state_data_type</replaceable> + is <type>internal</type> can participate in parallel aggregation only if it + has a <replaceable class="parameter">serialfunc</replaceable> function, + which must serialize the aggregate state into a <type>bytea</type> value for + transmission to another process. This function must take a single + argument of type <type>internal</type> and return type <type>bytea</type>. A + corresponding <replaceable class="parameter">deserialfunc</replaceable> + is also required. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">deserialfunc</replaceable></term> + <listitem> + <para> + Deserialize a previously serialized aggregate state back into + <replaceable class="parameter">state_data_type</replaceable>. This + function must take two arguments of types <type>bytea</type> + and <type>internal</type>, and produce a result of type <type>internal</type>. + (Note: the second, <type>internal</type> argument is unused, but is required + for type safety reasons.) + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">initial_condition</replaceable></term> + <listitem> + <para> + The initial setting for the state value. This must be a string + constant in the form accepted for the data type <replaceable + class="parameter">state_data_type</replaceable>. If not + specified, the state value starts out null. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">msfunc</replaceable></term> + <listitem> + <para> + The name of the forward state transition function to be called for each + input row in moving-aggregate mode. This is exactly like the regular + transition function, except that its first argument and result are of + type <replaceable>mstate_data_type</replaceable>, which might be different + from <replaceable>state_data_type</replaceable>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">minvfunc</replaceable></term> + <listitem> + <para> + The name of the inverse state transition function to be used in + moving-aggregate mode. This function has the same argument and + result types as <replaceable>msfunc</replaceable>, but it is used to remove + a value from the current aggregate state, rather than add a value to + it. The inverse transition function must have the same strictness + attribute as the forward state transition function. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">mstate_data_type</replaceable></term> + <listitem> + <para> + The data type for the aggregate's state value, when using + moving-aggregate mode. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">mstate_data_size</replaceable></term> + <listitem> + <para> + The approximate average size (in bytes) of the aggregate's state + value, when using moving-aggregate mode. This works the same as + <replaceable>state_data_size</replaceable>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">mffunc</replaceable></term> + <listitem> + <para> + The name of the final function called to compute the aggregate's + result after all input rows have been traversed, when using + moving-aggregate mode. This works the same as <replaceable>ffunc</replaceable>, + except that its first argument's type + is <replaceable>mstate_data_type</replaceable> and extra dummy arguments are + specified by writing <literal>MFINALFUNC_EXTRA</literal>. + The aggregate result type determined by <replaceable>mffunc</replaceable> + or <replaceable>mstate_data_type</replaceable> must match that determined by the + aggregate's regular implementation. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>MFINALFUNC_MODIFY</literal> = { <literal>READ_ONLY</literal> | <literal>SHAREABLE</literal> | <literal>READ_WRITE</literal> }</term> + <listitem> + <para> + This option is like <literal>FINALFUNC_MODIFY</literal>, but it describes + the behavior of the moving-aggregate final function. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">minitial_condition</replaceable></term> + <listitem> + <para> + The initial setting for the state value, when using moving-aggregate + mode. This works the same as <replaceable>initial_condition</replaceable>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">sort_operator</replaceable></term> + <listitem> + <para> + The associated sort operator for a <function>MIN</function>- or + <function>MAX</function>-like aggregate. + This is just an operator name (possibly schema-qualified). + The operator is assumed to have the same input data types as + the aggregate (which must be a single-argument normal aggregate). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>PARALLEL =</literal> { <literal>SAFE</literal> | <literal>RESTRICTED</literal> | <literal>UNSAFE</literal> }</term> + <listitem> + <para> + The meanings of <literal>PARALLEL SAFE</literal>, <literal>PARALLEL + RESTRICTED</literal>, and <literal>PARALLEL UNSAFE</literal> are the same as + in <link linkend="sql-createfunction"><command>CREATE FUNCTION</command></link>. An aggregate will not be + considered for parallelization if it is marked <literal>PARALLEL + UNSAFE</literal> (which is the default!) or <literal>PARALLEL RESTRICTED</literal>. + Note that the parallel-safety markings of the aggregate's support + functions are not consulted by the planner, only the marking of the + aggregate itself. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>HYPOTHETICAL</literal></term> + <listitem> + <para> + For ordered-set aggregates only, this flag specifies that the aggregate + arguments are to be processed according to the requirements for + hypothetical-set aggregates: that is, the last few direct arguments must + match the data types of the aggregated (<literal>WITHIN GROUP</literal>) + arguments. The <literal>HYPOTHETICAL</literal> flag has no effect on + run-time behavior, only on parse-time resolution of the data types and + collations of the aggregate's arguments. + </para> + </listitem> + </varlistentry> + </variablelist> + + <para> + The parameters of <command>CREATE AGGREGATE</command> can be + written in any order, not just the order illustrated above. + </para> + </refsect1> + + <refsect1 id="sql-createaggregate-notes" xreflabel="Notes"> + <title>Notes</title> + + <para> + In parameters that specify support function names, you can write + a schema name if needed, for example <literal>SFUNC = public.sum</literal>. + Do not write argument types there, however — the argument types + of the support functions are determined from other parameters. + </para> + + <para> + Ordinarily, PostgreSQL functions are expected to be true functions that + do not modify their input values. However, an aggregate transition + function, <emphasis>when used in the context of an aggregate</emphasis>, + is allowed to cheat and modify its transition-state argument in place. + This can provide substantial performance benefits compared to making + a fresh copy of the transition state each time. + </para> + + <para> + Likewise, while an aggregate final function is normally expected not to + modify its input values, sometimes it is impractical to avoid modifying + the transition-state argument. Such behavior must be declared using + the <literal>FINALFUNC_MODIFY</literal> parameter. + The <literal>READ_WRITE</literal> + value indicates that the final function modifies the transition state in + unspecified ways. This value prevents use of the aggregate as a window + function, and it also prevents merging of transition states for aggregate + calls that share the same input values and transition functions. + The <literal>SHAREABLE</literal> value indicates that the transition function + cannot be applied after the final function, but multiple final-function + calls can be performed on the ending transition state value. This value + prevents use of the aggregate as a window function, but it allows merging + of transition states. (That is, the optimization of interest here is not + applying the same final function repeatedly, but applying different final + functions to the same ending transition state value. This is allowed as + long as none of the final functions are marked <literal>READ_WRITE</literal>.) + </para> + + <para> + If an aggregate supports moving-aggregate mode, it will improve + calculation efficiency when the aggregate is used as a window function + for a window with moving frame start (that is, a frame start mode other + than <literal>UNBOUNDED PRECEDING</literal>). Conceptually, the forward + transition function adds input values to the aggregate's state when + they enter the window frame from the bottom, and the inverse transition + function removes them again when they leave the frame at the top. So, + when values are removed, they are always removed in the same order they + were added. Whenever the inverse transition function is invoked, it will + thus receive the earliest added but not yet removed argument value(s). + The inverse transition function can assume that at least one row will + remain in the current state after it removes the oldest row. (When this + would not be the case, the window function mechanism simply starts a + fresh aggregation, rather than using the inverse transition function.) + </para> + + <para> + The forward transition function for moving-aggregate mode is not + allowed to return NULL as the new state value. If the inverse + transition function returns NULL, this is taken as an indication that + the inverse function cannot reverse the state calculation for this + particular input, and so the aggregate calculation will be redone from + scratch for the current frame starting position. This convention + allows moving-aggregate mode to be used in situations where there are + some infrequent cases that are impractical to reverse out of the + running state value. + </para> + + <para> + If no moving-aggregate implementation is supplied, + the aggregate can still be used with moving frames, + but <productname>PostgreSQL</productname> will recompute the whole + aggregation whenever the start of the frame moves. + Note that whether or not the aggregate supports moving-aggregate + mode, <productname>PostgreSQL</productname> can handle a moving frame + end without recalculation; this is done by continuing to add new values + to the aggregate's state. This is why use of an aggregate as a window + function requires that the final function be read-only: it must + not damage the aggregate's state value, so that the aggregation can be + continued even after an aggregate result value has been obtained for + one set of frame boundaries. + </para> + + <para> + The syntax for ordered-set aggregates allows <literal>VARIADIC</literal> + to be specified for both the last direct parameter and the last + aggregated (<literal>WITHIN GROUP</literal>) parameter. However, the + current implementation restricts use of <literal>VARIADIC</literal> + in two ways. First, ordered-set aggregates can only use + <literal>VARIADIC "any"</literal>, not other variadic array types. + Second, if the last direct parameter is <literal>VARIADIC "any"</literal>, + then there can be only one aggregated parameter and it must also + be <literal>VARIADIC "any"</literal>. (In the representation used in the + system catalogs, these two parameters are merged into a single + <literal>VARIADIC "any"</literal> item, since <structname>pg_proc</structname> cannot + represent functions with more than one <literal>VARIADIC</literal> parameter.) + If the aggregate is a hypothetical-set aggregate, the direct arguments + that match the <literal>VARIADIC "any"</literal> parameter are the hypothetical + ones; any preceding parameters represent additional direct arguments + that are not constrained to match the aggregated arguments. + </para> + + <para> + Currently, ordered-set aggregates do not need to support + moving-aggregate mode, since they cannot be used as window functions. + </para> + + <para> + Partial (including parallel) aggregation is currently not supported for + ordered-set aggregates. Also, it will never be used for aggregate calls + that include <literal>DISTINCT</literal> or <literal>ORDER BY</literal> clauses, since + those semantics cannot be supported during partial aggregation. + </para> + </refsect1> + + <refsect1> + <title>Examples</title> + + <para> + See <xref linkend="xaggr"/>. + </para> + </refsect1> + + <refsect1> + <title>Compatibility</title> + + <para> + <command>CREATE AGGREGATE</command> is a + <productname>PostgreSQL</productname> language extension. The SQL + standard does not provide for user-defined aggregate functions. + </para> + </refsect1> + + <refsect1> + <title>See Also</title> + + <simplelist type="inline"> + <member><xref linkend="sql-alteraggregate"/></member> + <member><xref linkend="sql-dropaggregate"/></member> + </simplelist> + </refsect1> +</refentry> |