diff options
Diffstat (limited to 'doc/src/sgml/plpgsql.sgml')
-rw-r--r-- | doc/src/sgml/plpgsql.sgml | 6092 |
1 files changed, 6092 insertions, 0 deletions
diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml new file mode 100644 index 0000000..21cc7c6 --- /dev/null +++ b/doc/src/sgml/plpgsql.sgml @@ -0,0 +1,6092 @@ +<!-- doc/src/sgml/plpgsql.sgml --> + +<chapter id="plpgsql"> + <title><application>PL/pgSQL</application> — <acronym>SQL</acronym> Procedural Language</title> + + <indexterm zone="plpgsql"> + <primary>PL/pgSQL</primary> + </indexterm> + + <sect1 id="plpgsql-overview"> + <title>Overview</title> + + <para> + <application>PL/pgSQL</application> is a loadable procedural + language for the <productname>PostgreSQL</productname> database + system. The design goals of <application>PL/pgSQL</application> were to create + a loadable procedural language that + + <itemizedlist> + <listitem> + <para> + can be used to create functions, procedures, and triggers, + </para> + </listitem> + <listitem> + <para> + adds control structures to the <acronym>SQL</acronym> language, + </para> + </listitem> + <listitem> + <para> + can perform complex computations, + </para> + </listitem> + <listitem> + <para> + inherits all user-defined types, functions, procedures, and operators, + </para> + </listitem> + <listitem> + <para> + can be defined to be trusted by the server, + </para> + </listitem> + <listitem> + <para> + is easy to use. + </para> + </listitem> + </itemizedlist> + </para> + + <para> + Functions created with <application>PL/pgSQL</application> can be + used anywhere that built-in functions could be used. + For example, it is possible to + create complex conditional computation functions and later use + them to define operators or use them in index expressions. + </para> + + <para> + In <productname>PostgreSQL</productname> 9.0 and later, + <application>PL/pgSQL</application> is installed by default. + However it is still a loadable module, so especially security-conscious + administrators could choose to remove it. + </para> + + <sect2 id="plpgsql-advantages"> + <title>Advantages of Using <application>PL/pgSQL</application></title> + + <para> + <acronym>SQL</acronym> is the language <productname>PostgreSQL</productname> + and most other relational databases use as query language. It's + portable and easy to learn. But every <acronym>SQL</acronym> + statement must be executed individually by the database server. + </para> + + <para> + That means that your client application must send each query to + the database server, wait for it to be processed, receive and + process the results, do some computation, then send further + queries to the server. All this incurs interprocess + communication and will also incur network overhead if your client + is on a different machine than the database server. + </para> + + <para> + With <application>PL/pgSQL</application> you can group a block of + computation and a series of queries <emphasis>inside</emphasis> + the database server, thus having the power of a procedural + language and the ease of use of SQL, but with considerable + savings of client/server communication overhead. + </para> + <itemizedlist> + + <listitem><para> Extra round trips between + client and server are eliminated </para></listitem> + + <listitem><para> Intermediate results that the client does not + need do not have to be marshaled or transferred between server + and client </para></listitem> + + <listitem><para> Multiple rounds of query + parsing can be avoided </para></listitem> + + </itemizedlist> + <para> This can result in a considerable performance increase as + compared to an application that does not use stored functions. + </para> + + <para> + Also, with <application>PL/pgSQL</application> you can use all + the data types, operators and functions of SQL. + </para> + </sect2> + + <sect2 id="plpgsql-args-results"> + <title>Supported Argument and Result Data Types</title> + + <para> + Functions written in <application>PL/pgSQL</application> can accept + as arguments any scalar or array data type supported by the server, + and they can return a result of any of these types. They can also + accept or return any composite type (row type) specified by name. + It is also possible to declare a <application>PL/pgSQL</application> + function as accepting <type>record</type>, which means that any + composite type will do as input, or + as returning <type>record</type>, which means that the result + is a row type whose columns are determined by specification in the + calling query, as discussed in <xref linkend="queries-tablefunctions"/>. + </para> + + <para> + <application>PL/pgSQL</application> functions can be declared to accept a variable + number of arguments by using the <literal>VARIADIC</literal> marker. This + works exactly the same way as for SQL functions, as discussed in + <xref linkend="xfunc-sql-variadic-functions"/>. + </para> + + <para> + <application>PL/pgSQL</application> functions can also be declared to + accept and return the polymorphic types described in + <xref linkend="extend-types-polymorphic"/>, thus allowing the actual data + types handled by the function to vary from call to call. + Examples appear in <xref linkend="plpgsql-declaration-parameters"/>. + </para> + + <para> + <application>PL/pgSQL</application> functions can also be declared to return + a <quote>set</quote> (or table) of any data type that can be returned as + a single instance. Such a function generates its output by executing + <command>RETURN NEXT</command> for each desired element of the result + set, or by using <command>RETURN QUERY</command> to output the result of + evaluating a query. + </para> + + <para> + Finally, a <application>PL/pgSQL</application> function can be declared to return + <type>void</type> if it has no useful return value. (Alternatively, it + could be written as a procedure in that case.) + </para> + + <para> + <application>PL/pgSQL</application> functions can also be declared with output + parameters in place of an explicit specification of the return type. + This does not add any fundamental capability to the language, but + it is often convenient, especially for returning multiple values. + The <literal>RETURNS TABLE</literal> notation can also be used in place + of <literal>RETURNS SETOF</literal>. + </para> + + <para> + Specific examples appear in + <xref linkend="plpgsql-declaration-parameters"/> and + <xref linkend="plpgsql-statements-returning"/>. + </para> + </sect2> + </sect1> + + <sect1 id="plpgsql-structure"> + <title>Structure of <application>PL/pgSQL</application></title> + + <para> + Functions written in <application>PL/pgSQL</application> are defined + to the server by executing <xref linkend="sql-createfunction"/> commands. + Such a command would normally look like, say, +<programlisting> +CREATE FUNCTION somefunc(integer, text) RETURNS integer +AS '<replaceable>function body text</replaceable>' +LANGUAGE plpgsql; +</programlisting> + The function body is simply a string literal so far as <command>CREATE + FUNCTION</command> is concerned. It is often helpful to use dollar quoting + (see <xref linkend="sql-syntax-dollar-quoting"/>) to write the function + body, rather than the normal single quote syntax. Without dollar quoting, + any single quotes or backslashes in the function body must be escaped by + doubling them. Almost all the examples in this chapter use dollar-quoted + literals for their function bodies. + </para> + + <para> + <application>PL/pgSQL</application> is a block-structured language. + The complete text of a function body must be a + <firstterm>block</firstterm>. A block is defined as: + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +<optional> DECLARE + <replaceable>declarations</replaceable> </optional> +BEGIN + <replaceable>statements</replaceable> +END <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + </para> + + <para> + Each declaration and each statement within a block is terminated + by a semicolon. A block that appears within another block must + have a semicolon after <literal>END</literal>, as shown above; + however the final <literal>END</literal> that + concludes a function body does not require a semicolon. + </para> + + <tip> + <para> + A common mistake is to write a semicolon immediately after + <literal>BEGIN</literal>. This is incorrect and will result in a syntax error. + </para> + </tip> + + <para> + A <replaceable>label</replaceable> is only needed if you want to + identify the block for use + in an <literal>EXIT</literal> statement, or to qualify the names of the + variables declared in the block. If a label is given after + <literal>END</literal>, it must match the label at the block's beginning. + </para> + + <para> + All key words are case-insensitive. + Identifiers are implicitly converted to lower case + unless double-quoted, just as they are in ordinary SQL commands. + </para> + + <para> + Comments work the same way in <application>PL/pgSQL</application> code as in + ordinary SQL. A double dash (<literal>--</literal>) starts a comment + that extends to the end of the line. A <literal>/*</literal> starts a + block comment that extends to the matching occurrence of + <literal>*/</literal>. Block comments nest. + </para> + + <para> + Any statement in the statement section of a block + can be a <firstterm>subblock</firstterm>. Subblocks can be used for + logical grouping or to localize variables to a small group + of statements. Variables declared in a subblock mask any + similarly-named variables of outer blocks for the duration + of the subblock; but you can access the outer variables anyway + if you qualify their names with their block's label. For example: +<programlisting> +CREATE FUNCTION somefunc() RETURNS integer AS $$ +<< outerblock >> +DECLARE + quantity integer := 30; +BEGIN + RAISE NOTICE 'Quantity here is %', quantity; -- Prints 30 + quantity := 50; + -- + -- Create a subblock + -- + DECLARE + quantity integer := 80; + BEGIN + RAISE NOTICE 'Quantity here is %', quantity; -- Prints 80 + RAISE NOTICE 'Outer quantity here is %', outerblock.quantity; -- Prints 50 + END; + + RAISE NOTICE 'Quantity here is %', quantity; -- Prints 50 + + RETURN quantity; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <note> + <para> + There is actually a hidden <quote>outer block</quote> surrounding the body + of any <application>PL/pgSQL</application> function. This block provides the + declarations of the function's parameters (if any), as well as some + special variables such as <literal>FOUND</literal> (see + <xref linkend="plpgsql-statements-diagnostics"/>). The outer block is + labeled with the function's name, meaning that parameters and special + variables can be qualified with the function's name. + </para> + </note> + + <para> + It is important not to confuse the use of + <command>BEGIN</command>/<command>END</command> for grouping statements in + <application>PL/pgSQL</application> with the similarly-named SQL commands + for transaction + control. <application>PL/pgSQL</application>'s <command>BEGIN</command>/<command>END</command> + are only for grouping; they do not start or end a transaction. + See <xref linkend="plpgsql-transactions"/> for information on managing + transactions in <application>PL/pgSQL</application>. + Also, a block containing an <literal>EXCEPTION</literal> clause effectively + forms a subtransaction that can be rolled back without affecting the + outer transaction. For more about that see <xref + linkend="plpgsql-error-trapping"/>. + </para> + </sect1> + + <sect1 id="plpgsql-declarations"> + <title>Declarations</title> + + <para> + All variables used in a block must be declared in the + declarations section of the block. + (The only exceptions are that the loop variable of a <literal>FOR</literal> loop + iterating over a range of integer values is automatically declared as an + integer variable, and likewise the loop variable of a <literal>FOR</literal> loop + iterating over a cursor's result is automatically declared as a + record variable.) + </para> + + <para> + <application>PL/pgSQL</application> variables can have any SQL data type, such as + <type>integer</type>, <type>varchar</type>, and + <type>char</type>. + </para> + + <para> + Here are some examples of variable declarations: +<programlisting> +user_id integer; +quantity numeric(5); +url varchar; +myrow tablename%ROWTYPE; +myfield tablename.columnname%TYPE; +arow RECORD; +</programlisting> + </para> + + <para> + The general syntax of a variable declaration is: +<synopsis> +<replaceable>name</replaceable> <optional> CONSTANT </optional> <replaceable>type</replaceable> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> NOT NULL </optional> <optional> { DEFAULT | := | = } <replaceable>expression</replaceable> </optional>; +</synopsis> + The <literal>DEFAULT</literal> clause, if given, specifies the initial value assigned + to the variable when the block is entered. If the <literal>DEFAULT</literal> clause + is not given then the variable is initialized to the + <acronym>SQL</acronym> null value. + The <literal>CONSTANT</literal> option prevents the variable from being + assigned to after initialization, so that its value will remain constant + for the duration of the block. + The <literal>COLLATE</literal> option specifies a collation to use for the + variable (see <xref linkend="plpgsql-declaration-collation"/>). + If <literal>NOT NULL</literal> + is specified, an assignment of a null value results in a run-time + error. All variables declared as <literal>NOT NULL</literal> + must have a nonnull default value specified. + Equal (<literal>=</literal>) can be used instead of PL/SQL-compliant + <literal>:=</literal>. + </para> + + <para> + A variable's default value is evaluated and assigned to the variable + each time the block is entered (not just once per function call). + So, for example, assigning <literal>now()</literal> to a variable of type + <type>timestamp</type> causes the variable to have the + time of the current function call, not the time when the function was + precompiled. + </para> + + <para> + Examples: +<programlisting> +quantity integer DEFAULT 32; +url varchar := 'http://mysite.com'; +transaction_time CONSTANT timestamp with time zone := now(); +</programlisting> + </para> + + <para> + Once declared, a variable's value can be used in later initialization + expressions in the same block, for example: +<programlisting> +DECLARE + x integer := 1; + y integer := x + 1; +</programlisting> + </para> + + <sect2 id="plpgsql-declaration-parameters"> + <title>Declaring Function Parameters</title> + + <para> + Parameters passed to functions are named with the identifiers + <literal>$1</literal>, <literal>$2</literal>, + etc. Optionally, aliases can be declared for + <literal>$<replaceable>n</replaceable></literal> + parameter names for increased readability. Either the alias or the + numeric identifier can then be used to refer to the parameter value. + </para> + + <para> + There are two ways to create an alias. The preferred way is to give a + name to the parameter in the <command>CREATE FUNCTION</command> command, + for example: +<programlisting> +CREATE FUNCTION sales_tax(subtotal real) RETURNS real AS $$ +BEGIN + RETURN subtotal * 0.06; +END; +$$ LANGUAGE plpgsql; +</programlisting> + The other way is to explicitly declare an alias, using the + declaration syntax + +<synopsis> +<replaceable>name</replaceable> ALIAS FOR $<replaceable>n</replaceable>; +</synopsis> + + The same example in this style looks like: +<programlisting> +CREATE FUNCTION sales_tax(real) RETURNS real AS $$ +DECLARE + subtotal ALIAS FOR $1; +BEGIN + RETURN subtotal * 0.06; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <note> + <para> + These two examples are not perfectly equivalent. In the first case, + <literal>subtotal</literal> could be referenced as + <literal>sales_tax.subtotal</literal>, but in the second case it could not. + (Had we attached a label to the inner block, <literal>subtotal</literal> could + be qualified with that label, instead.) + </para> + </note> + + <para> + Some more examples: +<programlisting> +CREATE FUNCTION instr(varchar, integer) RETURNS integer AS $$ +DECLARE + v_string ALIAS FOR $1; + index ALIAS FOR $2; +BEGIN + -- some computations using v_string and index here +END; +$$ LANGUAGE plpgsql; + + +CREATE FUNCTION concat_selected_fields(in_t sometablename) RETURNS text AS $$ +BEGIN + RETURN in_t.f1 || in_t.f3 || in_t.f5 || in_t.f7; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + When a <application>PL/pgSQL</application> function is declared + with output parameters, the output parameters are given + <literal>$<replaceable>n</replaceable></literal> names and optional + aliases in just the same way as the normal input parameters. An + output parameter is effectively a variable that starts out NULL; + it should be assigned to during the execution of the function. + The final value of the parameter is what is returned. For instance, + the sales-tax example could also be done this way: + +<programlisting> +CREATE FUNCTION sales_tax(subtotal real, OUT tax real) AS $$ +BEGIN + tax := subtotal * 0.06; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + Notice that we omitted <literal>RETURNS real</literal> — we could have + included it, but it would be redundant. + </para> + + <para> + To call a function with <literal>OUT</literal> parameters, omit the + output parameter(s) in the function call: +<programlisting> +SELECT sales_tax(100.00); +</programlisting> + </para> + + <para> + Output parameters are most useful when returning multiple values. + A trivial example is: + +<programlisting> +CREATE FUNCTION sum_n_product(x int, y int, OUT sum int, OUT prod int) AS $$ +BEGIN + sum := x + y; + prod := x * y; +END; +$$ LANGUAGE plpgsql; + +SELECT * FROM sum_n_product(2, 4); + sum | prod +-----+------ + 6 | 8 +</programlisting> + + As discussed in <xref linkend="xfunc-output-parameters"/>, this + effectively creates an anonymous record type for the function's + results. If a <literal>RETURNS</literal> clause is given, it must say + <literal>RETURNS record</literal>. + </para> + + <para> + This also works with procedures, for example: + +<programlisting> +CREATE PROCEDURE sum_n_product(x int, y int, OUT sum int, OUT prod int) AS $$ +BEGIN + sum := x + y; + prod := x * y; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + In a call to a procedure, all the parameters must be specified. For + output parameters, <literal>NULL</literal> may be specified when + calling the procedure from plain SQL: +<programlisting> +CALL sum_n_product(2, 4, NULL, NULL); + sum | prod +-----+------ + 6 | 8 +</programlisting> + + However, when calling a procedure + from <application>PL/pgSQL</application>, you should instead write a + variable for any output parameter; the variable will receive the result + of the call. See <xref linkend="plpgsql-statements-calling-procedure"/> + for details. + </para> + + <para> + Another way to declare a <application>PL/pgSQL</application> function + is with <literal>RETURNS TABLE</literal>, for example: + +<programlisting> +CREATE FUNCTION extended_sales(p_itemno int) +RETURNS TABLE(quantity int, total numeric) AS $$ +BEGIN + RETURN QUERY SELECT s.quantity, s.quantity * s.price FROM sales AS s + WHERE s.itemno = p_itemno; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + This is exactly equivalent to declaring one or more <literal>OUT</literal> + parameters and specifying <literal>RETURNS SETOF + <replaceable>sometype</replaceable></literal>. + </para> + + <para> + When the return type of a <application>PL/pgSQL</application> function + is declared as a polymorphic type (see + <xref linkend="extend-types-polymorphic"/>), a special + parameter <literal>$0</literal> is created. Its data type is the actual + return type of the function, as deduced from the actual input types. + This allows the function to access its actual return type + as shown in <xref linkend="plpgsql-declaration-type"/>. + <literal>$0</literal> is initialized to null and can be modified by + the function, so it can be used to hold the return value if desired, + though that is not required. <literal>$0</literal> can also be + given an alias. For example, this function works on any data type + that has a <literal>+</literal> operator: + +<programlisting> +CREATE FUNCTION add_three_values(v1 anyelement, v2 anyelement, v3 anyelement) +RETURNS anyelement AS $$ +DECLARE + result ALIAS FOR $0; +BEGIN + result := v1 + v2 + v3; + RETURN result; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + The same effect can be obtained by declaring one or more output parameters as + polymorphic types. In this case the + special <literal>$0</literal> parameter is not used; the output + parameters themselves serve the same purpose. For example: + +<programlisting> +CREATE FUNCTION add_three_values(v1 anyelement, v2 anyelement, v3 anyelement, + OUT sum anyelement) +AS $$ +BEGIN + sum := v1 + v2 + v3; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + In practice it might be more useful to declare a polymorphic function + using the <type>anycompatible</type> family of types, so that automatic + promotion of the input arguments to a common type will occur. + For example: + +<programlisting> +CREATE FUNCTION add_three_values(v1 anycompatible, v2 anycompatible, v3 anycompatible) +RETURNS anycompatible AS $$ +BEGIN + RETURN v1 + v2 + v3; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + With this example, a call such as + +<programlisting> +SELECT add_three_values(1, 2, 4.7); +</programlisting> + + will work, automatically promoting the integer inputs to numeric. + The function using <type>anyelement</type> would require you to + cast the three inputs to the same type manually. + </para> + </sect2> + + <sect2 id="plpgsql-declaration-alias"> + <title><literal>ALIAS</literal></title> + +<synopsis> +<replaceable>newname</replaceable> ALIAS FOR <replaceable>oldname</replaceable>; +</synopsis> + + <para> + The <literal>ALIAS</literal> syntax is more general than is suggested in the + previous section: you can declare an alias for any variable, not just + function parameters. The main practical use for this is to assign + a different name for variables with predetermined names, such as + <varname>NEW</varname> or <varname>OLD</varname> within + a trigger function. + </para> + + <para> + Examples: +<programlisting> +DECLARE + prior ALIAS FOR old; + updated ALIAS FOR new; +</programlisting> + </para> + + <para> + Since <literal>ALIAS</literal> creates two different ways to name the same + object, unrestricted use can be confusing. It's best to use it only + for the purpose of overriding predetermined names. + </para> + </sect2> + + <sect2 id="plpgsql-declaration-type"> + <title>Copying Types</title> + +<synopsis> +<replaceable>variable</replaceable>%TYPE +</synopsis> + + <para> + <literal>%TYPE</literal> provides the data type of a variable or + table column. You can use this to declare variables that will hold + database values. For example, let's say you have a column named + <literal>user_id</literal> in your <literal>users</literal> + table. To declare a variable with the same data type as + <literal>users.user_id</literal> you write: +<programlisting> +user_id users.user_id%TYPE; +</programlisting> + </para> + + <para> + By using <literal>%TYPE</literal> you don't need to know the data + type of the structure you are referencing, and most importantly, + if the data type of the referenced item changes in the future (for + instance: you change the type of <literal>user_id</literal> + from <type>integer</type> to <type>real</type>), you might not need + to change your function definition. + </para> + + <para> + <literal>%TYPE</literal> is particularly valuable in polymorphic + functions, since the data types needed for internal variables can + change from one call to the next. Appropriate variables can be + created by applying <literal>%TYPE</literal> to the function's + arguments or result placeholders. + </para> + + </sect2> + + <sect2 id="plpgsql-declaration-rowtypes"> + <title>Row Types</title> + +<synopsis> +<replaceable>name</replaceable> <replaceable>table_name</replaceable><literal>%ROWTYPE</literal>; +<replaceable>name</replaceable> <replaceable>composite_type_name</replaceable>; +</synopsis> + + <para> + A variable of a composite type is called a <firstterm>row</firstterm> + variable (or <firstterm>row-type</firstterm> variable). Such a variable + can hold a whole row of a <command>SELECT</command> or <command>FOR</command> + query result, so long as that query's column set matches the + declared type of the variable. + The individual fields of the row value + are accessed using the usual dot notation, for example + <literal>rowvar.field</literal>. + </para> + + <para> + A row variable can be declared to have the same type as the rows of + an existing table or view, by using the + <replaceable>table_name</replaceable><literal>%ROWTYPE</literal> + notation; or it can be declared by giving a composite type's name. + (Since every table has an associated composite type of the same name, + it actually does not matter in <productname>PostgreSQL</productname> whether you + write <literal>%ROWTYPE</literal> or not. But the form with + <literal>%ROWTYPE</literal> is more portable.) + </para> + + <para> + Parameters to a function can be + composite types (complete table rows). In that case, the + corresponding identifier <literal>$<replaceable>n</replaceable></literal> will be a row variable, and fields can + be selected from it, for example <literal>$1.user_id</literal>. + </para> + + <para> + Here is an example of using composite types. <structname>table1</structname> + and <structname>table2</structname> are existing tables having at least the + mentioned fields: + +<programlisting> +CREATE FUNCTION merge_fields(t_row table1) RETURNS text AS $$ +DECLARE + t2_row table2%ROWTYPE; +BEGIN + SELECT * INTO t2_row FROM table2 WHERE ... ; + RETURN t_row.f1 || t2_row.f3 || t_row.f5 || t2_row.f7; +END; +$$ LANGUAGE plpgsql; + +SELECT merge_fields(t.*) FROM table1 t WHERE ... ; +</programlisting> + </para> + </sect2> + + <sect2 id="plpgsql-declaration-records"> + <title>Record Types</title> + +<synopsis> +<replaceable>name</replaceable> RECORD; +</synopsis> + + <para> + Record variables are similar to row-type variables, but they have no + predefined structure. They take on the actual row structure of the + row they are assigned during a <command>SELECT</command> or <command>FOR</command> command. The substructure + of a record variable can change each time it is assigned to. + A consequence of this is that until a record variable is first assigned + to, it has no substructure, and any attempt to access a + field in it will draw a run-time error. + </para> + + <para> + Note that <literal>RECORD</literal> is not a true data type, only a placeholder. + One should also realize that when a <application>PL/pgSQL</application> + function is declared to return type <type>record</type>, this is not quite the + same concept as a record variable, even though such a function might + use a record variable to hold its result. In both cases the actual row + structure is unknown when the function is written, but for a function + returning <type>record</type> the actual structure is determined when the + calling query is parsed, whereas a record variable can change its row + structure on-the-fly. + </para> + </sect2> + + <sect2 id="plpgsql-declaration-collation"> + <title>Collation of <application>PL/pgSQL</application> Variables</title> + + <indexterm> + <primary>collation</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + When a <application>PL/pgSQL</application> function has one or more + parameters of collatable data types, a collation is identified for each + function call depending on the collations assigned to the actual + arguments, as described in <xref linkend="collation"/>. If a collation is + successfully identified (i.e., there are no conflicts of implicit + collations among the arguments) then all the collatable parameters are + treated as having that collation implicitly. This will affect the + behavior of collation-sensitive operations within the function. + For example, consider + +<programlisting> +CREATE FUNCTION less_than(a text, b text) RETURNS boolean AS $$ +BEGIN + RETURN a < b; +END; +$$ LANGUAGE plpgsql; + +SELECT less_than(text_field_1, text_field_2) FROM table1; +SELECT less_than(text_field_1, text_field_2 COLLATE "C") FROM table1; +</programlisting> + + The first use of <function>less_than</function> will use the common collation + of <structfield>text_field_1</structfield> and <structfield>text_field_2</structfield> for + the comparison, while the second use will use <literal>C</literal> collation. + </para> + + <para> + Furthermore, the identified collation is also assumed as the collation of + any local variables that are of collatable types. Thus this function + would not work any differently if it were written as + +<programlisting> +CREATE FUNCTION less_than(a text, b text) RETURNS boolean AS $$ +DECLARE + local_a text := a; + local_b text := b; +BEGIN + RETURN local_a < local_b; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + If there are no parameters of collatable data types, or no common + collation can be identified for them, then parameters and local variables + use the default collation of their data type (which is usually the + database's default collation, but could be different for variables of + domain types). + </para> + + <para> + A local variable of a collatable data type can have a different collation + associated with it by including the <literal>COLLATE</literal> option in its + declaration, for example + +<programlisting> +DECLARE + local_a text COLLATE "en_US"; +</programlisting> + + This option overrides the collation that would otherwise be + given to the variable according to the rules above. + </para> + + <para> + Also, of course explicit <literal>COLLATE</literal> clauses can be written inside + a function if it is desired to force a particular collation to be used in + a particular operation. For example, + +<programlisting> +CREATE FUNCTION less_than_c(a text, b text) RETURNS boolean AS $$ +BEGIN + RETURN a < b COLLATE "C"; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + This overrides the collations associated with the table columns, + parameters, or local variables used in the expression, just as would + happen in a plain SQL command. + </para> + </sect2> + </sect1> + + <sect1 id="plpgsql-expressions"> + <title>Expressions</title> + + <para> + All expressions used in <application>PL/pgSQL</application> + statements are processed using the server's main + <acronym>SQL</acronym> executor. For example, when you write + a <application>PL/pgSQL</application> statement like +<synopsis> +IF <replaceable>expression</replaceable> THEN ... +</synopsis> + <application>PL/pgSQL</application> will evaluate the expression by + feeding a query like +<synopsis> +SELECT <replaceable>expression</replaceable> +</synopsis> + to the main SQL engine. While forming the <command>SELECT</command> command, + any occurrences of <application>PL/pgSQL</application> variable names + are replaced by query parameters, as discussed in detail in + <xref linkend="plpgsql-var-subst"/>. + This allows the query plan for the <command>SELECT</command> to + be prepared just once and then reused for subsequent + evaluations with different values of the variables. Thus, what + really happens on first use of an expression is essentially a + <command>PREPARE</command> command. For example, if we have declared + two integer variables <literal>x</literal> and <literal>y</literal>, and we write +<programlisting> +IF x < y THEN ... +</programlisting> + what happens behind the scenes is equivalent to +<programlisting> +PREPARE <replaceable>statement_name</replaceable>(integer, integer) AS SELECT $1 < $2; +</programlisting> + and then this prepared statement is <command>EXECUTE</command>d for each + execution of the <command>IF</command> statement, with the current values + of the <application>PL/pgSQL</application> variables supplied as + parameter values. Normally these details are + not important to a <application>PL/pgSQL</application> user, but + they are useful to know when trying to diagnose a problem. + More information appears in <xref linkend="plpgsql-plan-caching"/>. + </para> + + <para> + Since an <replaceable>expression</replaceable> is converted to a + <literal>SELECT</literal> command, it can contain the same clauses + that an ordinary <literal>SELECT</literal> would, except that it + cannot include a top-level <literal>UNION</literal>, + <literal>INTERSECT</literal>, or <literal>EXCEPT</literal> clause. + Thus for example one could test whether a table is non-empty with +<programlisting> +IF count(*) > 0 FROM my_table THEN ... +</programlisting> + since the <replaceable>expression</replaceable> + between <literal>IF</literal> and <literal>THEN</literal> is parsed as + though it were <literal>SELECT count(*) > 0 FROM my_table</literal>. + The <literal>SELECT</literal> must produce a single column, and not + more than one row. (If it produces no rows, the result is taken as + NULL.) + </para> + </sect1> + + <sect1 id="plpgsql-statements"> + <title>Basic Statements</title> + + <para> + In this section and the following ones, we describe all the statement + types that are explicitly understood by + <application>PL/pgSQL</application>. + Anything not recognized as one of these statement types is presumed + to be an SQL command and is sent to the main database engine to execute, + as described in <xref linkend="plpgsql-statements-general-sql"/>. + </para> + + <sect2 id="plpgsql-statements-assignment"> + <title>Assignment</title> + + <para> + An assignment of a value to a <application>PL/pgSQL</application> + variable is written as: +<synopsis> +<replaceable>variable</replaceable> { := | = } <replaceable>expression</replaceable>; +</synopsis> + As explained previously, the expression in such a statement is evaluated + by means of an SQL <command>SELECT</command> command sent to the main + database engine. The expression must yield a single value (possibly + a row value, if the variable is a row or record variable). The target + variable can be a simple variable (optionally qualified with a block + name), a field of a row or record target, or an element or slice of + an array target. Equal (<literal>=</literal>) can be + used instead of PL/SQL-compliant <literal>:=</literal>. + </para> + + <para> + If the expression's result data type doesn't match the variable's + data type, the value will be coerced as though by an assignment cast + (see <xref linkend="typeconv-query"/>). If no assignment cast is known + for the pair of data types involved, the <application>PL/pgSQL</application> + interpreter will attempt to convert the result value textually, that is + by applying the result type's output function followed by the variable + type's input function. Note that this could result in run-time errors + generated by the input function, if the string form of the result value + is not acceptable to the input function. + </para> + + <para> + Examples: +<programlisting> +tax := subtotal * 0.06; +my_record.user_id := 20; +my_array[j] := 20; +my_array[1:3] := array[1,2,3]; +complex_array[n].realpart = 12.3; +</programlisting> + </para> + </sect2> + + <sect2 id="plpgsql-statements-general-sql"> + <title>Executing SQL Commands</title> + + <para> + In general, any SQL command that does not return rows can be executed + within a <application>PL/pgSQL</application> function just by writing + the command. For example, you could create and fill a table by writing +<programlisting> +CREATE TABLE mytable (id int primary key, data text); +INSERT INTO mytable VALUES (1,'one'), (2,'two'); +</programlisting> + </para> + + <para> + If the command does return rows (for example <command>SELECT</command>, + or <command>INSERT</command>/<command>UPDATE</command>/<command>DELETE</command> + with <literal>RETURNING</literal>), there are two ways to proceed. + When the command will return at most one row, or you only care about + the first row of output, write the command as usual but add + an <literal>INTO</literal> clause to capture the output, as described + in <xref linkend="plpgsql-statements-sql-onerow"/>. + To process all of the output rows, write the command as the data + source for a <command>FOR</command> loop, as described in + <xref linkend="plpgsql-records-iterating"/>. + </para> + + <para> + Usually it is not sufficient just to execute statically-defined SQL + commands. Typically you'll want a command to use varying data values, + or even to vary in more fundamental ways such as by using different + table names at different times. Again, there are two ways to proceed + depending on the situation. + </para> + + <para> + <application>PL/pgSQL</application> variable values can be + automatically inserted into optimizable SQL commands, which + are <command>SELECT</command>, <command>INSERT</command>, + <command>UPDATE</command>, <command>DELETE</command>, + <command>MERGE</command>, and certain + utility commands that incorporate one of these, such + as <command>EXPLAIN</command> and <command>CREATE TABLE ... AS + SELECT</command>. In these commands, + any <application>PL/pgSQL</application> variable name appearing + in the command text is replaced by a query parameter, and then the + current value of the variable is provided as the parameter value + at run time. This is exactly like the processing described earlier + for expressions; for details see <xref linkend="plpgsql-var-subst"/>. + </para> + + <para> + When executing an optimizable SQL command in this way, + <application>PL/pgSQL</application> may cache and re-use the execution + plan for the command, as discussed in + <xref linkend="plpgsql-plan-caching"/>. + </para> + + <para> + Non-optimizable SQL commands (also called utility commands) are not + capable of accepting query parameters. So automatic substitution + of <application>PL/pgSQL</application> variables does not work in such + commands. To include non-constant text in a utility command executed + from <application>PL/pgSQL</application>, you must build the utility + command as a string and then <command>EXECUTE</command> it, as + discussed in <xref linkend="plpgsql-statements-executing-dyn"/>. + </para> + + <para> + <command>EXECUTE</command> must also be used if you want to modify + the command in some other way than supplying a data value, for example + by changing a table name. + </para> + + <para> + Sometimes it is useful to evaluate an expression or <command>SELECT</command> + query but discard the result, for example when calling a function + that has side-effects but no useful result value. To do + this in <application>PL/pgSQL</application>, use the + <command>PERFORM</command> statement: + +<synopsis> +PERFORM <replaceable>query</replaceable>; +</synopsis> + + This executes <replaceable>query</replaceable> and discards the + result. Write the <replaceable>query</replaceable> the same + way you would write an SQL <command>SELECT</command> command, but replace the + initial keyword <command>SELECT</command> with <command>PERFORM</command>. + For <command>WITH</command> queries, use <command>PERFORM</command> and then + place the query in parentheses. (In this case, the query can only + return one row.) + <application>PL/pgSQL</application> variables will be + substituted into the query just as described above, + and the plan is cached in the same way. Also, the special variable + <literal>FOUND</literal> is set to true if the query produced at + least one row, or false if it produced no rows (see + <xref linkend="plpgsql-statements-diagnostics"/>). + </para> + + <note> + <para> + One might expect that writing <command>SELECT</command> directly + would accomplish this result, but at + present the only accepted way to do it is + <command>PERFORM</command>. An SQL command that can return rows, + such as <command>SELECT</command>, will be rejected as an error + unless it has an <literal>INTO</literal> clause as discussed in the + next section. + </para> + </note> + + <para> + An example: +<programlisting> +PERFORM create_mv('cs_session_page_requests_mv', my_query); +</programlisting> + </para> + </sect2> + + <sect2 id="plpgsql-statements-sql-onerow"> + <title>Executing a Command with a Single-Row Result</title> + + <indexterm zone="plpgsql-statements-sql-onerow"> + <primary>SELECT INTO</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <indexterm zone="plpgsql-statements-sql-onerow"> + <primary>RETURNING INTO</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + The result of an SQL command yielding a single row (possibly of multiple + columns) can be assigned to a record variable, row-type variable, or list + of scalar variables. This is done by writing the base SQL command and + adding an <literal>INTO</literal> clause. For example, + +<synopsis> +SELECT <replaceable>select_expressions</replaceable> INTO <optional>STRICT</optional> <replaceable>target</replaceable> FROM ...; +INSERT ... RETURNING <replaceable>expressions</replaceable> INTO <optional>STRICT</optional> <replaceable>target</replaceable>; +UPDATE ... RETURNING <replaceable>expressions</replaceable> INTO <optional>STRICT</optional> <replaceable>target</replaceable>; +DELETE ... RETURNING <replaceable>expressions</replaceable> INTO <optional>STRICT</optional> <replaceable>target</replaceable>; +</synopsis> + + where <replaceable>target</replaceable> can be a record variable, a row + variable, or a comma-separated list of simple variables and + record/row fields. + <application>PL/pgSQL</application> variables will be + substituted into the rest of the command (that is, everything but the + <literal>INTO</literal> clause) just as described above, + and the plan is cached in the same way. + This works for <command>SELECT</command>, + <command>INSERT</command>/<command>UPDATE</command>/<command>DELETE</command> with + <literal>RETURNING</literal>, and certain utility commands + that return row sets, such as <command>EXPLAIN</command>. + Except for the <literal>INTO</literal> clause, the SQL command is the same + as it would be written outside <application>PL/pgSQL</application>. + </para> + + <tip> + <para> + Note that this interpretation of <command>SELECT</command> with <literal>INTO</literal> + is quite different from <productname>PostgreSQL</productname>'s regular + <command>SELECT INTO</command> command, wherein the <literal>INTO</literal> + target is a newly created table. If you want to create a table from a + <command>SELECT</command> result inside a + <application>PL/pgSQL</application> function, use the syntax + <command>CREATE TABLE ... AS SELECT</command>. + </para> + </tip> + + <para> + If a row variable or a variable list is used as target, + the command's result columns + must exactly match the structure of the target as to number and data + types, or else a run-time error + occurs. When a record variable is the target, it automatically + configures itself to the row type of the command's result columns. + </para> + + <para> + The <literal>INTO</literal> clause can appear almost anywhere in the SQL + command. Customarily it is written either just before or just after + the list of <replaceable>select_expressions</replaceable> in a + <command>SELECT</command> command, or at the end of the command for other + command types. It is recommended that you follow this convention + in case the <application>PL/pgSQL</application> parser becomes + stricter in future versions. + </para> + + <para> + If <literal>STRICT</literal> is not specified in the <literal>INTO</literal> + clause, then <replaceable>target</replaceable> will be set to the first + row returned by the command, or to nulls if the command returned no rows. + (Note that <quote>the first row</quote> is not + well-defined unless you've used <literal>ORDER BY</literal>.) Any result rows + after the first row are discarded. + You can check the special <literal>FOUND</literal> variable (see + <xref linkend="plpgsql-statements-diagnostics"/>) to + determine whether a row was returned: + +<programlisting> +SELECT * INTO myrec FROM emp WHERE empname = myname; +IF NOT FOUND THEN + RAISE EXCEPTION 'employee % not found', myname; +END IF; +</programlisting> + + If the <literal>STRICT</literal> option is specified, the command must + return exactly one row or a run-time error will be reported, either + <literal>NO_DATA_FOUND</literal> (no rows) or <literal>TOO_MANY_ROWS</literal> + (more than one row). You can use an exception block if you wish + to catch the error, for example: + +<programlisting> +BEGIN + SELECT * INTO STRICT myrec FROM emp WHERE empname = myname; + EXCEPTION + WHEN NO_DATA_FOUND THEN + RAISE EXCEPTION 'employee % not found', myname; + WHEN TOO_MANY_ROWS THEN + RAISE EXCEPTION 'employee % not unique', myname; +END; +</programlisting> + Successful execution of a command with <literal>STRICT</literal> + always sets <literal>FOUND</literal> to true. + </para> + + <para> + For <command>INSERT</command>/<command>UPDATE</command>/<command>DELETE</command> with + <literal>RETURNING</literal>, <application>PL/pgSQL</application> reports + an error for more than one returned row, even when + <literal>STRICT</literal> is not specified. This is because there + is no option such as <literal>ORDER BY</literal> with which to determine + which affected row should be returned. + </para> + + <para> + If <literal>print_strict_params</literal> is enabled for the function, + then when an error is thrown because the requirements + of <literal>STRICT</literal> are not met, the <literal>DETAIL</literal> part of + the error message will include information about the parameters + passed to the command. + You can change the <literal>print_strict_params</literal> + setting for all functions by setting + <varname>plpgsql.print_strict_params</varname>, though only subsequent + function compilations will be affected. You can also enable it + on a per-function basis by using a compiler option, for example: +<programlisting> +CREATE FUNCTION get_userid(username text) RETURNS int +AS $$ +#print_strict_params on +DECLARE +userid int; +BEGIN + SELECT users.userid INTO STRICT userid + FROM users WHERE users.username = get_userid.username; + RETURN userid; +END; +$$ LANGUAGE plpgsql; +</programlisting> + On failure, this function might produce an error message such as +<programlisting> +ERROR: query returned no rows +DETAIL: parameters: $1 = 'nosuchuser' +CONTEXT: PL/pgSQL function get_userid(text) line 6 at SQL statement +</programlisting> + </para> + + <note> + <para> + The <literal>STRICT</literal> option matches the behavior of + Oracle PL/SQL's <command>SELECT INTO</command> and related statements. + </para> + </note> + + </sect2> + + <sect2 id="plpgsql-statements-executing-dyn"> + <title>Executing Dynamic Commands</title> + + <para> + Oftentimes you will want to generate dynamic commands inside your + <application>PL/pgSQL</application> functions, that is, commands + that will involve different tables or different data types each + time they are executed. <application>PL/pgSQL</application>'s + normal attempts to cache plans for commands (as discussed in + <xref linkend="plpgsql-plan-caching"/>) will not work in such + scenarios. To handle this sort of problem, the + <command>EXECUTE</command> statement is provided: + +<synopsis> +EXECUTE <replaceable class="command">command-string</replaceable> <optional> INTO <optional>STRICT</optional> <replaceable>target</replaceable> </optional> <optional> USING <replaceable>expression</replaceable> <optional>, ... </optional> </optional>; +</synopsis> + + where <replaceable>command-string</replaceable> is an expression + yielding a string (of type <type>text</type>) containing the + command to be executed. The optional <replaceable>target</replaceable> + is a record variable, a row variable, or a comma-separated list of + simple variables and record/row fields, into which the results of + the command will be stored. The optional <literal>USING</literal> expressions + supply values to be inserted into the command. + </para> + + <para> + No substitution of <application>PL/pgSQL</application> variables is done on the + computed command string. Any required variable values must be inserted + in the command string as it is constructed; or you can use parameters + as described below. + </para> + + <para> + Also, there is no plan caching for commands executed via + <command>EXECUTE</command>. Instead, the command is always planned + each time the statement is run. Thus the command + string can be dynamically created within the function to perform + actions on different tables and columns. + </para> + + <para> + The <literal>INTO</literal> clause specifies where the results of + an SQL command returning rows should be assigned. If a row variable + or variable list is provided, it must exactly match the structure + of the command's results; if a + record variable is provided, it will configure itself to match the + result structure automatically. If multiple rows are returned, + only the first will be assigned to the <literal>INTO</literal> + variable(s). If no rows are returned, NULL is assigned to the + <literal>INTO</literal> variable(s). If no <literal>INTO</literal> + clause is specified, the command results are discarded. + </para> + + <para> + If the <literal>STRICT</literal> option is given, an error is reported + unless the command produces exactly one row. + </para> + + <para> + The command string can use parameter values, which are referenced + in the command as <literal>$1</literal>, <literal>$2</literal>, etc. + These symbols refer to values supplied in the <literal>USING</literal> + clause. This method is often preferable to inserting data values + into the command string as text: it avoids run-time overhead of + converting the values to text and back, and it is much less prone + to SQL-injection attacks since there is no need for quoting or escaping. + An example is: +<programlisting> +EXECUTE 'SELECT count(*) FROM mytable WHERE inserted_by = $1 AND inserted <= $2' + INTO c + USING checked_user, checked_date; +</programlisting> + </para> + + <para> + Note that parameter symbols can only be used for data values + — if you want to use dynamically determined table or column + names, you must insert them into the command string textually. + For example, if the preceding query needed to be done against a + dynamically selected table, you could do this: +<programlisting> +EXECUTE 'SELECT count(*) FROM ' + || quote_ident(tabname) + || ' WHERE inserted_by = $1 AND inserted <= $2' + INTO c + USING checked_user, checked_date; +</programlisting> + A cleaner approach is to use <function>format()</function>'s <literal>%I</literal> + specification to insert table or column names with automatic quoting: +<programlisting> +EXECUTE format('SELECT count(*) FROM %I ' + 'WHERE inserted_by = $1 AND inserted <= $2', tabname) + INTO c + USING checked_user, checked_date; +</programlisting> + (This example relies on the SQL rule that string literals separated by a + newline are implicitly concatenated.) + </para> + + <para> + Another restriction on parameter symbols is that they only work in + optimizable SQL commands + (<command>SELECT</command>, <command>INSERT</command>, <command>UPDATE</command>, + <command>DELETE</command>, <command>MERGE</command>, and certain commands containing one of these). + In other statement + types (generically called utility statements), you must insert + values textually even if they are just data values. + </para> + + <para> + An <command>EXECUTE</command> with a simple constant command string and some + <literal>USING</literal> parameters, as in the first example above, is + functionally equivalent to just writing the command directly in + <application>PL/pgSQL</application> and allowing replacement of + <application>PL/pgSQL</application> variables to happen automatically. + The important difference is that <command>EXECUTE</command> will re-plan + the command on each execution, generating a plan that is specific + to the current parameter values; whereas + <application>PL/pgSQL</application> may otherwise create a generic plan + and cache it for re-use. In situations where the best plan depends + strongly on the parameter values, it can be helpful to use + <command>EXECUTE</command> to positively ensure that a generic plan is not + selected. + </para> + + <para> + <command>SELECT INTO</command> is not currently supported within + <command>EXECUTE</command>; instead, execute a plain <command>SELECT</command> + command and specify <literal>INTO</literal> as part of the <command>EXECUTE</command> + itself. + </para> + + <note> + <para> + The <application>PL/pgSQL</application> + <command>EXECUTE</command> statement is not related to the + <link linkend="sql-execute"><command>EXECUTE</command></link> SQL + statement supported by the + <productname>PostgreSQL</productname> server. The server's + <command>EXECUTE</command> statement cannot be used directly within + <application>PL/pgSQL</application> functions (and is not needed). + </para> + </note> + + <example id="plpgsql-quote-literal-example"> + <title>Quoting Values in Dynamic Queries</title> + + <indexterm> + <primary>quote_ident</primary> + <secondary>use in PL/pgSQL</secondary> + </indexterm> + + <indexterm> + <primary>quote_literal</primary> + <secondary>use in PL/pgSQL</secondary> + </indexterm> + + <indexterm> + <primary>quote_nullable</primary> + <secondary>use in PL/pgSQL</secondary> + </indexterm> + + <indexterm> + <primary>format</primary> + <secondary>use in PL/pgSQL</secondary> + </indexterm> + + <para> + When working with dynamic commands you will often have to handle escaping + of single quotes. The recommended method for quoting fixed text in your + function body is dollar quoting. (If you have legacy code that does + not use dollar quoting, please refer to the + overview in <xref linkend="plpgsql-quote-tips"/>, which can save you + some effort when translating said code to a more reasonable scheme.) + </para> + + <para> + Dynamic values require careful handling since they might contain + quote characters. + An example using <function>format()</function> (this assumes that you are + dollar quoting the function body so quote marks need not be doubled): +<programlisting> +EXECUTE format('UPDATE tbl SET %I = $1 ' + 'WHERE key = $2', colname) USING newvalue, keyvalue; +</programlisting> + It is also possible to call the quoting functions directly: +<programlisting> +EXECUTE 'UPDATE tbl SET ' + || quote_ident(colname) + || ' = ' + || quote_literal(newvalue) + || ' WHERE key = ' + || quote_literal(keyvalue); +</programlisting> + </para> + + <para> + This example demonstrates the use of the + <function>quote_ident</function> and + <function>quote_literal</function> functions (see <xref + linkend="functions-string"/>). For safety, expressions containing column + or table identifiers should be passed through + <function>quote_ident</function> before insertion in a dynamic query. + Expressions containing values that should be literal strings in the + constructed command should be passed through <function>quote_literal</function>. + These functions take the appropriate steps to return the input text + enclosed in double or single quotes respectively, with any embedded + special characters properly escaped. + </para> + + <para> + Because <function>quote_literal</function> is labeled + <literal>STRICT</literal>, it will always return null when called with a + null argument. In the above example, if <literal>newvalue</literal> or + <literal>keyvalue</literal> were null, the entire dynamic query string would + become null, leading to an error from <command>EXECUTE</command>. + You can avoid this problem by using the <function>quote_nullable</function> + function, which works the same as <function>quote_literal</function> except that + when called with a null argument it returns the string <literal>NULL</literal>. + For example, +<programlisting> +EXECUTE 'UPDATE tbl SET ' + || quote_ident(colname) + || ' = ' + || quote_nullable(newvalue) + || ' WHERE key = ' + || quote_nullable(keyvalue); +</programlisting> + If you are dealing with values that might be null, you should usually + use <function>quote_nullable</function> in place of <function>quote_literal</function>. + </para> + + <para> + As always, care must be taken to ensure that null values in a query do + not deliver unintended results. For example the <literal>WHERE</literal> clause +<programlisting> +'WHERE key = ' || quote_nullable(keyvalue) +</programlisting> + will never succeed if <literal>keyvalue</literal> is null, because the + result of using the equality operator <literal>=</literal> with a null operand + is always null. If you wish null to work like an ordinary key value, + you would need to rewrite the above as +<programlisting> +'WHERE key IS NOT DISTINCT FROM ' || quote_nullable(keyvalue) +</programlisting> + (At present, <literal>IS NOT DISTINCT FROM</literal> is handled much less + efficiently than <literal>=</literal>, so don't do this unless you must. + See <xref linkend="functions-comparison"/> for + more information on nulls and <literal>IS DISTINCT</literal>.) + </para> + + <para> + Note that dollar quoting is only useful for quoting fixed text. + It would be a very bad idea to try to write this example as: +<programlisting> +EXECUTE 'UPDATE tbl SET ' + || quote_ident(colname) + || ' = $$' + || newvalue + || '$$ WHERE key = ' + || quote_literal(keyvalue); +</programlisting> + because it would break if the contents of <literal>newvalue</literal> + happened to contain <literal>$$</literal>. The same objection would + apply to any other dollar-quoting delimiter you might pick. + So, to safely quote text that is not known in advance, you + <emphasis>must</emphasis> use <function>quote_literal</function>, + <function>quote_nullable</function>, or <function>quote_ident</function>, as appropriate. + </para> + + <para> + Dynamic SQL statements can also be safely constructed using the + <function>format</function> function (see <xref + linkend="functions-string-format"/>). For example: +<programlisting> +EXECUTE format('UPDATE tbl SET %I = %L ' + 'WHERE key = %L', colname, newvalue, keyvalue); +</programlisting> + <literal>%I</literal> is equivalent to <function>quote_ident</function>, and + <literal>%L</literal> is equivalent to <function>quote_nullable</function>. + The <function>format</function> function can be used in conjunction with + the <literal>USING</literal> clause: +<programlisting> +EXECUTE format('UPDATE tbl SET %I = $1 WHERE key = $2', colname) + USING newvalue, keyvalue; +</programlisting> + This form is better because the variables are handled in their native + data type format, rather than unconditionally converting them to + text and quoting them via <literal>%L</literal>. It is also more efficient. + </para> + </example> + + <para> + A much larger example of a dynamic command and + <command>EXECUTE</command> can be seen in <xref + linkend="plpgsql-porting-ex2"/>, which builds and executes a + <command>CREATE FUNCTION</command> command to define a new function. + </para> + </sect2> + + <sect2 id="plpgsql-statements-diagnostics"> + <title>Obtaining the Result Status</title> + + <para> + There are several ways to determine the effect of a command. The + first method is to use the <command>GET DIAGNOSTICS</command> + command, which has the form: + +<synopsis> +GET <optional> CURRENT </optional> DIAGNOSTICS <replaceable>variable</replaceable> { = | := } <replaceable>item</replaceable> <optional> , ... </optional>; +</synopsis> + + This command allows retrieval of system status indicators. + <literal>CURRENT</literal> is a noise word (but see also <command>GET STACKED + DIAGNOSTICS</command> in <xref linkend="plpgsql-exception-diagnostics"/>). + Each <replaceable>item</replaceable> is a key word identifying a status + value to be assigned to the specified <replaceable>variable</replaceable> + (which should be of the right data type to receive it). The currently + available status items are shown + in <xref linkend="plpgsql-current-diagnostics-values"/>. Colon-equal + (<literal>:=</literal>) can be used instead of the SQL-standard <literal>=</literal> + token. An example: +<programlisting> +GET DIAGNOSTICS integer_var = ROW_COUNT; +</programlisting> + </para> + + <table id="plpgsql-current-diagnostics-values"> + <title>Available Diagnostics Items</title> + <tgroup cols="3"> + <colspec colname="col1" colwidth="1*"/> + <colspec colname="col2" colwidth="1*"/> + <colspec colname="col3" colwidth="2*"/> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry><varname>ROW_COUNT</varname></entry> + <entry><type>bigint</type></entry> + <entry>the number of rows processed by the most + recent <acronym>SQL</acronym> command</entry> + </row> + <row> + <entry><literal>PG_CONTEXT</literal></entry> + <entry><type>text</type></entry> + <entry>line(s) of text describing the current call stack + (see <xref linkend="plpgsql-call-stack"/>)</entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + The second method to determine the effects of a command is to check the + special variable named <literal>FOUND</literal>, which is of + type <type>boolean</type>. <literal>FOUND</literal> starts out + false within each <application>PL/pgSQL</application> function call. + It is set by each of the following types of statements: + + <itemizedlist> + <listitem> + <para> + A <command>SELECT INTO</command> statement sets + <literal>FOUND</literal> true if a row is assigned, false if no + row is returned. + </para> + </listitem> + <listitem> + <para> + A <command>PERFORM</command> statement sets <literal>FOUND</literal> + true if it produces (and discards) one or more rows, false if + no row is produced. + </para> + </listitem> + <listitem> + <para> + <command>UPDATE</command>, <command>INSERT</command>, <command>DELETE</command>, + and <command>MERGE</command> + statements set <literal>FOUND</literal> true if at least one + row is affected, false if no row is affected. + </para> + </listitem> + <listitem> + <para> + A <command>FETCH</command> statement sets <literal>FOUND</literal> + true if it returns a row, false if no row is returned. + </para> + </listitem> + <listitem> + <para> + A <command>MOVE</command> statement sets <literal>FOUND</literal> + true if it successfully repositions the cursor, false otherwise. + </para> + </listitem> + <listitem> + <para> + A <command>FOR</command> or <command>FOREACH</command> statement sets + <literal>FOUND</literal> true + if it iterates one or more times, else false. + <literal>FOUND</literal> is set this way when the + loop exits; inside the execution of the loop, + <literal>FOUND</literal> is not modified by the + loop statement, although it might be changed by the + execution of other statements within the loop body. + </para> + </listitem> + <listitem> + <para> + <command>RETURN QUERY</command> and <command>RETURN QUERY + EXECUTE</command> statements set <literal>FOUND</literal> + true if the query returns at least one row, false if no row + is returned. + </para> + </listitem> + </itemizedlist> + + Other <application>PL/pgSQL</application> statements do not change + the state of <literal>FOUND</literal>. + Note in particular that <command>EXECUTE</command> + changes the output of <command>GET DIAGNOSTICS</command>, but + does not change <literal>FOUND</literal>. + </para> + + <para> + <literal>FOUND</literal> is a local variable within each + <application>PL/pgSQL</application> function; any changes to it + affect only the current function. + </para> + + </sect2> + + <sect2 id="plpgsql-statements-null"> + <title>Doing Nothing At All</title> + + <para> + Sometimes a placeholder statement that does nothing is useful. + For example, it can indicate that one arm of an if/then/else + chain is deliberately empty. For this purpose, use the + <command>NULL</command> statement: + +<synopsis> +NULL; +</synopsis> + </para> + + <para> + For example, the following two fragments of code are equivalent: +<programlisting> +BEGIN + y := x / 0; +EXCEPTION + WHEN division_by_zero THEN + NULL; -- ignore the error +END; +</programlisting> + +<programlisting> +BEGIN + y := x / 0; +EXCEPTION + WHEN division_by_zero THEN -- ignore the error +END; +</programlisting> + Which is preferable is a matter of taste. + </para> + + <note> + <para> + In Oracle's PL/SQL, empty statement lists are not allowed, and so + <command>NULL</command> statements are <emphasis>required</emphasis> for situations + such as this. <application>PL/pgSQL</application> allows you to + just write nothing, instead. + </para> + </note> + + </sect2> + </sect1> + + <sect1 id="plpgsql-control-structures"> + <title>Control Structures</title> + + <para> + Control structures are probably the most useful (and + important) part of <application>PL/pgSQL</application>. With + <application>PL/pgSQL</application>'s control structures, + you can manipulate <productname>PostgreSQL</productname> data in a very + flexible and powerful way. + </para> + + <sect2 id="plpgsql-statements-returning"> + <title>Returning from a Function</title> + + <para> + There are two commands available that allow you to return data + from a function: <command>RETURN</command> and <command>RETURN + NEXT</command>. + </para> + + <sect3> + <title><command>RETURN</command></title> + +<synopsis> +RETURN <replaceable>expression</replaceable>; +</synopsis> + + <para> + <command>RETURN</command> with an expression terminates the + function and returns the value of + <replaceable>expression</replaceable> to the caller. This form + is used for <application>PL/pgSQL</application> functions that do + not return a set. + </para> + + <para> + In a function that returns a scalar type, the expression's result will + automatically be cast into the function's return type as described for + assignments. But to return a composite (row) value, you must write an + expression delivering exactly the requested column set. This may + require use of explicit casting. + </para> + + <para> + If you declared the function with output parameters, write just + <command>RETURN</command> with no expression. The current values + of the output parameter variables will be returned. + </para> + + <para> + If you declared the function to return <type>void</type>, a + <command>RETURN</command> statement can be used to exit the function + early; but do not write an expression following + <command>RETURN</command>. + </para> + + <para> + The return value of a function cannot be left undefined. If + control reaches the end of the top-level block of the function + without hitting a <command>RETURN</command> statement, a run-time + error will occur. This restriction does not apply to functions + with output parameters and functions returning <type>void</type>, + however. In those cases a <command>RETURN</command> statement is + automatically executed if the top-level block finishes. + </para> + + <para> + Some examples: + +<programlisting> +-- functions returning a scalar type +RETURN 1 + 2; +RETURN scalar_var; + +-- functions returning a composite type +RETURN composite_type_var; +RETURN (1, 2, 'three'::text); -- must cast columns to correct types +</programlisting> + </para> + </sect3> + + <sect3> + <title><command>RETURN NEXT</command> and <command>RETURN QUERY</command></title> + <indexterm> + <primary>RETURN NEXT</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + <indexterm> + <primary>RETURN QUERY</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + +<synopsis> +RETURN NEXT <replaceable>expression</replaceable>; +RETURN QUERY <replaceable>query</replaceable>; +RETURN QUERY EXECUTE <replaceable class="command">command-string</replaceable> <optional> USING <replaceable>expression</replaceable> <optional>, ... </optional> </optional>; +</synopsis> + + <para> + When a <application>PL/pgSQL</application> function is declared to return + <literal>SETOF <replaceable>sometype</replaceable></literal>, the procedure + to follow is slightly different. In that case, the individual + items to return are specified by a sequence of <command>RETURN + NEXT</command> or <command>RETURN QUERY</command> commands, and + then a final <command>RETURN</command> command with no argument + is used to indicate that the function has finished executing. + <command>RETURN NEXT</command> can be used with both scalar and + composite data types; with a composite result type, an entire + <quote>table</quote> of results will be returned. + <command>RETURN QUERY</command> appends the results of executing + a query to the function's result set. <command>RETURN + NEXT</command> and <command>RETURN QUERY</command> can be freely + intermixed in a single set-returning function, in which case + their results will be concatenated. + </para> + + <para> + <command>RETURN NEXT</command> and <command>RETURN + QUERY</command> do not actually return from the function — + they simply append zero or more rows to the function's result + set. Execution then continues with the next statement in the + <application>PL/pgSQL</application> function. As successive + <command>RETURN NEXT</command> or <command>RETURN + QUERY</command> commands are executed, the result set is built + up. A final <command>RETURN</command>, which should have no + argument, causes control to exit the function (or you can just + let control reach the end of the function). + </para> + + <para> + <command>RETURN QUERY</command> has a variant + <command>RETURN QUERY EXECUTE</command>, which specifies the + query to be executed dynamically. Parameter expressions can + be inserted into the computed query string via <literal>USING</literal>, + in just the same way as in the <command>EXECUTE</command> command. + </para> + + <para> + If you declared the function with output parameters, write just + <command>RETURN NEXT</command> with no expression. On each + execution, the current values of the output parameter + variable(s) will be saved for eventual return as a row of the + result. Note that you must declare the function as returning + <literal>SETOF record</literal> when there are multiple output + parameters, or <literal>SETOF <replaceable>sometype</replaceable></literal> + when there is just one output parameter of type + <replaceable>sometype</replaceable>, in order to create a set-returning + function with output parameters. + </para> + + <para> + Here is an example of a function using <command>RETURN + NEXT</command>: + +<programlisting> +CREATE TABLE foo (fooid INT, foosubid INT, fooname TEXT); +INSERT INTO foo VALUES (1, 2, 'three'); +INSERT INTO foo VALUES (4, 5, 'six'); + +CREATE OR REPLACE FUNCTION get_all_foo() RETURNS SETOF foo AS +$BODY$ +DECLARE + r foo%rowtype; +BEGIN + FOR r IN + SELECT * FROM foo WHERE fooid > 0 + LOOP + -- can do some processing here + RETURN NEXT r; -- return current row of SELECT + END LOOP; + RETURN; +END; +$BODY$ +LANGUAGE plpgsql; + +SELECT * FROM get_all_foo(); +</programlisting> + </para> + + <para> + Here is an example of a function using <command>RETURN + QUERY</command>: + +<programlisting> +CREATE FUNCTION get_available_flightid(date) RETURNS SETOF integer AS +$BODY$ +BEGIN + RETURN QUERY SELECT flightid + FROM flight + WHERE flightdate >= $1 + AND flightdate < ($1 + 1); + + -- Since execution is not finished, we can check whether rows were returned + -- and raise exception if not. + IF NOT FOUND THEN + RAISE EXCEPTION 'No flight at %.', $1; + END IF; + + RETURN; + END; +$BODY$ +LANGUAGE plpgsql; + +-- Returns available flights or raises exception if there are no +-- available flights. +SELECT * FROM get_available_flightid(CURRENT_DATE); +</programlisting> + </para> + + <note> + <para> + The current implementation of <command>RETURN NEXT</command> + and <command>RETURN QUERY</command> stores the entire result set + before returning from the function, as discussed above. That + means that if a <application>PL/pgSQL</application> function produces a + very large result set, performance might be poor: data will be + written to disk to avoid memory exhaustion, but the function + itself will not return until the entire result set has been + generated. A future version of <application>PL/pgSQL</application> might + allow users to define set-returning functions + that do not have this limitation. Currently, the point at + which data begins being written to disk is controlled by the + <xref linkend="guc-work-mem"/> + configuration variable. Administrators who have sufficient + memory to store larger result sets in memory should consider + increasing this parameter. + </para> + </note> + </sect3> + </sect2> + + <sect2 id="plpgsql-statements-returning-procedure"> + <title>Returning from a Procedure</title> + + <para> + A procedure does not have a return value. A procedure can therefore end + without a <command>RETURN</command> statement. If you wish to use + a <command>RETURN</command> statement to exit the code early, write + just <command>RETURN</command> with no expression. + </para> + + <para> + If the procedure has output parameters, the final values of the output + parameter variables will be returned to the caller. + </para> + </sect2> + + <sect2 id="plpgsql-statements-calling-procedure"> + <title>Calling a Procedure</title> + + <para> + A <application>PL/pgSQL</application> function, procedure, + or <command>DO</command> block can call a procedure + using <command>CALL</command>. Output parameters are handled + differently from the way that <command>CALL</command> works in plain + SQL. Each <literal>OUT</literal> or <literal>INOUT</literal> + parameter of the procedure must + correspond to a variable in the <command>CALL</command> statement, and + whatever the procedure returns is assigned back to that variable after + it returns. For example: +<programlisting> +CREATE PROCEDURE triple(INOUT x int) +LANGUAGE plpgsql +AS $$ +BEGIN + x := x * 3; +END; +$$; + +DO $$ +DECLARE myvar int := 5; +BEGIN + CALL triple(myvar); + RAISE NOTICE 'myvar = %', myvar; -- prints 15 +END; +$$; +</programlisting> + The variable corresponding to an output parameter can be a simple + variable or a field of a composite-type variable. Currently, + it cannot be an element of an array. + </para> + </sect2> + + <sect2 id="plpgsql-conditionals"> + <title>Conditionals</title> + + <para> + <command>IF</command> and <command>CASE</command> statements let you execute + alternative commands based on certain conditions. + <application>PL/pgSQL</application> has three forms of <command>IF</command>: + <itemizedlist> + <listitem> + <para><literal>IF ... THEN ... END IF</literal></para> + </listitem> + <listitem> + <para><literal>IF ... THEN ... ELSE ... END IF</literal></para> + </listitem> + <listitem> + <para><literal>IF ... THEN ... ELSIF ... THEN ... ELSE ... END IF</literal></para> + </listitem> + </itemizedlist> + + and two forms of <command>CASE</command>: + <itemizedlist> + <listitem> + <para><literal>CASE ... WHEN ... THEN ... ELSE ... END CASE</literal></para> + </listitem> + <listitem> + <para><literal>CASE WHEN ... THEN ... ELSE ... END CASE</literal></para> + </listitem> + </itemizedlist> + </para> + + <sect3> + <title><literal>IF-THEN</literal></title> + +<synopsis> +IF <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> +END IF; +</synopsis> + + <para> + <literal>IF-THEN</literal> statements are the simplest form of + <literal>IF</literal>. The statements between + <literal>THEN</literal> and <literal>END IF</literal> will be + executed if the condition is true. Otherwise, they are + skipped. + </para> + + <para> + Example: +<programlisting> +IF v_user_id <> 0 THEN + UPDATE users SET email = v_email WHERE user_id = v_user_id; +END IF; +</programlisting> + </para> + </sect3> + + <sect3> + <title><literal>IF-THEN-ELSE</literal></title> + +<synopsis> +IF <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> +ELSE + <replaceable>statements</replaceable> +END IF; +</synopsis> + + <para> + <literal>IF-THEN-ELSE</literal> statements add to + <literal>IF-THEN</literal> by letting you specify an + alternative set of statements that should be executed if the + condition is not true. (Note this includes the case where the + condition evaluates to NULL.) + </para> + + <para> + Examples: +<programlisting> +IF parentid IS NULL OR parentid = '' +THEN + RETURN fullname; +ELSE + RETURN hp_true_filename(parentid) || '/' || fullname; +END IF; +</programlisting> + +<programlisting> +IF v_count > 0 THEN + INSERT INTO users_count (count) VALUES (v_count); + RETURN 't'; +ELSE + RETURN 'f'; +END IF; +</programlisting> + </para> + </sect3> + + <sect3> + <title><literal>IF-THEN-ELSIF</literal></title> + +<synopsis> +IF <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> +<optional> ELSIF <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> +<optional> ELSIF <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> + ... +</optional> +</optional> +<optional> ELSE + <replaceable>statements</replaceable> </optional> +END IF; +</synopsis> + + <para> + Sometimes there are more than just two alternatives. + <literal>IF-THEN-ELSIF</literal> provides a convenient + method of checking several alternatives in turn. + The <literal>IF</literal> conditions are tested successively + until the first one that is true is found. Then the + associated statement(s) are executed, after which control + passes to the next statement after <literal>END IF</literal>. + (Any subsequent <literal>IF</literal> conditions are <emphasis>not</emphasis> + tested.) If none of the <literal>IF</literal> conditions is true, + then the <literal>ELSE</literal> block (if any) is executed. + </para> + + <para> + Here is an example: + +<programlisting> +IF number = 0 THEN + result := 'zero'; +ELSIF number > 0 THEN + result := 'positive'; +ELSIF number < 0 THEN + result := 'negative'; +ELSE + -- hmm, the only other possibility is that number is null + result := 'NULL'; +END IF; +</programlisting> + </para> + + <para> + The key word <literal>ELSIF</literal> can also be spelled + <literal>ELSEIF</literal>. + </para> + + <para> + An alternative way of accomplishing the same task is to nest + <literal>IF-THEN-ELSE</literal> statements, as in the + following example: + +<programlisting> +IF demo_row.sex = 'm' THEN + pretty_sex := 'man'; +ELSE + IF demo_row.sex = 'f' THEN + pretty_sex := 'woman'; + END IF; +END IF; +</programlisting> + </para> + + <para> + However, this method requires writing a matching <literal>END IF</literal> + for each <literal>IF</literal>, so it is much more cumbersome than + using <literal>ELSIF</literal> when there are many alternatives. + </para> + </sect3> + + <sect3> + <title>Simple <literal>CASE</literal></title> + +<synopsis> +CASE <replaceable>search-expression</replaceable> + WHEN <replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> <optional> ... </optional></optional> THEN + <replaceable>statements</replaceable> + <optional> WHEN <replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> <optional> ... </optional></optional> THEN + <replaceable>statements</replaceable> + ... </optional> + <optional> ELSE + <replaceable>statements</replaceable> </optional> +END CASE; +</synopsis> + + <para> + The simple form of <command>CASE</command> provides conditional execution + based on equality of operands. The <replaceable>search-expression</replaceable> + is evaluated (once) and successively compared to each + <replaceable>expression</replaceable> in the <literal>WHEN</literal> clauses. + If a match is found, then the corresponding + <replaceable>statements</replaceable> are executed, and then control + passes to the next statement after <literal>END CASE</literal>. (Subsequent + <literal>WHEN</literal> expressions are not evaluated.) If no match is + found, the <literal>ELSE</literal> <replaceable>statements</replaceable> are + executed; but if <literal>ELSE</literal> is not present, then a + <literal>CASE_NOT_FOUND</literal> exception is raised. + </para> + + <para> + Here is a simple example: + +<programlisting> +CASE x + WHEN 1, 2 THEN + msg := 'one or two'; + ELSE + msg := 'other value than one or two'; +END CASE; +</programlisting> + </para> + </sect3> + + <sect3> + <title>Searched <literal>CASE</literal></title> + +<synopsis> +CASE + WHEN <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> + <optional> WHEN <replaceable>boolean-expression</replaceable> THEN + <replaceable>statements</replaceable> + ... </optional> + <optional> ELSE + <replaceable>statements</replaceable> </optional> +END CASE; +</synopsis> + + <para> + The searched form of <command>CASE</command> provides conditional execution + based on truth of Boolean expressions. Each <literal>WHEN</literal> clause's + <replaceable>boolean-expression</replaceable> is evaluated in turn, + until one is found that yields <literal>true</literal>. Then the + corresponding <replaceable>statements</replaceable> are executed, and + then control passes to the next statement after <literal>END CASE</literal>. + (Subsequent <literal>WHEN</literal> expressions are not evaluated.) + If no true result is found, the <literal>ELSE</literal> + <replaceable>statements</replaceable> are executed; + but if <literal>ELSE</literal> is not present, then a + <literal>CASE_NOT_FOUND</literal> exception is raised. + </para> + + <para> + Here is an example: + +<programlisting> +CASE + WHEN x BETWEEN 0 AND 10 THEN + msg := 'value is between zero and ten'; + WHEN x BETWEEN 11 AND 20 THEN + msg := 'value is between eleven and twenty'; +END CASE; +</programlisting> + </para> + + <para> + This form of <command>CASE</command> is entirely equivalent to + <literal>IF-THEN-ELSIF</literal>, except for the rule that reaching + an omitted <literal>ELSE</literal> clause results in an error rather + than doing nothing. + </para> + + </sect3> + </sect2> + + <sect2 id="plpgsql-control-structures-loops"> + <title>Simple Loops</title> + + <indexterm zone="plpgsql-control-structures-loops"> + <primary>loop</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + With the <literal>LOOP</literal>, <literal>EXIT</literal>, + <literal>CONTINUE</literal>, <literal>WHILE</literal>, <literal>FOR</literal>, + and <literal>FOREACH</literal> statements, you can arrange for your + <application>PL/pgSQL</application> function to repeat a series of commands. + </para> + + <sect3> + <title><literal>LOOP</literal></title> + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + + <para> + <literal>LOOP</literal> defines an unconditional loop that is repeated + indefinitely until terminated by an <literal>EXIT</literal> or + <command>RETURN</command> statement. The optional + <replaceable>label</replaceable> can be used by <literal>EXIT</literal> + and <literal>CONTINUE</literal> statements within nested loops to + specify which loop those statements refer to. + </para> + </sect3> + + <sect3> + <title><literal>EXIT</literal></title> + + <indexterm> + <primary>EXIT</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + +<synopsis> +EXIT <optional> <replaceable>label</replaceable> </optional> <optional> WHEN <replaceable>boolean-expression</replaceable> </optional>; +</synopsis> + + <para> + If no <replaceable>label</replaceable> is given, the innermost + loop is terminated and the statement following <literal>END + LOOP</literal> is executed next. If <replaceable>label</replaceable> + is given, it must be the label of the current or some outer + level of nested loop or block. Then the named loop or block is + terminated and control continues with the statement after the + loop's/block's corresponding <literal>END</literal>. + </para> + + <para> + If <literal>WHEN</literal> is specified, the loop exit occurs only if + <replaceable>boolean-expression</replaceable> is true. Otherwise, control passes + to the statement after <literal>EXIT</literal>. + </para> + + <para> + <literal>EXIT</literal> can be used with all types of loops; it is + not limited to use with unconditional loops. + </para> + + <para> + When used with a + <literal>BEGIN</literal> block, <literal>EXIT</literal> passes + control to the next statement after the end of the block. + Note that a label must be used for this purpose; an unlabeled + <literal>EXIT</literal> is never considered to match a + <literal>BEGIN</literal> block. (This is a change from + pre-8.4 releases of <productname>PostgreSQL</productname>, which + would allow an unlabeled <literal>EXIT</literal> to match + a <literal>BEGIN</literal> block.) + </para> + + <para> + Examples: +<programlisting> +LOOP + -- some computations + IF count > 0 THEN + EXIT; -- exit loop + END IF; +END LOOP; + +LOOP + -- some computations + EXIT WHEN count > 0; -- same result as previous example +END LOOP; + +<<ablock>> +BEGIN + -- some computations + IF stocks > 100000 THEN + EXIT ablock; -- causes exit from the BEGIN block + END IF; + -- computations here will be skipped when stocks > 100000 +END; +</programlisting> + </para> + </sect3> + + <sect3> + <title><literal>CONTINUE</literal></title> + + <indexterm> + <primary>CONTINUE</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + +<synopsis> +CONTINUE <optional> <replaceable>label</replaceable> </optional> <optional> WHEN <replaceable>boolean-expression</replaceable> </optional>; +</synopsis> + + <para> + If no <replaceable>label</replaceable> is given, the next iteration of + the innermost loop is begun. That is, all statements remaining + in the loop body are skipped, and control returns + to the loop control expression (if any) to determine whether + another loop iteration is needed. + If <replaceable>label</replaceable> is present, it + specifies the label of the loop whose execution will be + continued. + </para> + + <para> + If <literal>WHEN</literal> is specified, the next iteration of the + loop is begun only if <replaceable>boolean-expression</replaceable> is + true. Otherwise, control passes to the statement after + <literal>CONTINUE</literal>. + </para> + + <para> + <literal>CONTINUE</literal> can be used with all types of loops; it + is not limited to use with unconditional loops. + </para> + + <para> + Examples: +<programlisting> +LOOP + -- some computations + EXIT WHEN count > 100; + CONTINUE WHEN count < 50; + -- some computations for count IN [50 .. 100] +END LOOP; +</programlisting> + </para> + </sect3> + + + <sect3> + <title><literal>WHILE</literal></title> + + <indexterm> + <primary>WHILE</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +WHILE <replaceable>boolean-expression</replaceable> LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + + <para> + The <literal>WHILE</literal> statement repeats a + sequence of statements so long as the + <replaceable>boolean-expression</replaceable> + evaluates to true. The expression is checked just before + each entry to the loop body. + </para> + + <para> + For example: +<programlisting> +WHILE amount_owed > 0 AND gift_certificate_balance > 0 LOOP + -- some computations here +END LOOP; + +WHILE NOT done LOOP + -- some computations here +END LOOP; +</programlisting> + </para> + </sect3> + + <sect3 id="plpgsql-integer-for"> + <title><literal>FOR</literal> (Integer Variant)</title> + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +FOR <replaceable>name</replaceable> IN <optional> REVERSE </optional> <replaceable>expression</replaceable> .. <replaceable>expression</replaceable> <optional> BY <replaceable>expression</replaceable> </optional> LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + + <para> + This form of <literal>FOR</literal> creates a loop that iterates over a range + of integer values. The variable + <replaceable>name</replaceable> is automatically defined as type + <type>integer</type> and exists only inside the loop (any existing + definition of the variable name is ignored within the loop). + The two expressions giving + the lower and upper bound of the range are evaluated once when entering + the loop. If the <literal>BY</literal> clause isn't specified the iteration + step is 1, otherwise it's the value specified in the <literal>BY</literal> + clause, which again is evaluated once on loop entry. + If <literal>REVERSE</literal> is specified then the step value is + subtracted, rather than added, after each iteration. + </para> + + <para> + Some examples of integer <literal>FOR</literal> loops: +<programlisting> +FOR i IN 1..10 LOOP + -- i will take on the values 1,2,3,4,5,6,7,8,9,10 within the loop +END LOOP; + +FOR i IN REVERSE 10..1 LOOP + -- i will take on the values 10,9,8,7,6,5,4,3,2,1 within the loop +END LOOP; + +FOR i IN REVERSE 10..1 BY 2 LOOP + -- i will take on the values 10,8,6,4,2 within the loop +END LOOP; +</programlisting> + </para> + + <para> + If the lower bound is greater than the upper bound (or less than, + in the <literal>REVERSE</literal> case), the loop body is not + executed at all. No error is raised. + </para> + + <para> + If a <replaceable>label</replaceable> is attached to the + <literal>FOR</literal> loop then the integer loop variable can be + referenced with a qualified name, using that + <replaceable>label</replaceable>. + </para> + </sect3> + </sect2> + + <sect2 id="plpgsql-records-iterating"> + <title>Looping through Query Results</title> + + <para> + Using a different type of <literal>FOR</literal> loop, you can iterate through + the results of a query and manipulate that data + accordingly. The syntax is: +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +FOR <replaceable>target</replaceable> IN <replaceable>query</replaceable> LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + The <replaceable>target</replaceable> is a record variable, row variable, + or comma-separated list of scalar variables. + The <replaceable>target</replaceable> is successively assigned each row + resulting from the <replaceable>query</replaceable> and the loop body is + executed for each row. Here is an example: +<programlisting> +CREATE FUNCTION refresh_mviews() RETURNS integer AS $$ +DECLARE + mviews RECORD; +BEGIN + RAISE NOTICE 'Refreshing all materialized views...'; + + FOR mviews IN + SELECT n.nspname AS mv_schema, + c.relname AS mv_name, + pg_catalog.pg_get_userbyid(c.relowner) AS owner + FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON (n.oid = c.relnamespace) + WHERE c.relkind = 'm' + ORDER BY 1 + LOOP + + -- Now "mviews" has one record with information about the materialized view + + RAISE NOTICE 'Refreshing materialized view %.% (owner: %)...', + quote_ident(mviews.mv_schema), + quote_ident(mviews.mv_name), + quote_ident(mviews.owner); + EXECUTE format('REFRESH MATERIALIZED VIEW %I.%I', mviews.mv_schema, mviews.mv_name); + END LOOP; + + RAISE NOTICE 'Done refreshing materialized views.'; + RETURN 1; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + If the loop is terminated by an <literal>EXIT</literal> statement, the last + assigned row value is still accessible after the loop. + </para> + + <para> + The <replaceable>query</replaceable> used in this type of <literal>FOR</literal> + statement can be any SQL command that returns rows to the caller: + <command>SELECT</command> is the most common case, + but you can also use <command>INSERT</command>, <command>UPDATE</command>, or + <command>DELETE</command> with a <literal>RETURNING</literal> clause. Some utility + commands such as <command>EXPLAIN</command> will work too. + </para> + + <para> + <application>PL/pgSQL</application> variables are replaced by query parameters, + and the query plan is cached for possible re-use, as discussed in + detail in <xref linkend="plpgsql-var-subst"/> and + <xref linkend="plpgsql-plan-caching"/>. + </para> + + <para> + The <literal>FOR-IN-EXECUTE</literal> statement is another way to iterate over + rows: +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +FOR <replaceable>target</replaceable> IN EXECUTE <replaceable>text_expression</replaceable> <optional> USING <replaceable>expression</replaceable> <optional>, ... </optional> </optional> LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + This is like the previous form, except that the source query + is specified as a string expression, which is evaluated and replanned + on each entry to the <literal>FOR</literal> loop. This allows the programmer to + choose the speed of a preplanned query or the flexibility of a dynamic + query, just as with a plain <command>EXECUTE</command> statement. + As with <command>EXECUTE</command>, parameter values can be inserted + into the dynamic command via <literal>USING</literal>. + </para> + + <para> + Another way to specify the query whose results should be iterated + through is to declare it as a cursor. This is described in + <xref linkend="plpgsql-cursor-for-loop"/>. + </para> + </sect2> + + <sect2 id="plpgsql-foreach-array"> + <title>Looping through Arrays</title> + + <para> + The <literal>FOREACH</literal> loop is much like a <literal>FOR</literal> loop, + but instead of iterating through the rows returned by an SQL query, + it iterates through the elements of an array value. + (In general, <literal>FOREACH</literal> is meant for looping through + components of a composite-valued expression; variants for looping + through composites besides arrays may be added in future.) + The <literal>FOREACH</literal> statement to loop over an array is: + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +FOREACH <replaceable>target</replaceable> <optional> SLICE <replaceable>number</replaceable> </optional> IN ARRAY <replaceable>expression</replaceable> LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + </para> + + <para> + Without <literal>SLICE</literal>, or if <literal>SLICE 0</literal> is specified, + the loop iterates through individual elements of the array produced + by evaluating the <replaceable>expression</replaceable>. + The <replaceable>target</replaceable> variable is assigned each + element value in sequence, and the loop body is executed for each element. + Here is an example of looping through the elements of an integer + array: + +<programlisting> +CREATE FUNCTION sum(int[]) RETURNS int8 AS $$ +DECLARE + s int8 := 0; + x int; +BEGIN + FOREACH x IN ARRAY $1 + LOOP + s := s + x; + END LOOP; + RETURN s; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + The elements are visited in storage order, regardless of the number of + array dimensions. Although the <replaceable>target</replaceable> is + usually just a single variable, it can be a list of variables when + looping through an array of composite values (records). In that case, + for each array element, the variables are assigned from successive + columns of the composite value. + </para> + + <para> + With a positive <literal>SLICE</literal> value, <literal>FOREACH</literal> + iterates through slices of the array rather than single elements. + The <literal>SLICE</literal> value must be an integer constant not larger + than the number of dimensions of the array. The + <replaceable>target</replaceable> variable must be an array, + and it receives successive slices of the array value, where each slice + is of the number of dimensions specified by <literal>SLICE</literal>. + Here is an example of iterating through one-dimensional slices: + +<programlisting> +CREATE FUNCTION scan_rows(int[]) RETURNS void AS $$ +DECLARE + x int[]; +BEGIN + FOREACH x SLICE 1 IN ARRAY $1 + LOOP + RAISE NOTICE 'row = %', x; + END LOOP; +END; +$$ LANGUAGE plpgsql; + +SELECT scan_rows(ARRAY[[1,2,3],[4,5,6],[7,8,9],[10,11,12]]); + +NOTICE: row = {1,2,3} +NOTICE: row = {4,5,6} +NOTICE: row = {7,8,9} +NOTICE: row = {10,11,12} +</programlisting> + </para> + </sect2> + + <sect2 id="plpgsql-error-trapping"> + <title>Trapping Errors</title> + + <indexterm> + <primary>exceptions</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + By default, any error occurring in a <application>PL/pgSQL</application> + function aborts execution of the function and the + surrounding transaction. You can trap errors and recover + from them by using a <command>BEGIN</command> block with an + <literal>EXCEPTION</literal> clause. The syntax is an extension of the + normal syntax for a <command>BEGIN</command> block: + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +<optional> DECLARE + <replaceable>declarations</replaceable> </optional> +BEGIN + <replaceable>statements</replaceable> +EXCEPTION + WHEN <replaceable>condition</replaceable> <optional> OR <replaceable>condition</replaceable> ... </optional> THEN + <replaceable>handler_statements</replaceable> + <optional> WHEN <replaceable>condition</replaceable> <optional> OR <replaceable>condition</replaceable> ... </optional> THEN + <replaceable>handler_statements</replaceable> + ... </optional> +END; +</synopsis> + </para> + + <para> + If no error occurs, this form of block simply executes all the + <replaceable>statements</replaceable>, and then control passes + to the next statement after <literal>END</literal>. But if an error + occurs within the <replaceable>statements</replaceable>, further + processing of the <replaceable>statements</replaceable> is + abandoned, and control passes to the <literal>EXCEPTION</literal> list. + The list is searched for the first <replaceable>condition</replaceable> + matching the error that occurred. If a match is found, the + corresponding <replaceable>handler_statements</replaceable> are + executed, and then control passes to the next statement after + <literal>END</literal>. If no match is found, the error propagates out + as though the <literal>EXCEPTION</literal> clause were not there at all: + the error can be caught by an enclosing block with + <literal>EXCEPTION</literal>, or if there is none it aborts processing + of the function. + </para> + + <para> + The <replaceable>condition</replaceable> names can be any of + those shown in <xref linkend="errcodes-appendix"/>. A category + name matches any error within its category. The special + condition name <literal>OTHERS</literal> matches every error type except + <literal>QUERY_CANCELED</literal> and <literal>ASSERT_FAILURE</literal>. + (It is possible, but often unwise, to trap those two error types + by name.) Condition names are + not case-sensitive. Also, an error condition can be specified + by <literal>SQLSTATE</literal> code; for example these are equivalent: +<programlisting> +WHEN division_by_zero THEN ... +WHEN SQLSTATE '22012' THEN ... +</programlisting> + </para> + + <para> + If a new error occurs within the selected + <replaceable>handler_statements</replaceable>, it cannot be caught + by this <literal>EXCEPTION</literal> clause, but is propagated out. + A surrounding <literal>EXCEPTION</literal> clause could catch it. + </para> + + <para> + When an error is caught by an <literal>EXCEPTION</literal> clause, + the local variables of the <application>PL/pgSQL</application> function + remain as they were when the error occurred, but all changes + to persistent database state within the block are rolled back. + As an example, consider this fragment: + +<programlisting> +INSERT INTO mytab(firstname, lastname) VALUES('Tom', 'Jones'); +BEGIN + UPDATE mytab SET firstname = 'Joe' WHERE lastname = 'Jones'; + x := x + 1; + y := x / 0; +EXCEPTION + WHEN division_by_zero THEN + RAISE NOTICE 'caught division_by_zero'; + RETURN x; +END; +</programlisting> + + When control reaches the assignment to <literal>y</literal>, it will + fail with a <literal>division_by_zero</literal> error. This will be caught by + the <literal>EXCEPTION</literal> clause. The value returned in the + <command>RETURN</command> statement will be the incremented value of + <literal>x</literal>, but the effects of the <command>UPDATE</command> command will + have been rolled back. The <command>INSERT</command> command preceding the + block is not rolled back, however, so the end result is that the database + contains <literal>Tom Jones</literal> not <literal>Joe Jones</literal>. + </para> + + <tip> + <para> + A block containing an <literal>EXCEPTION</literal> clause is significantly + more expensive to enter and exit than a block without one. Therefore, + don't use <literal>EXCEPTION</literal> without need. + </para> + </tip> + + <example id="plpgsql-upsert-example"> + <title>Exceptions with <command>UPDATE</command>/<command>INSERT</command></title> + <para> + + This example uses exception handling to perform either + <command>UPDATE</command> or <command>INSERT</command>, as appropriate. It is + recommended that applications use <command>INSERT</command> with + <literal>ON CONFLICT DO UPDATE</literal> rather than actually using + this pattern. This example serves primarily to illustrate use of + <application>PL/pgSQL</application> control flow structures: + +<programlisting> +CREATE TABLE db (a INT PRIMARY KEY, b TEXT); + +CREATE FUNCTION merge_db(key INT, data TEXT) RETURNS VOID AS +$$ +BEGIN + LOOP + -- first try to update the key + UPDATE db SET b = data WHERE a = key; + IF found THEN + RETURN; + END IF; + -- not there, so try to insert the key + -- if someone else inserts the same key concurrently, + -- we could get a unique-key failure + BEGIN + INSERT INTO db(a,b) VALUES (key, data); + RETURN; + EXCEPTION WHEN unique_violation THEN + -- Do nothing, and loop to try the UPDATE again. + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; + +SELECT merge_db(1, 'david'); +SELECT merge_db(1, 'dennis'); +</programlisting> + + This coding assumes the <literal>unique_violation</literal> error is caused by + the <command>INSERT</command>, and not by, say, an <command>INSERT</command> in a + trigger function on the table. It might also misbehave if there is + more than one unique index on the table, since it will retry the + operation regardless of which index caused the error. + More safety could be had by using the + features discussed next to check that the trapped error was the one + expected. + </para> + </example> + + <sect3 id="plpgsql-exception-diagnostics"> + <title>Obtaining Information about an Error</title> + + <para> + Exception handlers frequently need to identify the specific error that + occurred. There are two ways to get information about the current + exception in <application>PL/pgSQL</application>: special variables and the + <command>GET STACKED DIAGNOSTICS</command> command. + </para> + + <para> + Within an exception handler, the special variable + <varname>SQLSTATE</varname> contains the error code that corresponds to + the exception that was raised (refer to <xref linkend="errcodes-table"/> + for a list of possible error codes). The special variable + <varname>SQLERRM</varname> contains the error message associated with the + exception. These variables are undefined outside exception handlers. + </para> + + <para> + Within an exception handler, one may also retrieve + information about the current exception by using the + <command>GET STACKED DIAGNOSTICS</command> command, which has the form: + +<synopsis> +GET STACKED DIAGNOSTICS <replaceable>variable</replaceable> { = | := } <replaceable>item</replaceable> <optional> , ... </optional>; +</synopsis> + + Each <replaceable>item</replaceable> is a key word identifying a status + value to be assigned to the specified <replaceable>variable</replaceable> + (which should be of the right data type to receive it). The currently + available status items are shown + in <xref linkend="plpgsql-exception-diagnostics-values"/>. + </para> + + <table id="plpgsql-exception-diagnostics-values"> + <title>Error Diagnostics Items</title> + <tgroup cols="3"> + <colspec colname="col1" colwidth="2*"/> + <colspec colname="col2" colwidth="1*"/> + <colspec colname="col3" colwidth="2*"/> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry><literal>RETURNED_SQLSTATE</literal></entry> + <entry><type>text</type></entry> + <entry>the SQLSTATE error code of the exception</entry> + </row> + <row> + <entry><literal>COLUMN_NAME</literal></entry> + <entry><type>text</type></entry> + <entry>the name of the column related to exception</entry> + </row> + <row> + <entry><literal>CONSTRAINT_NAME</literal></entry> + <entry><type>text</type></entry> + <entry>the name of the constraint related to exception</entry> + </row> + <row> + <entry><literal>PG_DATATYPE_NAME</literal></entry> + <entry><type>text</type></entry> + <entry>the name of the data type related to exception</entry> + </row> + <row> + <entry><literal>MESSAGE_TEXT</literal></entry> + <entry><type>text</type></entry> + <entry>the text of the exception's primary message</entry> + </row> + <row> + <entry><literal>TABLE_NAME</literal></entry> + <entry><type>text</type></entry> + <entry>the name of the table related to exception</entry> + </row> + <row> + <entry><literal>SCHEMA_NAME</literal></entry> + <entry><type>text</type></entry> + <entry>the name of the schema related to exception</entry> + </row> + <row> + <entry><literal>PG_EXCEPTION_DETAIL</literal></entry> + <entry><type>text</type></entry> + <entry>the text of the exception's detail message, if any</entry> + </row> + <row> + <entry><literal>PG_EXCEPTION_HINT</literal></entry> + <entry><type>text</type></entry> + <entry>the text of the exception's hint message, if any</entry> + </row> + <row> + <entry><literal>PG_EXCEPTION_CONTEXT</literal></entry> + <entry><type>text</type></entry> + <entry>line(s) of text describing the call stack at the time of the + exception (see <xref linkend="plpgsql-call-stack"/>)</entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + If the exception did not set a value for an item, an empty string + will be returned. + </para> + + <para> + Here is an example: +<programlisting> +DECLARE + text_var1 text; + text_var2 text; + text_var3 text; +BEGIN + -- some processing which might cause an exception + ... +EXCEPTION WHEN OTHERS THEN + GET STACKED DIAGNOSTICS text_var1 = MESSAGE_TEXT, + text_var2 = PG_EXCEPTION_DETAIL, + text_var3 = PG_EXCEPTION_HINT; +END; +</programlisting> + </para> + </sect3> + </sect2> + + <sect2 id="plpgsql-call-stack"> + <title>Obtaining Execution Location Information</title> + + <para> + The <command>GET DIAGNOSTICS</command> command, previously described + in <xref linkend="plpgsql-statements-diagnostics"/>, retrieves information + about current execution state (whereas the <command>GET STACKED + DIAGNOSTICS</command> command discussed above reports information about + the execution state as of a previous error). Its <literal>PG_CONTEXT</literal> + status item is useful for identifying the current execution + location. <literal>PG_CONTEXT</literal> returns a text string with line(s) + of text describing the call stack. The first line refers to the current + function and currently executing <command>GET DIAGNOSTICS</command> + command. The second and any subsequent lines refer to calling functions + further up the call stack. For example: + +<programlisting> +CREATE OR REPLACE FUNCTION outer_func() RETURNS integer AS $$ +BEGIN + RETURN inner_func(); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION inner_func() RETURNS integer AS $$ +DECLARE + stack text; +BEGIN + GET DIAGNOSTICS stack = PG_CONTEXT; + RAISE NOTICE E'--- Call Stack ---\n%', stack; + RETURN 1; +END; +$$ LANGUAGE plpgsql; + +SELECT outer_func(); + +NOTICE: --- Call Stack --- +PL/pgSQL function inner_func() line 5 at GET DIAGNOSTICS +PL/pgSQL function outer_func() line 3 at RETURN +CONTEXT: PL/pgSQL function outer_func() line 3 at RETURN + outer_func + ------------ + 1 +(1 row) +</programlisting> + + </para> + + <para> + <literal>GET STACKED DIAGNOSTICS ... PG_EXCEPTION_CONTEXT</literal> + returns the same sort of stack trace, but describing the location + at which an error was detected, rather than the current location. + </para> + </sect2> + </sect1> + + <sect1 id="plpgsql-cursors"> + <title>Cursors</title> + + <indexterm zone="plpgsql-cursors"> + <primary>cursor</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + Rather than executing a whole query at once, it is possible to set + up a <firstterm>cursor</firstterm> that encapsulates the query, and then read + the query result a few rows at a time. One reason for doing this is + to avoid memory overrun when the result contains a large number of + rows. (However, <application>PL/pgSQL</application> users do not normally need + to worry about that, since <literal>FOR</literal> loops automatically use a cursor + internally to avoid memory problems.) A more interesting usage is to + return a reference to a cursor that a function has created, allowing the + caller to read the rows. This provides an efficient way to return + large row sets from functions. + </para> + + <sect2 id="plpgsql-cursor-declarations"> + <title>Declaring Cursor Variables</title> + + <para> + All access to cursors in <application>PL/pgSQL</application> goes through + cursor variables, which are always of the special data type + <type>refcursor</type>. One way to create a cursor variable + is just to declare it as a variable of type <type>refcursor</type>. + Another way is to use the cursor declaration syntax, + which in general is: +<synopsis> +<replaceable>name</replaceable> <optional> <optional> NO </optional> SCROLL </optional> CURSOR <optional> ( <replaceable>arguments</replaceable> ) </optional> FOR <replaceable>query</replaceable>; +</synopsis> + (<literal>FOR</literal> can be replaced by <literal>IS</literal> for + <productname>Oracle</productname> compatibility.) + If <literal>SCROLL</literal> is specified, the cursor will be capable of + scrolling backward; if <literal>NO SCROLL</literal> is specified, backward + fetches will be rejected; if neither specification appears, it is + query-dependent whether backward fetches will be allowed. + <replaceable>arguments</replaceable>, if specified, is a + comma-separated list of pairs <literal><replaceable>name</replaceable> + <replaceable>datatype</replaceable></literal> that define names to be + replaced by parameter values in the given query. The actual + values to substitute for these names will be specified later, + when the cursor is opened. + </para> + <para> + Some examples: +<programlisting> +DECLARE + curs1 refcursor; + curs2 CURSOR FOR SELECT * FROM tenk1; + curs3 CURSOR (key integer) FOR SELECT * FROM tenk1 WHERE unique1 = key; +</programlisting> + All three of these variables have the data type <type>refcursor</type>, + but the first can be used with any query, while the second has + a fully specified query already <firstterm>bound</firstterm> to it, and the last + has a parameterized query bound to it. (<literal>key</literal> will be + replaced by an integer parameter value when the cursor is opened.) + The variable <literal>curs1</literal> + is said to be <firstterm>unbound</firstterm> since it is not bound to + any particular query. + </para> + + <para> + The <literal>SCROLL</literal> option cannot be used when the cursor's + query uses <literal>FOR UPDATE/SHARE</literal>. Also, it is + best to use <literal>NO SCROLL</literal> with a query that involves + volatile functions. The implementation of <literal>SCROLL</literal> + assumes that re-reading the query's output will give consistent + results, which a volatile function might not do. + </para> + </sect2> + + <sect2 id="plpgsql-cursor-opening"> + <title>Opening Cursors</title> + + <para> + Before a cursor can be used to retrieve rows, it must be + <firstterm>opened</firstterm>. (This is the equivalent action to the SQL + command <command>DECLARE CURSOR</command>.) <application>PL/pgSQL</application> has + three forms of the <command>OPEN</command> statement, two of which use unbound + cursor variables while the third uses a bound cursor variable. + </para> + + <note> + <para> + Bound cursor variables can also be used without explicitly opening the cursor, + via the <command>FOR</command> statement described in + <xref linkend="plpgsql-cursor-for-loop"/>. + </para> + </note> + + <sect3> + <title><command>OPEN FOR</command> <replaceable>query</replaceable></title> + +<synopsis> +OPEN <replaceable>unbound_cursorvar</replaceable> <optional> <optional> NO </optional> SCROLL </optional> FOR <replaceable>query</replaceable>; +</synopsis> + + <para> + The cursor variable is opened and given the specified query to + execute. The cursor cannot be open already, and it must have been + declared as an unbound cursor variable (that is, as a simple + <type>refcursor</type> variable). The query must be a + <command>SELECT</command>, or something else that returns rows + (such as <command>EXPLAIN</command>). The query + is treated in the same way as other SQL commands in + <application>PL/pgSQL</application>: <application>PL/pgSQL</application> + variable names are substituted, and the query plan is cached for + possible reuse. When a <application>PL/pgSQL</application> + variable is substituted into the cursor query, the value that is + substituted is the one it has at the time of the <command>OPEN</command>; + subsequent changes to the variable will not affect the cursor's + behavior. + The <literal>SCROLL</literal> and <literal>NO SCROLL</literal> + options have the same meanings as for a bound cursor. + </para> + + <para> + An example: +<programlisting> +OPEN curs1 FOR SELECT * FROM foo WHERE key = mykey; +</programlisting> + </para> + </sect3> + + <sect3> + <title><command>OPEN FOR EXECUTE</command></title> + +<synopsis> +OPEN <replaceable>unbound_cursorvar</replaceable> <optional> <optional> NO </optional> SCROLL </optional> FOR EXECUTE <replaceable class="command">query_string</replaceable> + <optional> USING <replaceable>expression</replaceable> <optional>, ... </optional> </optional>; +</synopsis> + + <para> + The cursor variable is opened and given the specified query to + execute. The cursor cannot be open already, and it must have been + declared as an unbound cursor variable (that is, as a simple + <type>refcursor</type> variable). The query is specified as a string + expression, in the same way as in the <command>EXECUTE</command> + command. As usual, this gives flexibility so the query plan can vary + from one run to the next (see <xref linkend="plpgsql-plan-caching"/>), + and it also means that variable substitution is not done on the + command string. As with <command>EXECUTE</command>, parameter values + can be inserted into the dynamic command via + <literal>format()</literal> and <literal>USING</literal>. + The <literal>SCROLL</literal> and + <literal>NO SCROLL</literal> options have the same meanings as for a bound + cursor. + </para> + + <para> + An example: +<programlisting> +OPEN curs1 FOR EXECUTE format('SELECT * FROM %I WHERE col1 = $1',tabname) USING keyvalue; +</programlisting> + In this example, the table name is inserted into the query via + <function>format()</function>. The comparison value for <literal>col1</literal> + is inserted via a <literal>USING</literal> parameter, so it needs + no quoting. + </para> + </sect3> + + <sect3 id="plpgsql-open-bound-cursor"> + <title>Opening a Bound Cursor</title> + +<synopsis> +OPEN <replaceable>bound_cursorvar</replaceable> <optional> ( <optional> <replaceable>argument_name</replaceable> := </optional> <replaceable>argument_value</replaceable> <optional>, ...</optional> ) </optional>; +</synopsis> + + <para> + This form of <command>OPEN</command> is used to open a cursor + variable whose query was bound to it when it was declared. The + cursor cannot be open already. A list of actual argument value + expressions must appear if and only if the cursor was declared to + take arguments. These values will be substituted in the query. + </para> + + <para> + The query plan for a bound cursor is always considered cacheable; + there is no equivalent of <command>EXECUTE</command> in this case. + Notice that <literal>SCROLL</literal> and <literal>NO SCROLL</literal> cannot be + specified in <command>OPEN</command>, as the cursor's scrolling + behavior was already determined. + </para> + + <para> + Argument values can be passed using either <firstterm>positional</firstterm> + or <firstterm>named</firstterm> notation. In positional + notation, all arguments are specified in order. In named notation, + each argument's name is specified using <literal>:=</literal> to + separate it from the argument expression. Similar to calling + functions, described in <xref linkend="sql-syntax-calling-funcs"/>, it + is also allowed to mix positional and named notation. + </para> + + <para> + Examples (these use the cursor declaration examples above): +<programlisting> +OPEN curs2; +OPEN curs3(42); +OPEN curs3(key := 42); +</programlisting> + </para> + + <para> + Because variable substitution is done on a bound cursor's query, + there are really two ways to pass values into the cursor: either + with an explicit argument to <command>OPEN</command>, or implicitly by + referencing a <application>PL/pgSQL</application> variable in the query. + However, only variables declared before the bound cursor was + declared will be substituted into it. In either case the value to + be passed is determined at the time of the <command>OPEN</command>. + For example, another way to get the same effect as the + <literal>curs3</literal> example above is +<programlisting> +DECLARE + key integer; + curs4 CURSOR FOR SELECT * FROM tenk1 WHERE unique1 = key; +BEGIN + key := 42; + OPEN curs4; +</programlisting> + </para> + </sect3> + </sect2> + + <sect2 id="plpgsql-cursor-using"> + <title>Using Cursors</title> + + <para> + Once a cursor has been opened, it can be manipulated with the + statements described here. + </para> + + <para> + These manipulations need not occur in the same function that + opened the cursor to begin with. You can return a <type>refcursor</type> + value out of a function and let the caller operate on the cursor. + (Internally, a <type>refcursor</type> value is simply the string name + of a so-called portal containing the active query for the cursor. This name + can be passed around, assigned to other <type>refcursor</type> variables, + and so on, without disturbing the portal.) + </para> + + <para> + All portals are implicitly closed at transaction end. Therefore + a <type>refcursor</type> value is usable to reference an open cursor + only until the end of the transaction. + </para> + + <sect3> + <title><literal>FETCH</literal></title> + +<synopsis> +FETCH <optional> <replaceable>direction</replaceable> { FROM | IN } </optional> <replaceable>cursor</replaceable> INTO <replaceable>target</replaceable>; +</synopsis> + + <para> + <command>FETCH</command> retrieves the next row from the + cursor into a target, which might be a row variable, a record + variable, or a comma-separated list of simple variables, just like + <command>SELECT INTO</command>. If there is no next row, the + target is set to NULL(s). As with <command>SELECT + INTO</command>, the special variable <literal>FOUND</literal> can + be checked to see whether a row was obtained or not. + </para> + + <para> + The <replaceable>direction</replaceable> clause can be any of the + variants allowed in the SQL <xref linkend="sql-fetch"/> + command except the ones that can fetch + more than one row; namely, it can be + <literal>NEXT</literal>, + <literal>PRIOR</literal>, + <literal>FIRST</literal>, + <literal>LAST</literal>, + <literal>ABSOLUTE</literal> <replaceable>count</replaceable>, + <literal>RELATIVE</literal> <replaceable>count</replaceable>, + <literal>FORWARD</literal>, or + <literal>BACKWARD</literal>. + Omitting <replaceable>direction</replaceable> is the same + as specifying <literal>NEXT</literal>. + In the forms using a <replaceable>count</replaceable>, + the <replaceable>count</replaceable> can be any integer-valued + expression (unlike the SQL <command>FETCH</command> command, + which only allows an integer constant). + <replaceable>direction</replaceable> values that require moving + backward are likely to fail unless the cursor was declared or opened + with the <literal>SCROLL</literal> option. + </para> + + <para> + <replaceable>cursor</replaceable> must be the name of a <type>refcursor</type> + variable that references an open cursor portal. + </para> + + <para> + Examples: +<programlisting> +FETCH curs1 INTO rowvar; +FETCH curs2 INTO foo, bar, baz; +FETCH LAST FROM curs3 INTO x, y; +FETCH RELATIVE -2 FROM curs4 INTO x; +</programlisting> + </para> + </sect3> + + <sect3> + <title><literal>MOVE</literal></title> + +<synopsis> +MOVE <optional> <replaceable>direction</replaceable> { FROM | IN } </optional> <replaceable>cursor</replaceable>; +</synopsis> + + <para> + <command>MOVE</command> repositions a cursor without retrieving + any data. <command>MOVE</command> works exactly like the + <command>FETCH</command> command, except it only repositions the + cursor and does not return the row moved to. As with <command>SELECT + INTO</command>, the special variable <literal>FOUND</literal> can + be checked to see whether there was a next row to move to. + </para> + + <para> + Examples: +<programlisting> +MOVE curs1; +MOVE LAST FROM curs3; +MOVE RELATIVE -2 FROM curs4; +MOVE FORWARD 2 FROM curs4; +</programlisting> + </para> + </sect3> + + <sect3> + <title><literal>UPDATE/DELETE WHERE CURRENT OF</literal></title> + +<synopsis> +UPDATE <replaceable>table</replaceable> SET ... WHERE CURRENT OF <replaceable>cursor</replaceable>; +DELETE FROM <replaceable>table</replaceable> WHERE CURRENT OF <replaceable>cursor</replaceable>; +</synopsis> + + <para> + When a cursor is positioned on a table row, that row can be updated + or deleted using the cursor to identify the row. There are + restrictions on what the cursor's query can be (in particular, + no grouping) and it's best to use <literal>FOR UPDATE</literal> in the + cursor. For more information see the + <xref linkend="sql-declare"/> + reference page. + </para> + + <para> + An example: +<programlisting> +UPDATE foo SET dataval = myval WHERE CURRENT OF curs1; +</programlisting> + </para> + </sect3> + + <sect3> + <title><literal>CLOSE</literal></title> + +<synopsis> +CLOSE <replaceable>cursor</replaceable>; +</synopsis> + + <para> + <command>CLOSE</command> closes the portal underlying an open + cursor. This can be used to release resources earlier than end of + transaction, or to free up the cursor variable to be opened again. + </para> + + <para> + An example: +<programlisting> +CLOSE curs1; +</programlisting> + </para> + </sect3> + + <sect3> + <title>Returning Cursors</title> + + <para> + <application>PL/pgSQL</application> functions can return cursors to the + caller. This is useful to return multiple rows or columns, + especially with very large result sets. To do this, the function + opens the cursor and returns the cursor name to the caller (or simply + opens the cursor using a portal name specified by or otherwise known + to the caller). The caller can then fetch rows from the cursor. The + cursor can be closed by the caller, or it will be closed automatically + when the transaction closes. + </para> + + <para> + The portal name used for a cursor can be specified by the + programmer or automatically generated. To specify a portal name, + simply assign a string to the <type>refcursor</type> variable before + opening it. The string value of the <type>refcursor</type> variable + will be used by <command>OPEN</command> as the name of the underlying portal. + However, if the <type>refcursor</type> variable is null, + <command>OPEN</command> automatically generates a name that does not + conflict with any existing portal, and assigns it to the + <type>refcursor</type> variable. + </para> + + <note> + <para> + A bound cursor variable is initialized to the string value + representing its name, so that the portal name is the same as + the cursor variable name, unless the programmer overrides it + by assignment before opening the cursor. But an unbound cursor + variable defaults to the null value initially, so it will receive + an automatically-generated unique name, unless overridden. + </para> + </note> + + <para> + The following example shows one way a cursor name can be supplied by + the caller: + +<programlisting> +CREATE TABLE test (col text); +INSERT INTO test VALUES ('123'); + +CREATE FUNCTION reffunc(refcursor) RETURNS refcursor AS ' +BEGIN + OPEN $1 FOR SELECT col FROM test; + RETURN $1; +END; +' LANGUAGE plpgsql; + +BEGIN; +SELECT reffunc('funccursor'); +FETCH ALL IN funccursor; +COMMIT; +</programlisting> + </para> + + <para> + The following example uses automatic cursor name generation: + +<programlisting> +CREATE FUNCTION reffunc2() RETURNS refcursor AS ' +DECLARE + ref refcursor; +BEGIN + OPEN ref FOR SELECT col FROM test; + RETURN ref; +END; +' LANGUAGE plpgsql; + +-- need to be in a transaction to use cursors. +BEGIN; +SELECT reffunc2(); + + reffunc2 +-------------------- + <unnamed cursor 1> +(1 row) + +FETCH ALL IN "<unnamed cursor 1>"; +COMMIT; +</programlisting> + </para> + + <para> + The following example shows one way to return multiple cursors + from a single function: + +<programlisting> +CREATE FUNCTION myfunc(refcursor, refcursor) RETURNS SETOF refcursor AS $$ +BEGIN + OPEN $1 FOR SELECT * FROM table_1; + RETURN NEXT $1; + OPEN $2 FOR SELECT * FROM table_2; + RETURN NEXT $2; +END; +$$ LANGUAGE plpgsql; + +-- need to be in a transaction to use cursors. +BEGIN; + +SELECT * FROM myfunc('a', 'b'); + +FETCH ALL FROM a; +FETCH ALL FROM b; +COMMIT; +</programlisting> + </para> + </sect3> + </sect2> + + <sect2 id="plpgsql-cursor-for-loop"> + <title>Looping through a Cursor's Result</title> + + <para> + There is a variant of the <command>FOR</command> statement that allows + iterating through the rows returned by a cursor. The syntax is: + +<synopsis> +<optional> <<<replaceable>label</replaceable>>> </optional> +FOR <replaceable>recordvar</replaceable> IN <replaceable>bound_cursorvar</replaceable> <optional> ( <optional> <replaceable>argument_name</replaceable> := </optional> <replaceable>argument_value</replaceable> <optional>, ...</optional> ) </optional> LOOP + <replaceable>statements</replaceable> +END LOOP <optional> <replaceable>label</replaceable> </optional>; +</synopsis> + + The cursor variable must have been bound to some query when it was + declared, and it <emphasis>cannot</emphasis> be open already. The + <command>FOR</command> statement automatically opens the cursor, and it closes + the cursor again when the loop exits. A list of actual argument value + expressions must appear if and only if the cursor was declared to take + arguments. These values will be substituted in the query, in just + the same way as during an <command>OPEN</command> (see <xref + linkend="plpgsql-open-bound-cursor"/>). + </para> + + <para> + The variable <replaceable>recordvar</replaceable> is automatically + defined as type <type>record</type> and exists only inside the loop (any + existing definition of the variable name is ignored within the loop). + Each row returned by the cursor is successively assigned to this + record variable and the loop body is executed. + </para> + </sect2> + + </sect1> + + <sect1 id="plpgsql-transactions"> + <title>Transaction Management</title> + + <para> + In procedures invoked by the <command>CALL</command> command + as well as in anonymous code blocks (<command>DO</command> command), + it is possible to end transactions using the + commands <command>COMMIT</command> and <command>ROLLBACK</command>. A new + transaction is started automatically after a transaction is ended using + these commands, so there is no separate <command>START + TRANSACTION</command> command. (Note that <command>BEGIN</command> and + <command>END</command> have different meanings in PL/pgSQL.) + </para> + + <para> + Here is a simple example: +<programlisting> +CREATE PROCEDURE transaction_test1() +LANGUAGE plpgsql +AS $$ +BEGIN + FOR i IN 0..9 LOOP + INSERT INTO test1 (a) VALUES (i); + IF i % 2 = 0 THEN + COMMIT; + ELSE + ROLLBACK; + END IF; + END LOOP; +END; +$$; + +CALL transaction_test1(); +</programlisting> + </para> + + <indexterm zone="plpgsql-transaction-chain"> + <primary>chained transactions</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para id="plpgsql-transaction-chain"> + A new transaction starts out with default transaction characteristics such + as transaction isolation level. In cases where transactions are committed + in a loop, it might be desirable to start new transactions automatically + with the same characteristics as the previous one. The commands + <command>COMMIT AND CHAIN</command> and <command>ROLLBACK AND + CHAIN</command> accomplish this. + </para> + + <para> + Transaction control is only possible in <command>CALL</command> or + <command>DO</command> invocations from the top level or nested + <command>CALL</command> or <command>DO</command> invocations without any + other intervening command. For example, if the call stack is + <command>CALL proc1()</command> → <command>CALL proc2()</command> + → <command>CALL proc3()</command>, then the second and third + procedures can perform transaction control actions. But if the call stack + is <command>CALL proc1()</command> → <command>SELECT + func2()</command> → <command>CALL proc3()</command>, then the last + procedure cannot do transaction control, because of the + <command>SELECT</command> in between. + </para> + + <para> + Special considerations apply to cursor loops. Consider this example: +<programlisting> +CREATE PROCEDURE transaction_test2() +LANGUAGE plpgsql +AS $$ +DECLARE + r RECORD; +BEGIN + FOR r IN SELECT * FROM test2 ORDER BY x LOOP + INSERT INTO test1 (a) VALUES (r.x); + COMMIT; + END LOOP; +END; +$$; + +CALL transaction_test2(); +</programlisting> + Normally, cursors are automatically closed at transaction commit. + However, a cursor created as part of a loop like this is automatically + converted to a holdable cursor by the first <command>COMMIT</command> or + <command>ROLLBACK</command>. That means that the cursor is fully + evaluated at the first <command>COMMIT</command> or + <command>ROLLBACK</command> rather than row by row. The cursor is still + removed automatically after the loop, so this is mostly invisible to the + user. + </para> + + <para> + Transaction commands are not allowed in cursor loops driven by commands + that are not read-only (for example <command>UPDATE + ... RETURNING</command>). + </para> + + <para> + A transaction cannot be ended inside a block with exception handlers. + </para> + </sect1> + + <sect1 id="plpgsql-errors-and-messages"> + <title>Errors and Messages</title> + + <sect2 id="plpgsql-statements-raise"> + <title>Reporting Errors and Messages</title> + + <indexterm> + <primary>RAISE</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <indexterm> + <primary>reporting errors</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + Use the <command>RAISE</command> statement to report messages and + raise errors. + +<synopsis> +RAISE <optional> <replaceable class="parameter">level</replaceable> </optional> '<replaceable class="parameter">format</replaceable>' <optional>, <replaceable class="parameter">expression</replaceable> <optional>, ... </optional></optional> <optional> USING <replaceable class="parameter">option</replaceable> = <replaceable class="parameter">expression</replaceable> <optional>, ... </optional> </optional>; +RAISE <optional> <replaceable class="parameter">level</replaceable> </optional> <replaceable class="parameter">condition_name</replaceable> <optional> USING <replaceable class="parameter">option</replaceable> = <replaceable class="parameter">expression</replaceable> <optional>, ... </optional> </optional>; +RAISE <optional> <replaceable class="parameter">level</replaceable> </optional> SQLSTATE '<replaceable class="parameter">sqlstate</replaceable>' <optional> USING <replaceable class="parameter">option</replaceable> = <replaceable class="parameter">expression</replaceable> <optional>, ... </optional> </optional>; +RAISE <optional> <replaceable class="parameter">level</replaceable> </optional> USING <replaceable class="parameter">option</replaceable> = <replaceable class="parameter">expression</replaceable> <optional>, ... </optional>; +RAISE ; +</synopsis> + + The <replaceable class="parameter">level</replaceable> option specifies + the error severity. Allowed levels are <literal>DEBUG</literal>, + <literal>LOG</literal>, <literal>INFO</literal>, + <literal>NOTICE</literal>, <literal>WARNING</literal>, + and <literal>EXCEPTION</literal>, with <literal>EXCEPTION</literal> + being the default. + <literal>EXCEPTION</literal> raises an error (which normally aborts the + current transaction); the other levels only generate messages of different + priority levels. + Whether messages of a particular priority are reported to the client, + written to the server log, or both is controlled by the + <xref linkend="guc-log-min-messages"/> and + <xref linkend="guc-client-min-messages"/> configuration + variables. See <xref linkend="runtime-config"/> for more + information. + </para> + + <para> + After <replaceable class="parameter">level</replaceable> if any, + you can specify a <replaceable class="parameter">format</replaceable> string + (which must be a simple string literal, not an expression). The + format string specifies the error message text to be reported. + The format string can be followed + by optional argument expressions to be inserted into the message. + Inside the format string, <literal>%</literal> is replaced by the + string representation of the next optional argument's value. Write + <literal>%%</literal> to emit a literal <literal>%</literal>. + The number of arguments must match the number of <literal>%</literal> + placeholders in the format string, or an error is raised during + the compilation of the function. + </para> + + <para> + In this example, the value of <literal>v_job_id</literal> will replace the + <literal>%</literal> in the string: +<programlisting> +RAISE NOTICE 'Calling cs_create_job(%)', v_job_id; +</programlisting> + </para> + + <para> + You can attach additional information to the error report by writing + <literal>USING</literal> followed by <replaceable + class="parameter">option</replaceable> = <replaceable + class="parameter">expression</replaceable> items. Each + <replaceable class="parameter">expression</replaceable> can be any + string-valued expression. The allowed <replaceable + class="parameter">option</replaceable> key words are: + + <variablelist id="raise-using-options"> + <varlistentry> + <term><literal>MESSAGE</literal></term> + <listitem> + <para>Sets the error message text. This option can't be used in the + form of <command>RAISE</command> that includes a format string + before <literal>USING</literal>.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>DETAIL</literal></term> + <listitem> + <para>Supplies an error detail message.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>HINT</literal></term> + <listitem> + <para>Supplies a hint message.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>ERRCODE</literal></term> + <listitem> + <para>Specifies the error code (SQLSTATE) to report, either by condition + name, as shown in <xref linkend="errcodes-appendix"/>, or directly as a + five-character SQLSTATE code.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>COLUMN</literal></term> + <term><literal>CONSTRAINT</literal></term> + <term><literal>DATATYPE</literal></term> + <term><literal>TABLE</literal></term> + <term><literal>SCHEMA</literal></term> + <listitem> + <para>Supplies the name of a related object.</para> + </listitem> + </varlistentry> + </variablelist> + </para> + + <para> + This example will abort the transaction with the given error message + and hint: +<programlisting> +RAISE EXCEPTION 'Nonexistent ID --> %', user_id + USING HINT = 'Please check your user ID'; +</programlisting> + </para> + + <para> + These two examples show equivalent ways of setting the SQLSTATE: +<programlisting> +RAISE 'Duplicate user ID: %', user_id USING ERRCODE = 'unique_violation'; +RAISE 'Duplicate user ID: %', user_id USING ERRCODE = '23505'; +</programlisting> + </para> + + <para> + There is a second <command>RAISE</command> syntax in which the main argument + is the condition name or SQLSTATE to be reported, for example: +<programlisting> +RAISE division_by_zero; +RAISE SQLSTATE '22012'; +</programlisting> + In this syntax, <literal>USING</literal> can be used to supply a custom + error message, detail, or hint. Another way to do the earlier + example is +<programlisting> +RAISE unique_violation USING MESSAGE = 'Duplicate user ID: ' || user_id; +</programlisting> + </para> + + <para> + Still another variant is to write <literal>RAISE USING</literal> or <literal>RAISE + <replaceable class="parameter">level</replaceable> USING</literal> and put + everything else into the <literal>USING</literal> list. + </para> + + <para> + The last variant of <command>RAISE</command> has no parameters at all. + This form can only be used inside a <literal>BEGIN</literal> block's + <literal>EXCEPTION</literal> clause; + it causes the error currently being handled to be re-thrown. + </para> + + <note> + <para> + Before <productname>PostgreSQL</productname> 9.1, <command>RAISE</command> without + parameters was interpreted as re-throwing the error from the block + containing the active exception handler. Thus an <literal>EXCEPTION</literal> + clause nested within that handler could not catch it, even if the + <command>RAISE</command> was within the nested <literal>EXCEPTION</literal> clause's + block. This was deemed surprising as well as being incompatible with + Oracle's PL/SQL. + </para> + </note> + + <para> + If no condition name nor SQLSTATE is specified in a + <command>RAISE EXCEPTION</command> command, the default is to use + <literal>ERRCODE_RAISE_EXCEPTION</literal> (<literal>P0001</literal>). + If no message text is specified, the default is to use the condition + name or SQLSTATE as message text. + </para> + + <note> + <para> + When specifying an error code by SQLSTATE code, you are not + limited to the predefined error codes, but can select any + error code consisting of five digits and/or upper-case ASCII + letters, other than <literal>00000</literal>. It is recommended that + you avoid throwing error codes that end in three zeroes, because + these are category codes and can only be trapped by trapping + the whole category. + </para> + </note> + + </sect2> + + <sect2 id="plpgsql-statements-assert"> + <title>Checking Assertions</title> + + <indexterm> + <primary>ASSERT</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <indexterm> + <primary>assertions</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <indexterm> + <primary><varname>plpgsql.check_asserts</varname> configuration parameter</primary> + </indexterm> + + <para> + The <command>ASSERT</command> statement is a convenient shorthand for + inserting debugging checks into <application>PL/pgSQL</application> + functions. + +<synopsis> +ASSERT <replaceable class="parameter">condition</replaceable> <optional> , <replaceable class="parameter">message</replaceable> </optional>; +</synopsis> + + The <replaceable class="parameter">condition</replaceable> is a Boolean + expression that is expected to always evaluate to true; if it does, + the <command>ASSERT</command> statement does nothing further. If the + result is false or null, then an <literal>ASSERT_FAILURE</literal> exception + is raised. (If an error occurs while evaluating + the <replaceable class="parameter">condition</replaceable>, it is + reported as a normal error.) + </para> + + <para> + If the optional <replaceable class="parameter">message</replaceable> is + provided, it is an expression whose result (if not null) replaces the + default error message text <quote>assertion failed</quote>, should + the <replaceable class="parameter">condition</replaceable> fail. + The <replaceable class="parameter">message</replaceable> expression is + not evaluated in the normal case where the assertion succeeds. + </para> + + <para> + Testing of assertions can be enabled or disabled via the configuration + parameter <literal>plpgsql.check_asserts</literal>, which takes a Boolean + value; the default is <literal>on</literal>. If this parameter + is <literal>off</literal> then <command>ASSERT</command> statements do nothing. + </para> + + <para> + Note that <command>ASSERT</command> is meant for detecting program + bugs, not for reporting ordinary error conditions. Use + the <command>RAISE</command> statement, described above, for that. + </para> + + </sect2> + + </sect1> + + <sect1 id="plpgsql-trigger"> + <title>Trigger Functions</title> + + <indexterm zone="plpgsql-trigger"> + <primary>trigger</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + + <para> + <application>PL/pgSQL</application> can be used to define trigger + functions on data changes or database events. + A trigger function is created with the <command>CREATE FUNCTION</command> + command, declaring it as a function with no arguments and a return type of + <type>trigger</type> (for data change triggers) or + <type>event_trigger</type> (for database event triggers). + Special local variables named <varname>TG_<replaceable>something</replaceable></varname> are + automatically defined to describe the condition that triggered the call. + </para> + + <sect2 id="plpgsql-dml-trigger"> + <title>Triggers on Data Changes</title> + + <para> + A <link linkend="triggers">data change trigger</link> is declared as a + function with no arguments and a return type of <type>trigger</type>. + Note that the function must be declared with no arguments even if it + expects to receive some arguments specified in <command>CREATE TRIGGER</command> + — such arguments are passed via <varname>TG_ARGV</varname>, as described + below. + </para> + + <para> + When a <application>PL/pgSQL</application> function is called as a + trigger, several special variables are created automatically in the + top-level block. They are: + + <variablelist> + <varlistentry> + <term><varname>NEW</varname></term> + <listitem> + <para> + Data type <type>RECORD</type>; variable holding the new + database row for <command>INSERT</command>/<command>UPDATE</command> operations in row-level + triggers. This variable is null in statement-level triggers + and for <command>DELETE</command> operations. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>OLD</varname></term> + <listitem> + <para> + Data type <type>RECORD</type>; variable holding the old + database row for <command>UPDATE</command>/<command>DELETE</command> operations in row-level + triggers. This variable is null in statement-level triggers + and for <command>INSERT</command> operations. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_NAME</varname></term> + <listitem> + <para> + Data type <type>name</type>; variable that contains the name of the trigger actually + fired. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_WHEN</varname></term> + <listitem> + <para> + Data type <type>text</type>; a string of + <literal>BEFORE</literal>, <literal>AFTER</literal>, or + <literal>INSTEAD OF</literal>, depending on the trigger's definition. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_LEVEL</varname></term> + <listitem> + <para> + Data type <type>text</type>; a string of either + <literal>ROW</literal> or <literal>STATEMENT</literal> + depending on the trigger's definition. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_OP</varname></term> + <listitem> + <para> + Data type <type>text</type>; a string of + <literal>INSERT</literal>, <literal>UPDATE</literal>, + <literal>DELETE</literal>, or <literal>TRUNCATE</literal> + telling for which operation the trigger was fired. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_RELID</varname></term> + <listitem> + <para> + Data type <type>oid</type>; the object ID of the table that caused the + trigger invocation. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_RELNAME</varname></term> + <listitem> + <para> + Data type <type>name</type>; the name of the table that caused the trigger + invocation. This is now deprecated, and could disappear in a future + release. Use <literal>TG_TABLE_NAME</literal> instead. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_TABLE_NAME</varname></term> + <listitem> + <para> + Data type <type>name</type>; the name of the table that + caused the trigger invocation. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_TABLE_SCHEMA</varname></term> + <listitem> + <para> + Data type <type>name</type>; the name of the schema of the + table that caused the trigger invocation. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_NARGS</varname></term> + <listitem> + <para> + Data type <type>integer</type>; the number of arguments given to the trigger + function in the <command>CREATE TRIGGER</command> statement. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_ARGV[]</varname></term> + <listitem> + <para> + Data type array of <type>text</type>; the arguments from + the <command>CREATE TRIGGER</command> statement. + The index counts from 0. Invalid + indexes (less than 0 or greater than or equal to <varname>tg_nargs</varname>) + result in a null value. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + + <para> + A trigger function must return either <symbol>NULL</symbol> or a + record/row value having exactly the structure of the table the + trigger was fired for. + </para> + + <para> + Row-level triggers fired <literal>BEFORE</literal> can return null to signal the + trigger manager to skip the rest of the operation for this row + (i.e., subsequent triggers are not fired, and the + <command>INSERT</command>/<command>UPDATE</command>/<command>DELETE</command> does not occur + for this row). If a nonnull + value is returned then the operation proceeds with that row value. + Returning a row value different from the original value + of <varname>NEW</varname> alters the row that will be inserted or + updated. Thus, if the trigger function wants the triggering + action to succeed normally without altering the row + value, <varname>NEW</varname> (or a value equal thereto) has to be + returned. To alter the row to be stored, it is possible to + replace single values directly in <varname>NEW</varname> and return the + modified <varname>NEW</varname>, or to build a complete new record/row to + return. In the case of a before-trigger + on <command>DELETE</command>, the returned value has no direct + effect, but it has to be nonnull to allow the trigger action to + proceed. Note that <varname>NEW</varname> is null + in <command>DELETE</command> triggers, so returning that is + usually not sensible. The usual idiom in <command>DELETE</command> + triggers is to return <varname>OLD</varname>. + </para> + + <para> + <literal>INSTEAD OF</literal> triggers (which are always row-level triggers, + and may only be used on views) can return null to signal that they did + not perform any updates, and that the rest of the operation for this + row should be skipped (i.e., subsequent triggers are not fired, and the + row is not counted in the rows-affected status for the surrounding + <command>INSERT</command>/<command>UPDATE</command>/<command>DELETE</command>). + Otherwise a nonnull value should be returned, to signal + that the trigger performed the requested operation. For + <command>INSERT</command> and <command>UPDATE</command> operations, the return value + should be <varname>NEW</varname>, which the trigger function may modify to + support <command>INSERT RETURNING</command> and <command>UPDATE RETURNING</command> + (this will also affect the row value passed to any subsequent triggers, + or passed to a special <varname>EXCLUDED</varname> alias reference within + an <command>INSERT</command> statement with an <literal>ON CONFLICT DO + UPDATE</literal> clause). For <command>DELETE</command> operations, the return + value should be <varname>OLD</varname>. + </para> + + <para> + The return value of a row-level trigger + fired <literal>AFTER</literal> or a statement-level trigger + fired <literal>BEFORE</literal> or <literal>AFTER</literal> is + always ignored; it might as well be null. However, any of these types of + triggers might still abort the entire operation by raising an error. + </para> + + <para> + <xref linkend="plpgsql-trigger-example"/> shows an example of a + trigger function in <application>PL/pgSQL</application>. + </para> + + <example id="plpgsql-trigger-example"> + <title>A <application>PL/pgSQL</application> Trigger Function</title> + + <para> + This example trigger ensures that any time a row is inserted or updated + in the table, the current user name and time are stamped into the + row. And it checks that an employee's name is given and that the + salary is a positive value. + </para> + +<programlisting> +CREATE TABLE emp ( + empname text, + salary integer, + last_date timestamp, + last_user text +); + +CREATE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$ + BEGIN + -- Check that empname and salary are given + IF NEW.empname IS NULL THEN + RAISE EXCEPTION 'empname cannot be null'; + END IF; + IF NEW.salary IS NULL THEN + RAISE EXCEPTION '% cannot have null salary', NEW.empname; + END IF; + + -- Who works for us when they must pay for it? + IF NEW.salary < 0 THEN + RAISE EXCEPTION '% cannot have a negative salary', NEW.empname; + END IF; + + -- Remember who changed the payroll when + NEW.last_date := current_timestamp; + NEW.last_user := current_user; + RETURN NEW; + END; +$emp_stamp$ LANGUAGE plpgsql; + +CREATE TRIGGER emp_stamp BEFORE INSERT OR UPDATE ON emp + FOR EACH ROW EXECUTE FUNCTION emp_stamp(); +</programlisting> + </example> + + <para> + Another way to log changes to a table involves creating a new table that + holds a row for each insert, update, or delete that occurs. This approach + can be thought of as auditing changes to a table. + <xref linkend="plpgsql-trigger-audit-example"/> shows an example of an + audit trigger function in <application>PL/pgSQL</application>. + </para> + + <example id="plpgsql-trigger-audit-example"> + <title>A <application>PL/pgSQL</application> Trigger Function for Auditing</title> + + <para> + This example trigger ensures that any insert, update or delete of a row + in the <literal>emp</literal> table is recorded (i.e., audited) in the <literal>emp_audit</literal> table. + The current time and user name are stamped into the row, together with + the type of operation performed on it. + </para> + +<programlisting> +CREATE TABLE emp ( + empname text NOT NULL, + salary integer +); + +CREATE TABLE emp_audit( + operation char(1) NOT NULL, + stamp timestamp NOT NULL, + userid text NOT NULL, + empname text NOT NULL, + salary integer +); + +CREATE OR REPLACE FUNCTION process_emp_audit() RETURNS TRIGGER AS $emp_audit$ + BEGIN + -- + -- Create a row in emp_audit to reflect the operation performed on emp, + -- making use of the special variable TG_OP to work out the operation. + -- + IF (TG_OP = 'DELETE') THEN + INSERT INTO emp_audit SELECT 'D', now(), user, OLD.*; + ELSIF (TG_OP = 'UPDATE') THEN + INSERT INTO emp_audit SELECT 'U', now(), user, NEW.*; + ELSIF (TG_OP = 'INSERT') THEN + INSERT INTO emp_audit SELECT 'I', now(), user, NEW.*; + END IF; + RETURN NULL; -- result is ignored since this is an AFTER trigger + END; +$emp_audit$ LANGUAGE plpgsql; + +CREATE TRIGGER emp_audit +AFTER INSERT OR UPDATE OR DELETE ON emp + FOR EACH ROW EXECUTE FUNCTION process_emp_audit(); +</programlisting> + </example> + + <para> + A variation of the previous example uses a view joining the main table + to the audit table, to show when each entry was last modified. This + approach still records the full audit trail of changes to the table, + but also presents a simplified view of the audit trail, showing just + the last modified timestamp derived from the audit trail for each entry. + <xref linkend="plpgsql-view-trigger-audit-example"/> shows an example + of an audit trigger on a view in <application>PL/pgSQL</application>. + </para> + + <example id="plpgsql-view-trigger-audit-example"> + <title>A <application>PL/pgSQL</application> View Trigger Function for Auditing</title> + + <para> + This example uses a trigger on the view to make it updatable, and + ensure that any insert, update or delete of a row in the view is + recorded (i.e., audited) in the <literal>emp_audit</literal> table. The current time + and user name are recorded, together with the type of operation + performed, and the view displays the last modified time of each row. + </para> + +<programlisting> +CREATE TABLE emp ( + empname text PRIMARY KEY, + salary integer +); + +CREATE TABLE emp_audit( + operation char(1) NOT NULL, + userid text NOT NULL, + empname text NOT NULL, + salary integer, + stamp timestamp NOT NULL +); + +CREATE VIEW emp_view AS + SELECT e.empname, + e.salary, + max(ea.stamp) AS last_updated + FROM emp e + LEFT JOIN emp_audit ea ON ea.empname = e.empname + GROUP BY 1, 2; + +CREATE OR REPLACE FUNCTION update_emp_view() RETURNS TRIGGER AS $$ + BEGIN + -- + -- Perform the required operation on emp, and create a row in emp_audit + -- to reflect the change made to emp. + -- + IF (TG_OP = 'DELETE') THEN + DELETE FROM emp WHERE empname = OLD.empname; + IF NOT FOUND THEN RETURN NULL; END IF; + + OLD.last_updated = now(); + INSERT INTO emp_audit VALUES('D', user, OLD.*); + RETURN OLD; + ELSIF (TG_OP = 'UPDATE') THEN + UPDATE emp SET salary = NEW.salary WHERE empname = OLD.empname; + IF NOT FOUND THEN RETURN NULL; END IF; + + NEW.last_updated = now(); + INSERT INTO emp_audit VALUES('U', user, NEW.*); + RETURN NEW; + ELSIF (TG_OP = 'INSERT') THEN + INSERT INTO emp VALUES(NEW.empname, NEW.salary); + + NEW.last_updated = now(); + INSERT INTO emp_audit VALUES('I', user, NEW.*); + RETURN NEW; + END IF; + END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER emp_audit +INSTEAD OF INSERT OR UPDATE OR DELETE ON emp_view + FOR EACH ROW EXECUTE FUNCTION update_emp_view(); +</programlisting> + </example> + + <para> + One use of triggers is to maintain a summary table + of another table. The resulting summary can be used in place of the + original table for certain queries — often with vastly reduced run + times. + This technique is commonly used in Data Warehousing, where the tables + of measured or observed data (called fact tables) might be extremely large. + <xref linkend="plpgsql-trigger-summary-example"/> shows an example of a + trigger function in <application>PL/pgSQL</application> that maintains + a summary table for a fact table in a data warehouse. + </para> + + + <example id="plpgsql-trigger-summary-example"> + <title>A <application>PL/pgSQL</application> Trigger Function for Maintaining a Summary Table</title> + + <para> + The schema detailed here is partly based on the <emphasis>Grocery Store + </emphasis> example from <emphasis>The Data Warehouse Toolkit</emphasis> + by Ralph Kimball. + </para> + +<programlisting> +-- +-- Main tables - time dimension and sales fact. +-- +CREATE TABLE time_dimension ( + time_key integer NOT NULL, + day_of_week integer NOT NULL, + day_of_month integer NOT NULL, + month integer NOT NULL, + quarter integer NOT NULL, + year integer NOT NULL +); +CREATE UNIQUE INDEX time_dimension_key ON time_dimension(time_key); + +CREATE TABLE sales_fact ( + time_key integer NOT NULL, + product_key integer NOT NULL, + store_key integer NOT NULL, + amount_sold numeric(12,2) NOT NULL, + units_sold integer NOT NULL, + amount_cost numeric(12,2) NOT NULL +); +CREATE INDEX sales_fact_time ON sales_fact(time_key); + +-- +-- Summary table - sales by time. +-- +CREATE TABLE sales_summary_bytime ( + time_key integer NOT NULL, + amount_sold numeric(15,2) NOT NULL, + units_sold numeric(12) NOT NULL, + amount_cost numeric(15,2) NOT NULL +); +CREATE UNIQUE INDEX sales_summary_bytime_key ON sales_summary_bytime(time_key); + +-- +-- Function and trigger to amend summarized column(s) on UPDATE, INSERT, DELETE. +-- +CREATE OR REPLACE FUNCTION maint_sales_summary_bytime() RETURNS TRIGGER +AS $maint_sales_summary_bytime$ + DECLARE + delta_time_key integer; + delta_amount_sold numeric(15,2); + delta_units_sold numeric(12); + delta_amount_cost numeric(15,2); + BEGIN + + -- Work out the increment/decrement amount(s). + IF (TG_OP = 'DELETE') THEN + + delta_time_key = OLD.time_key; + delta_amount_sold = -1 * OLD.amount_sold; + delta_units_sold = -1 * OLD.units_sold; + delta_amount_cost = -1 * OLD.amount_cost; + + ELSIF (TG_OP = 'UPDATE') THEN + + -- forbid updates that change the time_key - + -- (probably not too onerous, as DELETE + INSERT is how most + -- changes will be made). + IF ( OLD.time_key != NEW.time_key) THEN + RAISE EXCEPTION 'Update of time_key : % -> % not allowed', + OLD.time_key, NEW.time_key; + END IF; + + delta_time_key = OLD.time_key; + delta_amount_sold = NEW.amount_sold - OLD.amount_sold; + delta_units_sold = NEW.units_sold - OLD.units_sold; + delta_amount_cost = NEW.amount_cost - OLD.amount_cost; + + ELSIF (TG_OP = 'INSERT') THEN + + delta_time_key = NEW.time_key; + delta_amount_sold = NEW.amount_sold; + delta_units_sold = NEW.units_sold; + delta_amount_cost = NEW.amount_cost; + + END IF; + + + -- Insert or update the summary row with the new values. + <<insert_update>> + LOOP + UPDATE sales_summary_bytime + SET amount_sold = amount_sold + delta_amount_sold, + units_sold = units_sold + delta_units_sold, + amount_cost = amount_cost + delta_amount_cost + WHERE time_key = delta_time_key; + + EXIT insert_update WHEN found; + + BEGIN + INSERT INTO sales_summary_bytime ( + time_key, + amount_sold, + units_sold, + amount_cost) + VALUES ( + delta_time_key, + delta_amount_sold, + delta_units_sold, + delta_amount_cost + ); + + EXIT insert_update; + + EXCEPTION + WHEN UNIQUE_VIOLATION THEN + -- do nothing + END; + END LOOP insert_update; + + RETURN NULL; + + END; +$maint_sales_summary_bytime$ LANGUAGE plpgsql; + +CREATE TRIGGER maint_sales_summary_bytime +AFTER INSERT OR UPDATE OR DELETE ON sales_fact + FOR EACH ROW EXECUTE FUNCTION maint_sales_summary_bytime(); + +INSERT INTO sales_fact VALUES(1,1,1,10,3,15); +INSERT INTO sales_fact VALUES(1,2,1,20,5,35); +INSERT INTO sales_fact VALUES(2,2,1,40,15,135); +INSERT INTO sales_fact VALUES(2,3,1,10,1,13); +SELECT * FROM sales_summary_bytime; +DELETE FROM sales_fact WHERE product_key = 1; +SELECT * FROM sales_summary_bytime; +UPDATE sales_fact SET units_sold = units_sold * 2; +SELECT * FROM sales_summary_bytime; +</programlisting> + </example> + + <para> + <literal>AFTER</literal> triggers can also make use of <firstterm>transition + tables</firstterm> to inspect the entire set of rows changed by the triggering + statement. The <command>CREATE TRIGGER</command> command assigns names to one + or both transition tables, and then the function can refer to those names + as though they were read-only temporary tables. + <xref linkend="plpgsql-trigger-audit-transition-example"/> shows an example. + </para> + + <example id="plpgsql-trigger-audit-transition-example"> + <title>Auditing with Transition Tables</title> + + <para> + This example produces the same results as + <xref linkend="plpgsql-trigger-audit-example"/>, but instead of using a + trigger that fires for every row, it uses a trigger that fires once + per statement, after collecting the relevant information in a transition + table. This can be significantly faster than the row-trigger approach + when the invoking statement has modified many rows. Notice that we must + make a separate trigger declaration for each kind of event, since the + <literal>REFERENCING</literal> clauses must be different for each case. But + this does not stop us from using a single trigger function if we choose. + (In practice, it might be better to use three separate functions and + avoid the run-time tests on <varname>TG_OP</varname>.) + </para> + +<programlisting> +CREATE TABLE emp ( + empname text NOT NULL, + salary integer +); + +CREATE TABLE emp_audit( + operation char(1) NOT NULL, + stamp timestamp NOT NULL, + userid text NOT NULL, + empname text NOT NULL, + salary integer +); + +CREATE OR REPLACE FUNCTION process_emp_audit() RETURNS TRIGGER AS $emp_audit$ + BEGIN + -- + -- Create rows in emp_audit to reflect the operations performed on emp, + -- making use of the special variable TG_OP to work out the operation. + -- + IF (TG_OP = 'DELETE') THEN + INSERT INTO emp_audit + SELECT 'D', now(), user, o.* FROM old_table o; + ELSIF (TG_OP = 'UPDATE') THEN + INSERT INTO emp_audit + SELECT 'U', now(), user, n.* FROM new_table n; + ELSIF (TG_OP = 'INSERT') THEN + INSERT INTO emp_audit + SELECT 'I', now(), user, n.* FROM new_table n; + END IF; + RETURN NULL; -- result is ignored since this is an AFTER trigger + END; +$emp_audit$ LANGUAGE plpgsql; + +CREATE TRIGGER emp_audit_ins + AFTER INSERT ON emp + REFERENCING NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE FUNCTION process_emp_audit(); +CREATE TRIGGER emp_audit_upd + AFTER UPDATE ON emp + REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE FUNCTION process_emp_audit(); +CREATE TRIGGER emp_audit_del + AFTER DELETE ON emp + REFERENCING OLD TABLE AS old_table + FOR EACH STATEMENT EXECUTE FUNCTION process_emp_audit(); +</programlisting> + </example> + +</sect2> + + <sect2 id="plpgsql-event-trigger"> + <title>Triggers on Events</title> + + <para> + <application>PL/pgSQL</application> can be used to define + <link linkend="event-triggers">event triggers</link>. + <productname>PostgreSQL</productname> requires that a function that + is to be called as an event trigger must be declared as a function with + no arguments and a return type of <literal>event_trigger</literal>. + </para> + + <para> + When a <application>PL/pgSQL</application> function is called as an + event trigger, several special variables are created automatically + in the top-level block. They are: + + <variablelist> + <varlistentry> + <term><varname>TG_EVENT</varname></term> + <listitem> + <para> + Data type <type>text</type>; a string representing the event the + trigger is fired for. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>TG_TAG</varname></term> + <listitem> + <para> + Data type <type>text</type>; variable that contains the command tag + for which the trigger is fired. + </para> + </listitem> + </varlistentry> + </variablelist> + </para> + + <para> + <xref linkend="plpgsql-event-trigger-example"/> shows an example of an + event trigger function in <application>PL/pgSQL</application>. + </para> + + <example id="plpgsql-event-trigger-example"> + <title>A <application>PL/pgSQL</application> Event Trigger Function</title> + + <para> + This example trigger simply raises a <literal>NOTICE</literal> message + each time a supported command is executed. + </para> + +<programlisting> +CREATE OR REPLACE FUNCTION snitch() RETURNS event_trigger AS $$ +BEGIN + RAISE NOTICE 'snitch: % %', tg_event, tg_tag; +END; +$$ LANGUAGE plpgsql; + +CREATE EVENT TRIGGER snitch ON ddl_command_start EXECUTE FUNCTION snitch(); +</programlisting> + </example> + </sect2> + + </sect1> + + <sect1 id="plpgsql-implementation"> + <title><application>PL/pgSQL</application> under the Hood</title> + + <para> + This section discusses some implementation details that are + frequently important for <application>PL/pgSQL</application> users to know. + </para> + + <sect2 id="plpgsql-var-subst"> + <title>Variable Substitution</title> + + <para> + SQL statements and expressions within a <application>PL/pgSQL</application> function + can refer to variables and parameters of the function. Behind the scenes, + <application>PL/pgSQL</application> substitutes query parameters for such references. + Query parameters will only be substituted in places where they are + syntactically permissible. As an extreme case, consider + this example of poor programming style: +<programlisting> +INSERT INTO foo (foo) VALUES (foo(foo)); +</programlisting> + The first occurrence of <literal>foo</literal> must syntactically be a table + name, so it will not be substituted, even if the function has a variable + named <literal>foo</literal>. The second occurrence must be the name of a + column of that table, so it will not be substituted either. Likewise + the third occurrence must be a function name, so it also will not be + substituted for. Only the last occurrence is a candidate to be a + reference to a variable of the <application>PL/pgSQL</application> + function. + </para> + + <para> + Another way to understand this is that variable substitution can only + insert data values into an SQL command; it cannot dynamically change which + database objects are referenced by the command. (If you want to do + that, you must build a command string dynamically, as explained in + <xref linkend="plpgsql-statements-executing-dyn"/>.) + </para> + + <para> + Since the names of variables are syntactically no different from the names + of table columns, there can be ambiguity in statements that also refer to + tables: is a given name meant to refer to a table column, or a variable? + Let's change the previous example to +<programlisting> +INSERT INTO dest (col) SELECT foo + bar FROM src; +</programlisting> + Here, <literal>dest</literal> and <literal>src</literal> must be table names, and + <literal>col</literal> must be a column of <literal>dest</literal>, but <literal>foo</literal> + and <literal>bar</literal> might reasonably be either variables of the function + or columns of <literal>src</literal>. + </para> + + <para> + By default, <application>PL/pgSQL</application> will report an error if a name + in an SQL statement could refer to either a variable or a table column. + You can fix such a problem by renaming the variable or column, + or by qualifying the ambiguous reference, or by telling + <application>PL/pgSQL</application> which interpretation to prefer. + </para> + + <para> + The simplest solution is to rename the variable or column. + A common coding rule is to use a + different naming convention for <application>PL/pgSQL</application> + variables than you use for column names. For example, + if you consistently name function variables + <literal>v_<replaceable>something</replaceable></literal> while none of your + column names start with <literal>v_</literal>, no conflicts will occur. + </para> + + <para> + Alternatively you can qualify ambiguous references to make them clear. + In the above example, <literal>src.foo</literal> would be an unambiguous reference + to the table column. To create an unambiguous reference to a variable, + declare it in a labeled block and use the block's label + (see <xref linkend="plpgsql-structure"/>). For example, +<programlisting> +<<block>> +DECLARE + foo int; +BEGIN + foo := ...; + INSERT INTO dest (col) SELECT block.foo + bar FROM src; +</programlisting> + Here <literal>block.foo</literal> means the variable even if there is a column + <literal>foo</literal> in <literal>src</literal>. Function parameters, as well as + special variables such as <literal>FOUND</literal>, can be qualified by the + function's name, because they are implicitly declared in an outer block + labeled with the function's name. + </para> + + <para> + Sometimes it is impractical to fix all the ambiguous references in a + large body of <application>PL/pgSQL</application> code. In such cases you can + specify that <application>PL/pgSQL</application> should resolve ambiguous references + as the variable (which is compatible with <application>PL/pgSQL</application>'s + behavior before <productname>PostgreSQL</productname> 9.0), or as the + table column (which is compatible with some other systems such as + <productname>Oracle</productname>). + </para> + + <indexterm> + <primary><varname>plpgsql.variable_conflict</varname> configuration parameter</primary> + </indexterm> + + <para> + To change this behavior on a system-wide basis, set the configuration + parameter <literal>plpgsql.variable_conflict</literal> to one of + <literal>error</literal>, <literal>use_variable</literal>, or + <literal>use_column</literal> (where <literal>error</literal> is the factory default). + This parameter affects subsequent compilations + of statements in <application>PL/pgSQL</application> functions, but not statements + already compiled in the current session. + Because changing this setting + can cause unexpected changes in the behavior of <application>PL/pgSQL</application> + functions, it can only be changed by a superuser. + </para> + + <para> + You can also set the behavior on a function-by-function basis, by + inserting one of these special commands at the start of the function + text: +<programlisting> +#variable_conflict error +#variable_conflict use_variable +#variable_conflict use_column +</programlisting> + These commands affect only the function they are written in, and override + the setting of <literal>plpgsql.variable_conflict</literal>. An example is +<programlisting> +CREATE FUNCTION stamp_user(id int, comment text) RETURNS void AS $$ + #variable_conflict use_variable + DECLARE + curtime timestamp := now(); + BEGIN + UPDATE users SET last_modified = curtime, comment = comment + WHERE users.id = id; + END; +$$ LANGUAGE plpgsql; +</programlisting> + In the <literal>UPDATE</literal> command, <literal>curtime</literal>, <literal>comment</literal>, + and <literal>id</literal> will refer to the function's variable and parameters + whether or not <literal>users</literal> has columns of those names. Notice + that we had to qualify the reference to <literal>users.id</literal> in the + <literal>WHERE</literal> clause to make it refer to the table column. + But we did not have to qualify the reference to <literal>comment</literal> + as a target in the <literal>UPDATE</literal> list, because syntactically + that must be a column of <literal>users</literal>. We could write the same + function without depending on the <literal>variable_conflict</literal> setting + in this way: +<programlisting> +CREATE FUNCTION stamp_user(id int, comment text) RETURNS void AS $$ + <<fn>> + DECLARE + curtime timestamp := now(); + BEGIN + UPDATE users SET last_modified = fn.curtime, comment = stamp_user.comment + WHERE users.id = stamp_user.id; + END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + Variable substitution does not happen in a command string given + to <command>EXECUTE</command> or one of its variants. If you need to + insert a varying value into such a command, do so as part of + constructing the string value, or use <literal>USING</literal>, as illustrated in + <xref linkend="plpgsql-statements-executing-dyn"/>. + </para> + + <para> + Variable substitution currently works only in <command>SELECT</command>, + <command>INSERT</command>, <command>UPDATE</command>, + <command>DELETE</command>, and commands containing one of + these (such as <command>EXPLAIN</command> and <command>CREATE TABLE + ... AS SELECT</command>), + because the main SQL engine allows query parameters only in these + commands. To use a non-constant name or value in other statement + types (generically called utility statements), you must construct + the utility statement as a string and <command>EXECUTE</command> it. + </para> + + </sect2> + + <sect2 id="plpgsql-plan-caching"> + <title>Plan Caching</title> + + <para> + The <application>PL/pgSQL</application> interpreter parses the function's source + text and produces an internal binary instruction tree the first time the + function is called (within each session). The instruction tree + fully translates the + <application>PL/pgSQL</application> statement structure, but individual + <acronym>SQL</acronym> expressions and <acronym>SQL</acronym> commands + used in the function are not translated immediately. + </para> + + <para> + <indexterm> + <primary>preparing a query</primary> + <secondary>in PL/pgSQL</secondary> + </indexterm> + As each expression and <acronym>SQL</acronym> command is first + executed in the function, the <application>PL/pgSQL</application> interpreter + parses and analyzes the command to create a prepared statement, + using the <acronym>SPI</acronym> manager's + <function>SPI_prepare</function> function. + Subsequent visits to that expression or command + reuse the prepared statement. Thus, a function with conditional code + paths that are seldom visited will never incur the overhead of + analyzing those commands that are never executed within the current + session. A disadvantage is that errors + in a specific expression or command cannot be detected until that + part of the function is reached in execution. (Trivial syntax + errors will be detected during the initial parsing pass, but + anything deeper will not be detected until execution.) + </para> + + <para> + <application>PL/pgSQL</application> (or more precisely, the SPI manager) can + furthermore attempt to cache the execution plan associated with any + particular prepared statement. If a cached plan is not used, then + a fresh execution plan is generated on each visit to the statement, + and the current parameter values (that is, <application>PL/pgSQL</application> + variable values) can be used to optimize the selected plan. If the + statement has no parameters, or is executed many times, the SPI manager + will consider creating a <firstterm>generic</firstterm> plan that is not dependent + on specific parameter values, and caching that for re-use. Typically + this will happen only if the execution plan is not very sensitive to + the values of the <application>PL/pgSQL</application> variables referenced in it. + If it is, generating a plan each time is a net win. See <xref + linkend="sql-prepare"/> for more information about the behavior of + prepared statements. + </para> + + <para> + Because <application>PL/pgSQL</application> saves prepared statements + and sometimes execution plans in this way, + SQL commands that appear directly in a + <application>PL/pgSQL</application> function must refer to the + same tables and columns on every execution; that is, you cannot use + a parameter as the name of a table or column in an SQL command. To get + around this restriction, you can construct dynamic commands using + the <application>PL/pgSQL</application> <command>EXECUTE</command> + statement — at the price of performing new parse analysis and + constructing a new execution plan on every execution. + </para> + + <para> + The mutable nature of record variables presents another problem in this + connection. When fields of a record variable are used in + expressions or statements, the data types of the fields must not + change from one call of the function to the next, since each + expression will be analyzed using the data type that is present + when the expression is first reached. <command>EXECUTE</command> can be + used to get around this problem when necessary. + </para> + + <para> + If the same function is used as a trigger for more than one table, + <application>PL/pgSQL</application> prepares and caches statements + independently for each such table — that is, there is a cache + for each trigger function and table combination, not just for each + function. This alleviates some of the problems with varying + data types; for instance, a trigger function will be able to work + successfully with a column named <literal>key</literal> even if it happens + to have different types in different tables. + </para> + + <para> + Likewise, functions having polymorphic argument types have a separate + statement cache for each combination of actual argument types they have + been invoked for, so that data type differences do not cause unexpected + failures. + </para> + + <para> + Statement caching can sometimes have surprising effects on the + interpretation of time-sensitive values. For example there + is a difference between what these two functions do: + +<programlisting> +CREATE FUNCTION logfunc1(logtxt text) RETURNS void AS $$ + BEGIN + INSERT INTO logtable VALUES (logtxt, 'now'); + END; +$$ LANGUAGE plpgsql; +</programlisting> + + and: + +<programlisting> +CREATE FUNCTION logfunc2(logtxt text) RETURNS void AS $$ + DECLARE + curtime timestamp; + BEGIN + curtime := 'now'; + INSERT INTO logtable VALUES (logtxt, curtime); + END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + In the case of <function>logfunc1</function>, the + <productname>PostgreSQL</productname> main parser knows when + analyzing the <command>INSERT</command> that the + string <literal>'now'</literal> should be interpreted as + <type>timestamp</type>, because the target column of + <classname>logtable</classname> is of that type. Thus, + <literal>'now'</literal> will be converted to a <type>timestamp</type> + constant when the + <command>INSERT</command> is analyzed, and then used in all + invocations of <function>logfunc1</function> during the lifetime + of the session. Needless to say, this isn't what the programmer + wanted. A better idea is to use the <literal>now()</literal> or + <literal>current_timestamp</literal> function. + </para> + + <para> + In the case of <function>logfunc2</function>, the + <productname>PostgreSQL</productname> main parser does not know + what type <literal>'now'</literal> should become and therefore + it returns a data value of type <type>text</type> containing the string + <literal>now</literal>. During the ensuing assignment + to the local variable <varname>curtime</varname>, the + <application>PL/pgSQL</application> interpreter casts this + string to the <type>timestamp</type> type by calling the + <function>textout</function> and <function>timestamp_in</function> + functions for the conversion. So, the computed time stamp is updated + on each execution as the programmer expects. Even though this + happens to work as expected, it's not terribly efficient, so + use of the <literal>now()</literal> function would still be a better idea. + </para> + + </sect2> + + </sect1> + + <sect1 id="plpgsql-development-tips"> + <title>Tips for Developing in <application>PL/pgSQL</application></title> + + <para> + One good way to develop in + <application>PL/pgSQL</application> is to use the text editor of your + choice to create your functions, and in another window, use + <application>psql</application> to load and test those functions. + If you are doing it this way, it + is a good idea to write the function using <command>CREATE OR + REPLACE FUNCTION</command>. That way you can just reload the file to update + the function definition. For example: +<programlisting> +CREATE OR REPLACE FUNCTION testfunc(integer) RETURNS integer AS $$ + .... +$$ LANGUAGE plpgsql; +</programlisting> + </para> + + <para> + While running <application>psql</application>, you can load or reload such + a function definition file with: +<programlisting> +\i filename.sql +</programlisting> + and then immediately issue SQL commands to test the function. + </para> + + <para> + Another good way to develop in <application>PL/pgSQL</application> is with a + GUI database access tool that facilitates development in a + procedural language. One example of such a tool is + <application>pgAdmin</application>, although others exist. These tools often + provide convenient features such as escaping single quotes and + making it easier to recreate and debug functions. + </para> + + <sect2 id="plpgsql-quote-tips"> + <title>Handling of Quotation Marks</title> + + <para> + The code of a <application>PL/pgSQL</application> function is specified in + <command>CREATE FUNCTION</command> as a string literal. If you + write the string literal in the ordinary way with surrounding + single quotes, then any single quotes inside the function body + must be doubled; likewise any backslashes must be doubled (assuming + escape string syntax is used). + Doubling quotes is at best tedious, and in more complicated cases + the code can become downright incomprehensible, because you can + easily find yourself needing half a dozen or more adjacent quote marks. + It's recommended that you instead write the function body as a + <quote>dollar-quoted</quote> string literal (see <xref + linkend="sql-syntax-dollar-quoting"/>). In the dollar-quoting + approach, you never double any quote marks, but instead take care to + choose a different dollar-quoting delimiter for each level of + nesting you need. For example, you might write the <command>CREATE + FUNCTION</command> command as: +<programlisting> +CREATE OR REPLACE FUNCTION testfunc(integer) RETURNS integer AS $PROC$ + .... +$PROC$ LANGUAGE plpgsql; +</programlisting> + Within this, you might use quote marks for simple literal strings in + SQL commands and <literal>$$</literal> to delimit fragments of SQL commands + that you are assembling as strings. If you need to quote text that + includes <literal>$$</literal>, you could use <literal>$Q$</literal>, and so on. + </para> + + <para> + The following chart shows what you have to do when writing quote + marks without dollar quoting. It might be useful when translating + pre-dollar quoting code into something more comprehensible. + </para> + + <variablelist> + <varlistentry> + <term>1 quotation mark</term> + <listitem> + <para> + To begin and end the function body, for example: +<programlisting> +CREATE FUNCTION foo() RETURNS integer AS ' + .... +' LANGUAGE plpgsql; +</programlisting> + Anywhere within a single-quoted function body, quote marks + <emphasis>must</emphasis> appear in pairs. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>2 quotation marks</term> + <listitem> + <para> + For string literals inside the function body, for example: +<programlisting> +a_output := ''Blah''; +SELECT * FROM users WHERE f_name=''foobar''; +</programlisting> + In the dollar-quoting approach, you'd just write: +<programlisting> +a_output := 'Blah'; +SELECT * FROM users WHERE f_name='foobar'; +</programlisting> + which is exactly what the <application>PL/pgSQL</application> parser would see + in either case. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>4 quotation marks</term> + <listitem> + <para> + When you need a single quotation mark in a string constant inside the + function body, for example: +<programlisting> +a_output := a_output || '' AND name LIKE ''''foobar'''' AND xyz'' +</programlisting> + The value actually appended to <literal>a_output</literal> would be: + <literal> AND name LIKE 'foobar' AND xyz</literal>. + </para> + <para> + In the dollar-quoting approach, you'd write: +<programlisting> +a_output := a_output || $$ AND name LIKE 'foobar' AND xyz$$ +</programlisting> + being careful that any dollar-quote delimiters around this are not + just <literal>$$</literal>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>6 quotation marks</term> + <listitem> + <para> + When a single quotation mark in a string inside the function body is + adjacent to the end of that string constant, for example: +<programlisting> +a_output := a_output || '' AND name LIKE ''''foobar'''''' +</programlisting> + The value appended to <literal>a_output</literal> would then be: + <literal> AND name LIKE 'foobar'</literal>. + </para> + <para> + In the dollar-quoting approach, this becomes: +<programlisting> +a_output := a_output || $$ AND name LIKE 'foobar'$$ +</programlisting> + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>10 quotation marks</term> + <listitem> + <para> + When you want two single quotation marks in a string constant (which + accounts for 8 quotation marks) and this is adjacent to the end of that + string constant (2 more). You will probably only need that if + you are writing a function that generates other functions, as in + <xref linkend="plpgsql-porting-ex2"/>. + For example: +<programlisting> +a_output := a_output || '' if v_'' || + referrer_keys.kind || '' like '''''''''' + || referrer_keys.key_string || '''''''''' + then return '''''' || referrer_keys.referrer_type + || ''''''; end if;''; +</programlisting> + The value of <literal>a_output</literal> would then be: +<programlisting> +if v_... like ''...'' then return ''...''; end if; +</programlisting> + </para> + <para> + In the dollar-quoting approach, this becomes: +<programlisting> +a_output := a_output || $$ if v_$$ || referrer_keys.kind || $$ like '$$ + || referrer_keys.key_string || $$' + then return '$$ || referrer_keys.referrer_type + || $$'; end if;$$; +</programlisting> + where we assume we only need to put single quote marks into + <literal>a_output</literal>, because it will be re-quoted before use. + </para> + </listitem> + </varlistentry> + </variablelist> + + </sect2> + <sect2 id="plpgsql-extra-checks"> + <title>Additional Compile-Time and Run-Time Checks</title> + + <para> + To aid the user in finding instances of simple but common problems before + they cause harm, <application>PL/pgSQL</application> provides additional + <replaceable>checks</replaceable>. When enabled, depending on the configuration, they + can be used to emit either a <literal>WARNING</literal> or an <literal>ERROR</literal> + during the compilation of a function. A function which has received + a <literal>WARNING</literal> can be executed without producing further messages, + so you are advised to test in a separate development environment. + </para> + + <para> + Setting <varname>plpgsql.extra_warnings</varname>, or + <varname>plpgsql.extra_errors</varname>, as appropriate, to <literal>"all"</literal> + is encouraged in development and/or testing environments. + </para> + + <para> + These additional checks are enabled through the configuration variables + <varname>plpgsql.extra_warnings</varname> for warnings and + <varname>plpgsql.extra_errors</varname> for errors. Both can be set either to + a comma-separated list of checks, <literal>"none"</literal> or + <literal>"all"</literal>. The default is <literal>"none"</literal>. Currently + the list of available checks includes: + <variablelist> + <varlistentry> + <term><varname>shadowed_variables</varname></term> + <listitem> + <para> + Checks if a declaration shadows a previously defined variable. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>strict_multi_assignment</varname></term> + <listitem> + <para> + Some <application>PL/PgSQL</application> commands allow assigning + values to more than one variable at a time, such as + <command>SELECT INTO</command>. Typically, the number of target + variables and the number of source variables should match, though + <application>PL/PgSQL</application> will use <literal>NULL</literal> + for missing values and extra variables are ignored. Enabling this + check will cause <application>PL/PgSQL</application> to throw a + <literal>WARNING</literal> or <literal>ERROR</literal> whenever the + number of target variables and the number of source variables are + different. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>too_many_rows</varname></term> + <listitem> + <para> + Enabling this check will cause <application>PL/PgSQL</application> to + check if a given query returns more than one row when an + <literal>INTO</literal> clause is used. As an <literal>INTO</literal> + statement will only ever use one row, having a query return multiple + rows is generally either inefficient and/or nondeterministic and + therefore is likely an error. + </para> + </listitem> + </varlistentry> + </variablelist> + + The following example shows the effect of <varname>plpgsql.extra_warnings</varname> + set to <varname>shadowed_variables</varname>: +<programlisting> +SET plpgsql.extra_warnings TO 'shadowed_variables'; + +CREATE FUNCTION foo(f1 int) RETURNS int AS $$ +DECLARE +f1 int; +BEGIN +RETURN f1; +END; +$$ LANGUAGE plpgsql; +WARNING: variable "f1" shadows a previously defined variable +LINE 3: f1 int; + ^ +CREATE FUNCTION +</programlisting> + The below example shows the effects of setting + <varname>plpgsql.extra_warnings</varname> to + <varname>strict_multi_assignment</varname>: +<programlisting> +SET plpgsql.extra_warnings TO 'strict_multi_assignment'; + +CREATE OR REPLACE FUNCTION public.foo() + RETURNS void + LANGUAGE plpgsql +AS $$ +DECLARE + x int; + y int; +BEGIN + SELECT 1 INTO x, y; + SELECT 1, 2 INTO x, y; + SELECT 1, 2, 3 INTO x, y; +END; +$$; + +SELECT foo(); +WARNING: number of source and target fields in assignment does not match +DETAIL: strict_multi_assignment check of extra_warnings is active. +HINT: Make sure the query returns the exact list of columns. +WARNING: number of source and target fields in assignment does not match +DETAIL: strict_multi_assignment check of extra_warnings is active. +HINT: Make sure the query returns the exact list of columns. + + foo +----- + +(1 row) +</programlisting> + </para> + </sect2> + </sect1> + + <!-- **** Porting from Oracle PL/SQL **** --> + + <sect1 id="plpgsql-porting"> + <title>Porting from <productname>Oracle</productname> PL/SQL</title> + + <indexterm zone="plpgsql-porting"> + <primary>Oracle</primary> + <secondary>porting from PL/SQL to PL/pgSQL</secondary> + </indexterm> + + <indexterm zone="plpgsql-porting"> + <primary>PL/SQL (Oracle)</primary> + <secondary>porting to PL/pgSQL</secondary> + </indexterm> + + <para> + This section explains differences between + <productname>PostgreSQL</productname>'s <application>PL/pgSQL</application> + language and Oracle's <application>PL/SQL</application> language, + to help developers who port applications from + <trademark class="registered">Oracle</trademark> to <productname>PostgreSQL</productname>. + </para> + + <para> + <application>PL/pgSQL</application> is similar to PL/SQL in many + aspects. It is a block-structured, imperative language, and all + variables have to be declared. Assignments, loops, and conditionals + are similar. The main differences you should keep in mind when + porting from <application>PL/SQL</application> to + <application>PL/pgSQL</application> are: + + <itemizedlist> + <listitem> + <para> + If a name used in an SQL command could be either a column name of a + table used in the command or a reference to a variable of the function, + <application>PL/SQL</application> treats it as a column name. + By default, <application>PL/pgSQL</application> will throw an error + complaining that the name is ambiguous. You can specify + <literal>plpgsql.variable_conflict</literal> = <literal>use_column</literal> + to change this behavior to match <application>PL/SQL</application>, + as explained in <xref linkend="plpgsql-var-subst"/>. + It's often best to avoid such ambiguities in the first place, + but if you have to port a large amount of code that depends on + this behavior, setting <literal>variable_conflict</literal> may be the + best solution. + </para> + </listitem> + + <listitem> + <para> + In <productname>PostgreSQL</productname> the function body must be written as + a string literal. Therefore you need to use dollar quoting or escape + single quotes in the function body. (See <xref + linkend="plpgsql-quote-tips"/>.) + </para> + </listitem> + + <listitem> + <para> + Data type names often need translation. For example, in Oracle string + values are commonly declared as being of type <type>varchar2</type>, which + is a non-SQL-standard type. In <productname>PostgreSQL</productname>, + use type <type>varchar</type> or <type>text</type> instead. Similarly, replace + type <type>number</type> with <type>numeric</type>, or use some other numeric + data type if there's a more appropriate one. + </para> + </listitem> + + <listitem> + <para> + Instead of packages, use schemas to organize your functions + into groups. + </para> + </listitem> + + <listitem> + <para> + Since there are no packages, there are no package-level variables + either. This is somewhat annoying. You can keep per-session state + in temporary tables instead. + </para> + </listitem> + + <listitem> + <para> + Integer <command>FOR</command> loops with <literal>REVERSE</literal> work + differently: <application>PL/SQL</application> counts down from the second + number to the first, while <application>PL/pgSQL</application> counts down + from the first number to the second, requiring the loop bounds + to be swapped when porting. This incompatibility is unfortunate + but is unlikely to be changed. (See <xref + linkend="plpgsql-integer-for"/>.) + </para> + </listitem> + + <listitem> + <para> + <command>FOR</command> loops over queries (other than cursors) also work + differently: the target variable(s) must have been declared, + whereas <application>PL/SQL</application> always declares them implicitly. + An advantage of this is that the variable values are still accessible + after the loop exits. + </para> + </listitem> + + <listitem> + <para> + There are various notational differences for the use of cursor + variables. + </para> + </listitem> + + </itemizedlist> + </para> + + <sect2> + <title>Porting Examples</title> + + <para> + <xref linkend="pgsql-porting-ex1"/> shows how to port a simple + function from <application>PL/SQL</application> to <application>PL/pgSQL</application>. + </para> + + <example id="pgsql-porting-ex1"> + <title>Porting a Simple Function from <application>PL/SQL</application> to <application>PL/pgSQL</application></title> + + <para> + Here is an <productname>Oracle</productname> <application>PL/SQL</application> function: +<programlisting> +CREATE OR REPLACE FUNCTION cs_fmt_browser_version(v_name varchar2, + v_version varchar2) +RETURN varchar2 IS +BEGIN + IF v_version IS NULL THEN + RETURN v_name; + END IF; + RETURN v_name || '/' || v_version; +END; +/ +show errors; +</programlisting> + </para> + + <para> + Let's go through this function and see the differences compared to + <application>PL/pgSQL</application>: + + <itemizedlist> + <listitem> + <para> + The type name <type>varchar2</type> has to be changed to <type>varchar</type> + or <type>text</type>. In the examples in this section, we'll + use <type>varchar</type>, but <type>text</type> is often a better choice if + you do not need specific string length limits. + </para> + </listitem> + + <listitem> + <para> + The <literal>RETURN</literal> key word in the function + prototype (not the function body) becomes + <literal>RETURNS</literal> in + <productname>PostgreSQL</productname>. + Also, <literal>IS</literal> becomes <literal>AS</literal>, and you need to + add a <literal>LANGUAGE</literal> clause because <application>PL/pgSQL</application> + is not the only possible function language. + </para> + </listitem> + + <listitem> + <para> + In <productname>PostgreSQL</productname>, the function body is considered + to be a string literal, so you need to use quote marks or dollar + quotes around it. This substitutes for the terminating <literal>/</literal> + in the Oracle approach. + </para> + </listitem> + + <listitem> + <para> + The <literal>show errors</literal> command does not exist in + <productname>PostgreSQL</productname>, and is not needed since errors are + reported automatically. + </para> + </listitem> + </itemizedlist> + </para> + + <para> + This is how this function would look when ported to + <productname>PostgreSQL</productname>: + +<programlisting> +CREATE OR REPLACE FUNCTION cs_fmt_browser_version(v_name varchar, + v_version varchar) +RETURNS varchar AS $$ +BEGIN + IF v_version IS NULL THEN + RETURN v_name; + END IF; + RETURN v_name || '/' || v_version; +END; +$$ LANGUAGE plpgsql; +</programlisting> + </para> + </example> + + <para> + <xref linkend="plpgsql-porting-ex2"/> shows how to port a + function that creates another function and how to handle the + ensuing quoting problems. + </para> + + <example id="plpgsql-porting-ex2"> + <title>Porting a Function that Creates Another Function from <application>PL/SQL</application> to <application>PL/pgSQL</application></title> + + <para> + The following procedure grabs rows from a + <command>SELECT</command> statement and builds a large function + with the results in <literal>IF</literal> statements, for the + sake of efficiency. + </para> + + <para> + This is the Oracle version: +<programlisting> +CREATE OR REPLACE PROCEDURE cs_update_referrer_type_proc IS + CURSOR referrer_keys IS + SELECT * FROM cs_referrer_keys + ORDER BY try_order; + func_cmd VARCHAR(4000); +BEGIN + func_cmd := 'CREATE OR REPLACE FUNCTION cs_find_referrer_type(v_host IN VARCHAR2, + v_domain IN VARCHAR2, v_url IN VARCHAR2) RETURN VARCHAR2 IS BEGIN'; + + FOR referrer_key IN referrer_keys LOOP + func_cmd := func_cmd || + ' IF v_' || referrer_key.kind + || ' LIKE ''' || referrer_key.key_string + || ''' THEN RETURN ''' || referrer_key.referrer_type + || '''; END IF;'; + END LOOP; + + func_cmd := func_cmd || ' RETURN NULL; END;'; + + EXECUTE IMMEDIATE func_cmd; +END; +/ +show errors; +</programlisting> + </para> + + <para> + Here is how this function would end up in <productname>PostgreSQL</productname>: +<programlisting> +CREATE OR REPLACE PROCEDURE cs_update_referrer_type_proc() AS $func$ +DECLARE + referrer_keys CURSOR IS + SELECT * FROM cs_referrer_keys + ORDER BY try_order; + func_body text; + func_cmd text; +BEGIN + func_body := 'BEGIN'; + + FOR referrer_key IN referrer_keys LOOP + func_body := func_body || + ' IF v_' || referrer_key.kind + || ' LIKE ' || quote_literal(referrer_key.key_string) + || ' THEN RETURN ' || quote_literal(referrer_key.referrer_type) + || '; END IF;' ; + END LOOP; + + func_body := func_body || ' RETURN NULL; END;'; + + func_cmd := + 'CREATE OR REPLACE FUNCTION cs_find_referrer_type(v_host varchar, + v_domain varchar, + v_url varchar) + RETURNS varchar AS ' + || quote_literal(func_body) + || ' LANGUAGE plpgsql;' ; + + EXECUTE func_cmd; +END; +$func$ LANGUAGE plpgsql; +</programlisting> + Notice how the body of the function is built separately and passed + through <literal>quote_literal</literal> to double any quote marks in it. This + technique is needed because we cannot safely use dollar quoting for + defining the new function: we do not know for sure what strings will + be interpolated from the <structfield>referrer_key.key_string</structfield> field. + (We are assuming here that <structfield>referrer_key.kind</structfield> can be + trusted to always be <literal>host</literal>, <literal>domain</literal>, or + <literal>url</literal>, but <structfield>referrer_key.key_string</structfield> might be + anything, in particular it might contain dollar signs.) This function + is actually an improvement on the Oracle original, because it will + not generate broken code when <structfield>referrer_key.key_string</structfield> or + <structfield>referrer_key.referrer_type</structfield> contain quote marks. + </para> + </example> + + <para> + <xref linkend="plpgsql-porting-ex3"/> shows how to port a function + with <literal>OUT</literal> parameters and string manipulation. + <productname>PostgreSQL</productname> does not have a built-in + <function>instr</function> function, but you can create one + using a combination of other + functions. In <xref linkend="plpgsql-porting-appendix"/> there is a + <application>PL/pgSQL</application> implementation of + <function>instr</function> that you can use to make your porting + easier. + </para> + + <example id="plpgsql-porting-ex3"> + <title>Porting a Procedure With String Manipulation and + <literal>OUT</literal> Parameters from <application>PL/SQL</application> to + <application>PL/pgSQL</application></title> + + <para> + The following <productname>Oracle</productname> PL/SQL procedure is used + to parse a URL and return several elements (host, path, and query). + </para> + + <para> + This is the Oracle version: +<programlisting> +CREATE OR REPLACE PROCEDURE cs_parse_url( + v_url IN VARCHAR2, + v_host OUT VARCHAR2, -- This will be passed back + v_path OUT VARCHAR2, -- This one too + v_query OUT VARCHAR2) -- And this one +IS + a_pos1 INTEGER; + a_pos2 INTEGER; +BEGIN + v_host := NULL; + v_path := NULL; + v_query := NULL; + a_pos1 := instr(v_url, '//'); + + IF a_pos1 = 0 THEN + RETURN; + END IF; + a_pos2 := instr(v_url, '/', a_pos1 + 2); + IF a_pos2 = 0 THEN + v_host := substr(v_url, a_pos1 + 2); + v_path := '/'; + RETURN; + END IF; + + v_host := substr(v_url, a_pos1 + 2, a_pos2 - a_pos1 - 2); + a_pos1 := instr(v_url, '?', a_pos2 + 1); + + IF a_pos1 = 0 THEN + v_path := substr(v_url, a_pos2); + RETURN; + END IF; + + v_path := substr(v_url, a_pos2, a_pos1 - a_pos2); + v_query := substr(v_url, a_pos1 + 1); +END; +/ +show errors; +</programlisting> + </para> + + <para> + Here is a possible translation into <application>PL/pgSQL</application>: +<programlisting> +CREATE OR REPLACE FUNCTION cs_parse_url( + v_url IN VARCHAR, + v_host OUT VARCHAR, -- This will be passed back + v_path OUT VARCHAR, -- This one too + v_query OUT VARCHAR) -- And this one +AS $$ +DECLARE + a_pos1 INTEGER; + a_pos2 INTEGER; +BEGIN + v_host := NULL; + v_path := NULL; + v_query := NULL; + a_pos1 := instr(v_url, '//'); + + IF a_pos1 = 0 THEN + RETURN; + END IF; + a_pos2 := instr(v_url, '/', a_pos1 + 2); + IF a_pos2 = 0 THEN + v_host := substr(v_url, a_pos1 + 2); + v_path := '/'; + RETURN; + END IF; + + v_host := substr(v_url, a_pos1 + 2, a_pos2 - a_pos1 - 2); + a_pos1 := instr(v_url, '?', a_pos2 + 1); + + IF a_pos1 = 0 THEN + v_path := substr(v_url, a_pos2); + RETURN; + END IF; + + v_path := substr(v_url, a_pos2, a_pos1 - a_pos2); + v_query := substr(v_url, a_pos1 + 1); +END; +$$ LANGUAGE plpgsql; +</programlisting> + + This function could be used like this: +<programlisting> +SELECT * FROM cs_parse_url('http://foobar.com/query.cgi?baz'); +</programlisting> + </para> + </example> + + <para> + <xref linkend="plpgsql-porting-ex4"/> shows how to port a procedure + that uses numerous features that are specific to Oracle. + </para> + + <example id="plpgsql-porting-ex4"> + <title>Porting a Procedure from <application>PL/SQL</application> to <application>PL/pgSQL</application></title> + + <para> + The Oracle version: + +<programlisting> +CREATE OR REPLACE PROCEDURE cs_create_job(v_job_id IN INTEGER) IS + a_running_job_count INTEGER; +BEGIN + LOCK TABLE cs_jobs IN EXCLUSIVE MODE; + + SELECT count(*) INTO a_running_job_count FROM cs_jobs WHERE end_stamp IS NULL; + + IF a_running_job_count > 0 THEN + COMMIT; -- free lock + raise_application_error(-20000, + 'Unable to create a new job: a job is currently running.'); + END IF; + + DELETE FROM cs_active_job; + INSERT INTO cs_active_job(job_id) VALUES (v_job_id); + + BEGIN + INSERT INTO cs_jobs (job_id, start_stamp) VALUES (v_job_id, now()); + EXCEPTION + WHEN dup_val_on_index THEN NULL; -- don't worry if it already exists + END; + COMMIT; +END; +/ +show errors +</programlisting> + </para> + + <para> + This is how we could port this procedure to <application>PL/pgSQL</application>: + +<programlisting> +CREATE OR REPLACE PROCEDURE cs_create_job(v_job_id integer) AS $$ +DECLARE + a_running_job_count integer; +BEGIN + LOCK TABLE cs_jobs IN EXCLUSIVE MODE; + + SELECT count(*) INTO a_running_job_count FROM cs_jobs WHERE end_stamp IS NULL; + + IF a_running_job_count > 0 THEN + COMMIT; -- free lock + RAISE EXCEPTION 'Unable to create a new job: a job is currently running'; -- <co id="co.plpgsql-porting-raise"/> + END IF; + + DELETE FROM cs_active_job; + INSERT INTO cs_active_job(job_id) VALUES (v_job_id); + + BEGIN + INSERT INTO cs_jobs (job_id, start_stamp) VALUES (v_job_id, now()); + EXCEPTION + WHEN unique_violation THEN -- <co id="co.plpgsql-porting-exception"/> + -- don't worry if it already exists + END; + COMMIT; +END; +$$ LANGUAGE plpgsql; +</programlisting> + + <calloutlist> + <callout arearefs="co.plpgsql-porting-raise"> + <para> + The syntax of <literal>RAISE</literal> is considerably different from + Oracle's statement, although the basic case <literal>RAISE</literal> + <replaceable class="parameter">exception_name</replaceable> works + similarly. + </para> + </callout> + <callout arearefs="co.plpgsql-porting-exception"> + <para> + The exception names supported by <application>PL/pgSQL</application> are + different from Oracle's. The set of built-in exception names + is much larger (see <xref linkend="errcodes-appendix"/>). There + is not currently a way to declare user-defined exception names, + although you can throw user-chosen SQLSTATE values instead. + </para> + </callout> + </calloutlist> + </para> + </example> + </sect2> + + <sect2 id="plpgsql-porting-other"> + <title>Other Things to Watch For</title> + + <para> + This section explains a few other things to watch for when porting + Oracle <application>PL/SQL</application> functions to + <productname>PostgreSQL</productname>. + </para> + + <sect3 id="plpgsql-porting-exceptions"> + <title>Implicit Rollback after Exceptions</title> + + <para> + In <application>PL/pgSQL</application>, when an exception is caught by an + <literal>EXCEPTION</literal> clause, all database changes since the block's + <literal>BEGIN</literal> are automatically rolled back. That is, the behavior + is equivalent to what you'd get in Oracle with: + +<programlisting> +BEGIN + SAVEPOINT s1; + ... code here ... +EXCEPTION + WHEN ... THEN + ROLLBACK TO s1; + ... code here ... + WHEN ... THEN + ROLLBACK TO s1; + ... code here ... +END; +</programlisting> + + If you are translating an Oracle procedure that uses + <command>SAVEPOINT</command> and <command>ROLLBACK TO</command> in this style, + your task is easy: just omit the <command>SAVEPOINT</command> and + <command>ROLLBACK TO</command>. If you have a procedure that uses + <command>SAVEPOINT</command> and <command>ROLLBACK TO</command> in a different way + then some actual thought will be required. + </para> + </sect3> + + <sect3> + <title><command>EXECUTE</command></title> + + <para> + The <application>PL/pgSQL</application> version of + <command>EXECUTE</command> works similarly to the + <application>PL/SQL</application> version, but you have to remember to use + <function>quote_literal</function> and + <function>quote_ident</function> as described in <xref + linkend="plpgsql-statements-executing-dyn"/>. Constructs of the + type <literal>EXECUTE 'SELECT * FROM $1';</literal> will not work + reliably unless you use these functions. + </para> + </sect3> + + <sect3 id="plpgsql-porting-optimization"> + <title>Optimizing <application>PL/pgSQL</application> Functions</title> + + <para> + <productname>PostgreSQL</productname> gives you two function creation + modifiers to optimize execution: <quote>volatility</quote> (whether + the function always returns the same result when given the same + arguments) and <quote>strictness</quote> (whether the function + returns null if any argument is null). Consult the <xref + linkend="sql-createfunction"/> + reference page for details. + </para> + + <para> + When making use of these optimization attributes, your + <command>CREATE FUNCTION</command> statement might look something + like this: + +<programlisting> +CREATE FUNCTION foo(...) RETURNS integer AS $$ +... +$$ LANGUAGE plpgsql STRICT IMMUTABLE; +</programlisting> + </para> + </sect3> + </sect2> + + <sect2 id="plpgsql-porting-appendix"> + <title>Appendix</title> + + <para> + This section contains the code for a set of Oracle-compatible + <function>instr</function> functions that you can use to simplify + your porting efforts. + </para> + + <indexterm> + <primary><function>instr</function> function</primary> + </indexterm> + +<programlisting><![CDATA[ +-- +-- instr functions that mimic Oracle's counterpart +-- Syntax: instr(string1, string2 [, n [, m]]) +-- where [] denotes optional parameters. +-- +-- Search string1, beginning at the nth character, for the mth occurrence +-- of string2. If n is negative, search backwards, starting at the abs(n)'th +-- character from the end of string1. +-- If n is not passed, assume 1 (search starts at first character). +-- If m is not passed, assume 1 (find first occurrence). +-- Returns starting index of string2 in string1, or 0 if string2 is not found. +-- + +CREATE FUNCTION instr(varchar, varchar) RETURNS integer AS $$ +BEGIN + RETURN instr($1, $2, 1); +END; +$$ LANGUAGE plpgsql STRICT IMMUTABLE; + + +CREATE FUNCTION instr(string varchar, string_to_search_for varchar, + beg_index integer) +RETURNS integer AS $$ +DECLARE + pos integer NOT NULL DEFAULT 0; + temp_str varchar; + beg integer; + length integer; + ss_length integer; +BEGIN + IF beg_index > 0 THEN + temp_str := substring(string FROM beg_index); + pos := position(string_to_search_for IN temp_str); + + IF pos = 0 THEN + RETURN 0; + ELSE + RETURN pos + beg_index - 1; + END IF; + ELSIF beg_index < 0 THEN + ss_length := char_length(string_to_search_for); + length := char_length(string); + beg := length + 1 + beg_index; + + WHILE beg > 0 LOOP + temp_str := substring(string FROM beg FOR ss_length); + IF string_to_search_for = temp_str THEN + RETURN beg; + END IF; + + beg := beg - 1; + END LOOP; + + RETURN 0; + ELSE + RETURN 0; + END IF; +END; +$$ LANGUAGE plpgsql STRICT IMMUTABLE; + + +CREATE FUNCTION instr(string varchar, string_to_search_for varchar, + beg_index integer, occur_index integer) +RETURNS integer AS $$ +DECLARE + pos integer NOT NULL DEFAULT 0; + occur_number integer NOT NULL DEFAULT 0; + temp_str varchar; + beg integer; + i integer; + length integer; + ss_length integer; +BEGIN + IF occur_index <= 0 THEN + RAISE 'argument ''%'' is out of range', occur_index + USING ERRCODE = '22003'; + END IF; + + IF beg_index > 0 THEN + beg := beg_index - 1; + FOR i IN 1..occur_index LOOP + temp_str := substring(string FROM beg + 1); + pos := position(string_to_search_for IN temp_str); + IF pos = 0 THEN + RETURN 0; + END IF; + beg := beg + pos; + END LOOP; + + RETURN beg; + ELSIF beg_index < 0 THEN + ss_length := char_length(string_to_search_for); + length := char_length(string); + beg := length + 1 + beg_index; + + WHILE beg > 0 LOOP + temp_str := substring(string FROM beg FOR ss_length); + IF string_to_search_for = temp_str THEN + occur_number := occur_number + 1; + IF occur_number = occur_index THEN + RETURN beg; + END IF; + END IF; + + beg := beg - 1; + END LOOP; + + RETURN 0; + ELSE + RETURN 0; + END IF; +END; +$$ LANGUAGE plpgsql STRICT IMMUTABLE; +]]> +</programlisting> + </sect2> + + </sect1> + +</chapter> |