diff options
Diffstat (limited to 'doc/src/sgml/ref/create_statistics.sgml')
-rw-r--r-- | doc/src/sgml/ref/create_statistics.sgml | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml new file mode 100644 index 0000000..4363be5 --- /dev/null +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -0,0 +1,217 @@ +<!-- +doc/src/sgml/ref/create_statistics.sgml +PostgreSQL documentation +--> + +<refentry id="sql-createstatistics"> + <indexterm zone="sql-createstatistics"> + <primary>CREATE STATISTICS</primary> + </indexterm> + + <refmeta> + <refentrytitle>CREATE STATISTICS</refentrytitle> + <manvolnum>7</manvolnum> + <refmiscinfo>SQL - Language Statements</refmiscinfo> + </refmeta> + + <refnamediv> + <refname>CREATE STATISTICS</refname> + <refpurpose>define extended statistics</refpurpose> + </refnamediv> + + <refsynopsisdiv> +<synopsis> +CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_name</replaceable> + [ ( <replaceable class="parameter">statistics_kind</replaceable> [, ... ] ) ] + ON <replaceable class="parameter">column_name</replaceable>, <replaceable class="parameter">column_name</replaceable> [, ...] + FROM <replaceable class="parameter">table_name</replaceable> +</synopsis> + + </refsynopsisdiv> + + <refsect1 id="sql-createstatistics-description"> + <title>Description</title> + + <para> + <command>CREATE STATISTICS</command> will create a new extended statistics + object tracking data about the specified table, foreign table or + materialized view. The statistics object will be created in the current + database and will be owned by the user issuing the command. + </para> + + <para> + If a schema name is given (for example, <literal>CREATE STATISTICS + myschema.mystat ...</literal>) then the statistics object is created in the + specified schema. Otherwise it is created in the current schema. + The name of the statistics object must be distinct from the name of any + other statistics object in the same schema. + </para> + </refsect1> + + <refsect1> + <title>Parameters</title> + + <variablelist> + + <varlistentry> + <term><literal>IF NOT EXISTS</literal></term> + <listitem> + <para> + Do not throw an error if a statistics object with the same name already + exists. A notice is issued in this case. Note that only the name of + the statistics object is considered here, not the details of its + definition. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">statistics_name</replaceable></term> + <listitem> + <para> + The name (optionally schema-qualified) of the statistics object to be + created. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">statistics_kind</replaceable></term> + <listitem> + <para> + A statistics kind to be computed in this statistics object. + Currently supported kinds are + <literal>ndistinct</literal>, which enables n-distinct statistics, + <literal>dependencies</literal>, which enables functional + dependency statistics, and <literal>mcv</literal> which enables + most-common values lists. + If this clause is omitted, all supported statistics kinds are + included in the statistics object. + For more information, see <xref linkend="planner-stats-extended"/> + and <xref linkend="multivariate-statistics-examples"/>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">column_name</replaceable></term> + <listitem> + <para> + The name of a table column to be covered by the computed statistics. + At least two column names must be given; the order of the column names + is insignificant. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">table_name</replaceable></term> + <listitem> + <para> + The name (optionally schema-qualified) of the table containing the + column(s) the statistics are computed on. + </para> + </listitem> + </varlistentry> + + </variablelist> + </refsect1> + + <refsect1> + <title>Notes</title> + + <para> + You must be the owner of a table to create a statistics object + reading it. Once created, however, the ownership of the statistics + object is independent of the underlying table(s). + </para> + </refsect1> + + <refsect1 id="sql-createstatistics-examples"> + <title>Examples</title> + + <para> + Create table <structname>t1</structname> with two functionally dependent columns, i.e., + knowledge of a value in the first column is sufficient for determining the + value in the other column. Then functional dependency statistics are built + on those columns: + +<programlisting> +CREATE TABLE t1 ( + a int, + b int +); + +INSERT INTO t1 SELECT i/100, i/500 + FROM generate_series(1,1000000) s(i); + +ANALYZE t1; + +-- the number of matching rows will be drastically underestimated: +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); + +CREATE STATISTICS s1 (dependencies) ON a, b FROM t1; + +ANALYZE t1; + +-- now the row count estimate is more accurate: +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); +</programlisting> + + Without functional-dependency statistics, the planner would assume + that the two <literal>WHERE</literal> conditions are independent, and would + multiply their selectivities together to arrive at a much-too-small + row count estimate. + With such statistics, the planner recognizes that the <literal>WHERE</literal> + conditions are redundant and does not underestimate the row count. + </para> + + <para> + Create table <structname>t2</structname> with two perfectly correlated columns + (containing identical data), and a MCV list on those columns: + +<programlisting> +CREATE TABLE t2 ( + a int, + b int +); + +INSERT INTO t2 SELECT mod(i,100), mod(i,100) + FROM generate_series(1,1000000) s(i); + +CREATE STATISTICS s2 (mcv) ON a, b FROM t2; + +ANALYZE t2; + +-- valid combination (found in MCV) +EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 1); + +-- invalid combination (not found in MCV) +EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2); +</programlisting> + + The MCV list gives the planner more detailed information about the + specific values that commonly appear in the table, as well as an upper + bound on the selectivities of combinations of values that do not appear + in the table, allowing it to generate better estimates in both cases. + </para> + + </refsect1> + + <refsect1> + <title>Compatibility</title> + + <para> + There is no <command>CREATE STATISTICS</command> command in the SQL standard. + </para> + </refsect1> + + <refsect1> + <title>See Also</title> + + <simplelist type="inline"> + <member><xref linkend="sql-alterstatistics"/></member> + <member><xref linkend="sql-dropstatistics"/></member> + </simplelist> + </refsect1> +</refentry> |