1 files changed, 217 insertions, 0 deletions
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
new file mode 100644
index 0000000..4363be5
--- /dev/null
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -0,0 +1,217 @@
+<!--
+doc/src/sgml/ref/create_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="sql-createstatistics">
+ <indexterm zone="sql-createstatistics">
+  <primary>CREATE STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>CREATE STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>CREATE STATISTICS</refname>
+  <refpurpose>define extended statistics</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_name</replaceable>
+    [ ( <replaceable class="parameter">statistics_kind</replaceable> [, ... ] ) ]
+    ON <replaceable class="parameter">column_name</replaceable>, <replaceable class="parameter">column_name</replaceable> [, ...]
+    FROM <replaceable class="parameter">table_name</replaceable>
+</synopsis>
+
+ </refsynopsisdiv>
+
+ <refsect1 id="sql-createstatistics-description">
+  <title>Description</title>
+
+  <para>
+   <command>CREATE STATISTICS</command> will create a new extended statistics
+   object tracking data about the specified table, foreign table or
+   materialized view.  The statistics object will be created in the current
+   database and will be owned by the user issuing the command.
+  </para>
+
+  <para>
+   If a schema name is given (for example, <literal>CREATE STATISTICS
+   myschema.mystat ...</literal>) then the statistics object is created in the
+   specified schema.  Otherwise it is created in the current schema.
+   The name of the statistics object must be distinct from the name of any
+   other statistics object in the same schema.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+
+   <varlistentry>
+    <term><literal>IF NOT EXISTS</literal></term>
+    <listitem>
+     <para>
+      Do not throw an error if a statistics object with the same name already
+      exists.  A notice is issued in this case.  Note that only the name of
+      the statistics object is considered here, not the details of its
+      definition.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">statistics_name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the statistics object to be
+      created.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">statistics_kind</replaceable></term>
+    <listitem>
+     <para>
+      A statistics kind to be computed in this statistics object.
+      Currently supported kinds are
+      <literal>ndistinct</literal>, which enables n-distinct statistics,
+      <literal>dependencies</literal>, which enables functional
+      dependency statistics, and <literal>mcv</literal> which enables
+      most-common values lists.
+      If this clause is omitted, all supported statistics kinds are
+      included in the statistics object.
+      For more information, see <xref linkend="planner-stats-extended"/>
+      and <xref linkend="multivariate-statistics-examples"/>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">column_name</replaceable></term>
+    <listitem>
+     <para>
+      The name of a table column to be covered by the computed statistics.
+      At least two column names must be given;  the order of the column names
+      is insignificant.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">table_name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the table containing the
+      column(s) the statistics are computed on.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   You must be the owner of a table to create a statistics object
+   reading it.  Once created, however, the ownership of the statistics
+   object is independent of the underlying table(s).
+  </para>
+ </refsect1>
+
+ <refsect1 id="sql-createstatistics-examples">
+  <title>Examples</title>
+
+  <para>
+   Create table <structname>t1</structname> with two functionally dependent columns, i.e.,
+   knowledge of a value in the first column is sufficient for determining the
+   value in the other column. Then functional dependency statistics are built
+   on those columns:
+
+<programlisting>
+CREATE TABLE t1 (
+    a   int,
+    b   int
+);
+
+INSERT INTO t1 SELECT i/100, i/500
+                 FROM generate_series(1,1000000) s(i);
+
+ANALYZE t1;
+
+-- the number of matching rows will be drastically underestimated:
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
+
+CREATE STATISTICS s1 (dependencies) ON a, b FROM t1;
+
+ANALYZE t1;
+
+-- now the row count estimate is more accurate:
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
+</programlisting>
+
+   Without functional-dependency statistics, the planner would assume
+   that the two <literal>WHERE</literal> conditions are independent, and would
+   multiply their selectivities together to arrive at a much-too-small
+   row count estimate.
+   With such statistics, the planner recognizes that the <literal>WHERE</literal>
+   conditions are redundant and does not underestimate the row count.
+  </para>
+
+  <para>
+   Create table <structname>t2</structname> with two perfectly correlated columns
+   (containing identical data), and a MCV list on those columns:
+
+<programlisting>
+CREATE TABLE t2 (
+    a   int,
+    b   int
+);
+
+INSERT INTO t2 SELECT mod(i,100), mod(i,100)
+                 FROM generate_series(1,1000000) s(i);
+
+CREATE STATISTICS s2 (mcv) ON a, b FROM t2;
+
+ANALYZE t2;
+
+-- valid combination (found in MCV)
+EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 1);
+
+-- invalid combination (not found in MCV)
+EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2);
+</programlisting>
+
+   The MCV list gives the planner more detailed information about the
+   specific values that commonly appear in the table, as well as an upper
+   bound on the selectivities of combinations of values that do not appear
+   in the table, allowing it to generate better estimates in both cases.
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There is no <command>CREATE STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alterstatistics"/></member>
+   <member><xref linkend="sql-dropstatistics"/></member>
+  </simplelist>
+ </refsect1>
+</refentry>