summary | refs | log | tree | commit | diff | stats
path: root/src/include/statistics
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit: 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree: 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/include/statistics
parent: Initial commit. (diff)
download: postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
download: postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5. [refs: upstream/14.5, upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/statistics')
-rw-r--r-- src/include/statistics/extended_stats_internal.h | 130
-rw-r--r-- src/include/statistics/statistics.h | 129
2 files changed, 259 insertions, 0 deletions
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
new file mode 100644
index 0000000..55cd925
--- /dev/null
+++ b/src/include/statistics/extended_stats_internal.h
@@ -0,0 +1,130 @@
+/*-------------------------------------------------------------------------
+ *
+ * extended_stats_internal.h
+ * POSTGRES extended statistics internal declarations
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/include/statistics/extended_stats_internal.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef EXTENDED_STATS_INTERNAL_H
+#define EXTENDED_STATS_INTERNAL_H
+
+#include "statistics/statistics.h"
+#include "utils/sortsupport.h"
+
+typedef struct /* equality and ordering operator info for one datatype */
+{
+ Oid eqopr; /* '=' operator for datatype, if any */
+ Oid eqfunc; /* function associated with eqopr */
+ Oid ltopr; /* '<' operator for datatype, if any */
+} StdAnalyzeData;
+
+typedef struct /* one data value tagged with the tuple it came from */
+{
+ Datum value; /* a data value */
+ int tupno; /* position index for tuple it came from */
+} ScalarItem;
+
+/* per-dimension (de)serialization info */
+typedef struct DimensionInfo
+{
+ int nvalues; /* number of deduplicated values */
+ int nbytes; /* number of bytes (serialized) */
+ int nbytes_aligned; /* size of deserialized data with alignment */
+ int typlen; /* pg_type.typlen (presumably of this dimension's type; confirm) */
+ bool typbyval; /* pg_type.typbyval (presumably of this dimension's type; confirm) */
+} DimensionInfo;
+
+/* multi-sort: sort support state covering several dimensions at once */
+typedef struct MultiSortSupportData
+{
+ int ndims; /* number of dimensions */
+ /* sort support data for each dimension: */
+ SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]; /* presumably ndims entries; confirm */
+} MultiSortSupportData;
+
+typedef MultiSortSupportData *MultiSortSupport; /* pointer form taken by the multi_sort_* functions */
+
+typedef struct SortItem /* item type compared by multi_sort_compare_dim()/multi_sort_compare_dims() */
+{
+ Datum *values; /* the item's values (presumably one per sort dimension; confirm) */
+ bool *isnull; /* null flags (presumably parallel to values; confirm) */
+ int count; /* NOTE(review): looks like an occurrence count; confirm against users */
+} SortItem;
+
+/* a unified representation of the data the statistics is built on */
+typedef struct StatsBuildData
+{
+ int numrows; /* number of rows (presumably the sampled rows; confirm) */
+ int nattnums; /* presumably the number of entries in attnums; confirm */
+ AttrNumber *attnums; /* attribute numbers */
+ VacAttrStats **stats; /* array of VacAttrStats pointers (presumably one per attnum; confirm) */
+ Datum **values; /* data values; NOTE(review): two-level array, index order not visible here */
+ bool **nulls; /* null flags (presumably same layout as values; confirm) */
+} StatsBuildData;
+
+/* multivariate ndistinct coefficients: build, serialize to bytea, deserialize */
+extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
+extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
+extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
+/* functional dependencies: build, serialize to bytea, deserialize */
+extern MVDependencies *statext_dependencies_build(StatsBuildData *data);
+extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
+extern MVDependencies *statext_dependencies_deserialize(bytea *data);
+/* multivariate MCV lists: build, serialize to bytea, deserialize */
+extern MCVList *statext_mcv_build(StatsBuildData *data,
+ double totalrows, int stattarget);
+extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats);
+extern MCVList *statext_mcv_deserialize(bytea *data);
+/* multi-sort setup and comparators (the void* variants look like qsort_arg-style callbacks; confirm) */
+extern MultiSortSupport multi_sort_init(int ndims);
+extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
+ Oid oper, Oid collation);
+extern int multi_sort_compare(const void *a, const void *b, void *arg);
+extern int multi_sort_compare_dim(int dim, const SortItem *a,
+ const SortItem *b, MultiSortSupport mss);
+extern int multi_sort_compare_dims(int start, int end, const SortItem *a,
+ const SortItem *b, MultiSortSupport mss);
+extern int compare_scalars_simple(const void *a, const void *b, void *arg);
+extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup);
+/* returns an AttrNumber array for a Bitmapset of attributes; *numattrs presumably receives its length (confirm) */
+extern AttrNumber *build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs);
+/* returns SortItems built from the StatsBuildData; *nitems presumably receives the item count (confirm) */
+extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
+ MultiSortSupport mss,
+ int numattrs, AttrNumber *attnums);
+/* NOTE(review): looks like it splits operator-clause args into expression and Const outputs; confirm */
+extern bool examine_opclause_args(List *args, Node **exprp,
+ Const **cstp, bool *expronleftp);
+/* selectivity estimation helpers based on MCV lists */
+extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
+ Selectivity mcv_sel,
+ Selectivity mcv_basesel,
+ Selectivity mcv_totalsel);
+
+extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
+ StatisticExtInfo *stat,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ Selectivity *basesel, /* presumably an output; confirm */
+ Selectivity *totalsel); /* presumably an output; confirm */
+
+extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
+ StatisticExtInfo *stat,
+ MCVList *mcv,
+ Node *clause,
+ bool **or_matches,
+ Selectivity *basesel,
+ Selectivity *overlap_mcvsel,
+ Selectivity *overlap_basesel,
+ Selectivity *totalsel);
+
+#endif /* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
new file mode 100644
index 0000000..326cf26
--- /dev/null
+++ b/src/include/statistics/statistics.h
@@ -0,0 +1,129 @@
+/*-------------------------------------------------------------------------
+ *
+ * statistics.h
+ * Extended statistics and selectivity estimation functions.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/statistics/statistics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STATISTICS_H
+#define STATISTICS_H
+
+#include "commands/vacuum.h"
+#include "nodes/pathnodes.h"
+
+#define STATS_MAX_DIMENSIONS 8 /* max number of attributes (dimensions); also sizes MCVList.types[] */
+
+/* Multivariate distinct coefficients */
+#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier (presumably stored in MVNDistinct.magic; confirm) */
+#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */
+
+/* MVNDistinctItem represents a single combination of columns */
+typedef struct MVNDistinctItem
+{
+ double ndistinct; /* ndistinct value for this combination */
+ int nattributes; /* number of attributes in this combination */
+ AttrNumber *attributes; /* attribute numbers of this combination */
+} MVNDistinctItem;
+
+/* An MVNDistinct object, comprising all possible combinations of columns */
+typedef struct MVNDistinct
+{
+ uint32 magic; /* magic constant marker */
+ uint32 type; /* type of ndistinct (BASIC) */
+ uint32 nitems; /* number of items in the statistic */
+ MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]; /* the items, one per column combination */
+} MVNDistinct;
+
+/* Multivariate functional dependencies */
+#define STATS_DEPS_MAGIC 0xB4549A2C /* marks serialized bytea */
+#define STATS_DEPS_TYPE_BASIC 1 /* basic dependencies type */
+
+/*
+ * Functional dependencies, tracking column-level relationships (values
+ * in one column determine values in another one).
+ */
+typedef struct MVDependency
+{
+ double degree; /* degree of validity (0-1) */
+ AttrNumber nattributes; /* number of attributes in this dependency */
+ AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]; /* attribute numbers (variable-length trailing array) */
+} MVDependency;
+
+typedef struct MVDependencies /* a set of MVDependency entries */
+{
+ uint32 magic; /* magic constant marker */
+ uint32 type; /* type of MV Dependencies (BASIC) */
+ uint32 ndeps; /* number of dependencies */
+ MVDependency *deps[FLEXIBLE_ARRAY_MEMBER]; /* pointers to the dependencies */
+} MVDependencies;
+
+/* used to flag stats serialized to bytea */
+#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized bytea */
+#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */
+
+/* max items in MCV list (should be equal to max default_statistics_target) */
+#define STATS_MCVLIST_MAX_ITEMS 10000
+
+/*
+ * Multivariate MCV (most-common value) lists
+ *
+ * A straightforward extension of MCV items - i.e. a list (array) of
+ * combinations of attribute values, together with a frequency and null flags.
+ */
+typedef struct MCVItem
+{
+ double frequency; /* frequency of this combination */
+ double base_frequency; /* frequency if independent */
+ bool *isnull; /* NULL flags (presumably one per dimension; confirm) */
+ Datum *values; /* item values (presumably one per dimension; confirm) */
+} MCVItem;
+
+/* multivariate MCV list - essentially an array of MCV items */
+typedef struct MCVList
+{
+ uint32 magic; /* magic constant marker */
+ uint32 type; /* type of MCV list (BASIC) */
+ uint32 nitems; /* number of MCV items in the array */
+ AttrNumber ndimensions; /* number of dimensions */
+ Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types (fixed capacity of STATS_MAX_DIMENSIONS) */
+ MCVItem items[FLEXIBLE_ARRAY_MEMBER]; /* array of MCV items */
+} MCVList;
+/* loaders, one per statistics kind (ndistinct, dependencies, MCV list) */
+extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
+extern MVDependencies *statext_dependencies_load(Oid mvoid);
+extern MCVList *statext_mcv_load(Oid mvoid);
+/* building extended statistics, and selectivity estimation functions */
+extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
+ int numrows, HeapTuple *rows,
+ int natts, VacAttrStats **vacattrstats);
+extern int ComputeExtStatisticsRows(Relation onerel,
+ int natts, VacAttrStats **stats);
+extern bool statext_is_kind_built(HeapTuple htup, char kind);
+extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ Bitmapset **estimatedclauses); /* presumably updated with the clauses estimated here; confirm */
+extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ Bitmapset **estimatedclauses,
+ bool is_or); /* NOTE(review): looks like it flags OR-clause handling; confirm */
+extern bool has_stats_of_kind(List *stats, char requiredkind);
+extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
+ Bitmapset **clause_attnums,
+ List **clause_exprs,
+ int nclauses);
+extern HeapTuple statext_expressions_load(Oid stxoid, int idx);
+
+#endif /* STATISTICS_H */