From 46651ce6fe013220ed397add242004d764fc0153 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:15:05 +0200 Subject: Adding upstream version 14.5. Signed-off-by: Daniel Baumann --- src/include/statistics/extended_stats_internal.h | 130 +++++++++++++++++++++++ src/include/statistics/statistics.h | 129 ++++++++++++++++++++++ 2 files changed, 259 insertions(+) create mode 100644 src/include/statistics/extended_stats_internal.h create mode 100644 src/include/statistics/statistics.h (limited to 'src/include/statistics') diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h new file mode 100644 index 0000000..55cd925 --- /dev/null +++ b/src/include/statistics/extended_stats_internal.h @@ -0,0 +1,130 @@ +/*------------------------------------------------------------------------- + * + * extended_stats_internal.h + * POSTGRES extended statistics internal declarations + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/statistics/extended_stats_internal.h + * + *------------------------------------------------------------------------- + */ +#ifndef EXTENDED_STATS_INTERNAL_H +#define EXTENDED_STATS_INTERNAL_H + +#include "statistics/statistics.h" +#include "utils/sortsupport.h" + +typedef struct +{ + Oid eqopr; /* '=' operator for datatype, if any */ + Oid eqfunc; /* and associated function */ + Oid ltopr; /* '<' operator for datatype, if any */ +} StdAnalyzeData; + +typedef struct +{ + Datum value; /* a data value */ + int tupno; /* position index for tuple it came from */ +} ScalarItem; + +/* (de)serialization info */ +typedef struct DimensionInfo +{ + int nvalues; /* number of deduplicated values */ + int nbytes; /* number of bytes (serialized) */ + int nbytes_aligned; /* size of deserialized data with alignment */ + int typlen; /* pg_type.typlen */ + bool typbyval; /* pg_type.typbyval */ +} DimensionInfo; + +/* multi-sort */ +typedef struct MultiSortSupportData +{ + int ndims; /* number of dimensions */ + /* sort support data for each dimension: */ + SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]; +} MultiSortSupportData; + +typedef MultiSortSupportData *MultiSortSupport; + +typedef struct SortItem +{ + Datum *values; + bool *isnull; + int count; +} SortItem; + +/* a unified representation of the data the statistics is built on */ +typedef struct StatsBuildData +{ + int numrows; + int nattnums; + AttrNumber *attnums; + VacAttrStats **stats; + Datum **values; + bool **nulls; +} StatsBuildData; + + +extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data); +extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct); +extern MVNDistinct *statext_ndistinct_deserialize(bytea *data); + +extern MVDependencies *statext_dependencies_build(StatsBuildData *data); +extern bytea *statext_dependencies_serialize(MVDependencies *dependencies); +extern MVDependencies *statext_dependencies_deserialize(bytea *data); + +extern MCVList *statext_mcv_build(StatsBuildData *data, + double totalrows, int stattarget); +extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats); +extern MCVList *statext_mcv_deserialize(bytea *data); + +extern MultiSortSupport multi_sort_init(int ndims); +extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, + Oid oper, Oid collation); +extern int multi_sort_compare(const void *a, const void *b, void *arg); +extern int multi_sort_compare_dim(int dim, const SortItem *a, + const SortItem *b, MultiSortSupport mss); +extern int multi_sort_compare_dims(int start, int end, const SortItem *a, + const SortItem *b, MultiSortSupport mss); +extern int compare_scalars_simple(const void *a, const void *b, void *arg); +extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup); + +extern AttrNumber *build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs); + +extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems, + MultiSortSupport mss, + int numattrs, AttrNumber *attnums); + +extern bool examine_opclause_args(List *args, Node **exprp, + Const **cstp, bool *expronleftp); + +extern Selectivity mcv_combine_selectivities(Selectivity simple_sel, + Selectivity mcv_sel, + Selectivity mcv_basesel, + Selectivity mcv_totalsel); + +extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, + StatisticExtInfo *stat, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Selectivity *basesel, + Selectivity *totalsel); + +extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, + StatisticExtInfo *stat, + MCVList *mcv, + Node *clause, + bool **or_matches, + Selectivity *basesel, + Selectivity *overlap_mcvsel, + Selectivity *overlap_basesel, + Selectivity *totalsel); + +#endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h new file mode 100644 index 0000000..326cf26 --- /dev/null +++ b/src/include/statistics/statistics.h @@ -0,0 +1,129 @@ +/*------------------------------------------------------------------------- + * + * statistics.h + * Extended statistics and selectivity estimation functions. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/statistics/statistics.h + * + *------------------------------------------------------------------------- + */ +#ifndef STATISTICS_H +#define STATISTICS_H + +#include "commands/vacuum.h" +#include "nodes/pathnodes.h" + +#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */ + +/* Multivariate distinct coefficients */ +#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier */ +#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */ + +/* MVNDistinctItem represents a single combination of columns */ +typedef struct MVNDistinctItem +{ + double ndistinct; /* ndistinct value for this combination */ + int nattributes; /* number of attributes */ + AttrNumber *attributes; /* attribute numbers */ +} MVNDistinctItem; + +/* A MVNDistinct object, comprising all possible combinations of columns */ +typedef struct MVNDistinct +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of ndistinct (BASIC) */ + uint32 nitems; /* number of items in the statistic */ + MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]; +} MVNDistinct; + +/* Multivariate functional dependencies */ +#define STATS_DEPS_MAGIC 0xB4549A2C /* marks serialized bytea */ +#define STATS_DEPS_TYPE_BASIC 1 /* basic dependencies type */ + +/* + * Functional dependencies, tracking column-level relationships (values + * in one column determine values in another one). + */ +typedef struct MVDependency +{ + double degree; /* degree of validity (0-1) */ + AttrNumber nattributes; /* number of attributes */ + AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]; /* attribute numbers */ +} MVDependency; + +typedef struct MVDependencies +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of MV Dependencies (BASIC) */ + uint32 ndeps; /* number of dependencies */ + MVDependency *deps[FLEXIBLE_ARRAY_MEMBER]; /* dependencies */ +} MVDependencies; + +/* used to flag stats serialized to bytea */ +#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized bytea */ +#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */ + +/* max items in MCV list (should be equal to max default_statistics_target) */ +#define STATS_MCVLIST_MAX_ITEMS 10000 + +/* + * Multivariate MCV (most-common value) lists + * + * A straightforward extension of MCV items - i.e. a list (array) of + * combinations of attribute values, together with a frequency and null flags. + */ +typedef struct MCVItem +{ + double frequency; /* frequency of this combination */ + double base_frequency; /* frequency if independent */ + bool *isnull; /* NULL flags */ + Datum *values; /* item values */ +} MCVItem; + +/* multivariate MCV list - essentially an array of MCV items */ +typedef struct MCVList +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of MCV list (BASIC) */ + uint32 nitems; /* number of MCV items in the array */ + AttrNumber ndimensions; /* number of dimensions */ + Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types */ + MCVItem items[FLEXIBLE_ARRAY_MEMBER]; /* array of MCV items */ +} MCVList; + +extern MVNDistinct *statext_ndistinct_load(Oid mvoid); +extern MVDependencies *statext_dependencies_load(Oid mvoid); +extern MCVList *statext_mcv_load(Oid mvoid); + +extern void BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats); +extern int ComputeExtStatisticsRows(Relation onerel, + int natts, VacAttrStats **stats); +extern bool statext_is_kind_built(HeapTuple htup, char kind); +extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Bitmapset **estimatedclauses); +extern Selectivity statext_clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Bitmapset **estimatedclauses, + bool is_or); +extern bool has_stats_of_kind(List *stats, char requiredkind); +extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, + Bitmapset **clause_attnums, + List **clause_exprs, + int nclauses); +extern HeapTuple statext_expressions_load(Oid stxoid, int idx); + +#endif /* STATISTICS_H */ -- cgit v1.2.3