author      Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-13 13:44:03 +0000
committer   Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-13 13:44:03 +0000
commit      293913568e6a7a86fd1479e1cff8e2ecb58d6568
tree        fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c   /src/backend/statistics
parent      Initial commit.
Adding upstream version 16.2. (tag: upstream/16.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/statistics')
-rw-r--r--  src/backend/statistics/Makefile               21
-rw-r--r--  src/backend/statistics/README                101
-rw-r--r--  src/backend/statistics/README.dependencies   116
-rw-r--r--  src/backend/statistics/README.mcv            103
-rw-r--r--  src/backend/statistics/dependencies.c       1831
-rw-r--r--  src/backend/statistics/extended_stats.c     2662
-rw-r--r--  src/backend/statistics/mcv.c                2179
-rw-r--r--  src/backend/statistics/meson.build             8
-rw-r--r--  src/backend/statistics/mvdistinct.c          700
9 files changed, 7721 insertions, 0 deletions
diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile new file mode 100644 index 0000000..89cf8c2 --- /dev/null +++ b/src/backend/statistics/Makefile @@ -0,0 +1,21 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for statistics +# +# IDENTIFICATION +# src/backend/statistics/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/statistics +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + dependencies.o \ + extended_stats.o \ + mcv.o \ + mvdistinct.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/statistics/README b/src/backend/statistics/README new file mode 100644 index 0000000..13a97a3 --- /dev/null +++ b/src/backend/statistics/README @@ -0,0 +1,101 @@ +Extended statistics +=================== + +When estimating various quantities (e.g. condition selectivities) the default +approach relies on the assumption of independence. In practice that's often +not true, resulting in estimation errors. + +Extended statistics track different types of dependencies between the columns, +hopefully improving the estimates and producing better plans. + + +Types of statistics +------------------- + +There are currently several kinds of extended statistics: + + (a) ndistinct coefficients + + (b) soft functional dependencies (README.dependencies) + + (c) MCV lists (README.mcv) + + +Compatible clause types +----------------------- + +Each type of statistics may be used to estimate some subset of clause types. + + (a) functional dependencies - equality clauses (AND), possibly IS NULL + + (b) MCV lists - equality and inequality clauses (AND, OR, NOT), IS [NOT] NULL + +Currently, only OpExprs in the form Var op Const, or Const op Var are +supported, however it's feasible to expand the code later to also estimate the +selectivities on clauses such as Var op Var. + + +Complex clauses +--------------- + +We also support estimating more complex clauses - essentially AND/OR clauses +with (Var op Const) as leaves, as long as all the referenced attributes are +covered by a single statistics object. + +For example this condition + + (a=1) AND ((b=2) OR ((c=3) AND (d=4))) + +may be estimated using statistics on (a,b,c,d). If we only have statistics on +(b,c,d) we may estimate the second part, and estimate (a=1) using simple stats. + +If we only have statistics on (a,b,c) we can't apply it at all at this point, +but it's worth pointing out clauselist_selectivity() works recursively and when +handling the second part (the OR-clause), we'll be able to apply the statistics. + +Note: The multi-statistics estimation patch also makes it possible to pass some +clauses as 'conditions' into the deeper parts of the expression tree. + + +Selectivity estimation +---------------------- + +Throughout the planner clauselist_selectivity() still remains in charge of +most selectivity estimate requests. clauselist_selectivity() can be instructed +to try to make use of any extended statistics on the given RelOptInfo, which +it will do if: + + (a) An actual valid RelOptInfo was given. Join relations are passed in as + NULL, therefore are invalid. + + (b) The relation given actually has any extended statistics defined which + are actually built. + +When the above conditions are met, clauselist_selectivity() first attempts to +pass the clause list off to the extended statistics selectivity estimation +function. 
This function may not find any clauses which it can perform any +estimations on. In such cases, these clauses are simply ignored. When actual +estimation work is performed in these functions they're expected to mark which +clauses they've performed estimations for so that any other function +performing estimations knows which clauses are to be skipped. + +Size of sample in ANALYZE +------------------------- + +When performing ANALYZE, the number of rows to sample is determined as + + (300 * statistics_target) + +That works reasonably well for statistics on individual columns, but perhaps +it's not enough for extended statistics. Papers analyzing estimation errors +all use samples proportional to the table (usually finding that 1-3% of the +table is enough to build accurate stats). + +The requested accuracy (number of MCV items or histogram bins) should also +be considered when determining the sample size, and in extended statistics +those are not necessarily limited by statistics_target. + +This however merits further discussion, because collecting the sample is quite +expensive and increasing it further would make ANALYZE even more painful. +Judging by the experiments with the current implementation, the fixed size +seems to work reasonably well for now, so we leave this as future work. diff --git a/src/backend/statistics/README.dependencies b/src/backend/statistics/README.dependencies new file mode 100644 index 0000000..6c446bd --- /dev/null +++ b/src/backend/statistics/README.dependencies @@ -0,0 +1,116 @@ +Soft functional dependencies +============================ + +Functional dependencies are a concept well described in relational theory, +particularly in the definition of normalization and "normal forms". Wikipedia +has a nice definition of a functional dependency [1]: + + In a given table, an attribute Y is said to have a functional dependency + on a set of attributes X (written X -> Y) if and only if each X value is + associated with precisely one Y value. For example, in an "Employee" + table that includes the attributes "Employee ID" and "Employee Date of + Birth", the functional dependency + + {Employee ID} -> {Employee Date of Birth} + + would hold. It follows from the previous two sentences that each + {Employee ID} is associated with precisely one {Employee Date of Birth}. + + [1] https://en.wikipedia.org/wiki/Functional_dependency + +In practical terms, functional dependencies mean that a value in one column +determines values in some other column. Consider for example this trivial +table with two integer columns: + + CREATE TABLE t (a INT, b INT) + AS SELECT i, i/10 FROM generate_series(1,100000) s(i); + +Clearly, knowledge of the value in column 'a' is sufficient to determine the +value in column 'b', as it's simply (a/10). A more practical example may be +addresses, where the knowledge of a ZIP code (usually) determines city. Larger +cities may have multiple ZIP codes, so the dependency can't be reversed. + +Many datasets might be normalized not to contain such dependencies, but often +it's not practical for various reasons. In some cases, it's actually a conscious +design choice to model the dataset in a denormalized way, either because of +performance or to make querying easier. + + +Soft dependencies +----------------- + +Real-world data sets often contain data errors, either because of data entry +mistakes (user mistyping the ZIP code) or perhaps issues in generating the +data (e.g. a ZIP code mistakenly assigned to two cities in different states). 
+ +A strict implementation would either ignore dependencies in such cases, +rendering the approach mostly useless even for slightly noisy data sets, or +result in sudden changes in behavior depending on minor differences between +samples provided to ANALYZE. + +For this reason, extended statistics implement "soft" functional dependencies, +associating each functional dependency with a degree of validity (a number +between 0 and 1). This degree is then used to combine selectivities in a +smooth manner. + + +Mining dependencies (ANALYZE) +----------------------------- + +The current algorithm is fairly simple - generate all possible functional +dependencies, and for each one count the number of rows consistent with it. +Then use the fraction of rows (supporting/total) as the degree. + +To count the rows consistent with the dependency (a => b): + + (a) Sort the data lexicographically, i.e. first by 'a' then 'b'. + + (b) For each group of rows with the same 'a' value, count the number of + distinct values in 'b'. + + (c) If there's a single distinct value in 'b', the rows are consistent with + the functional dependency, otherwise they contradict it. + + +Clause reduction (planner/optimizer) +------------------------------------ + +Applying the functional dependencies is fairly simple: given a list of +equality clauses, we compute selectivities of each clause and then use the +degree to combine them using this formula + + P(a=?,b=?) = P(a=?) * (d + (1-d) * P(b=?)) + +Where 'd' is the degree of functional dependency (a => b). + +With more than two equality clauses, this process happens recursively. For +example for (a,b,c) we first use (a,b => c) to break the computation into + + P(a=?,b=?,c=?) = P(a=?,b=?) * (e + (1-e) * P(c=?)) + +where 'e' is the degree of functional dependency (a,b => c); then we can +apply (a=>b) the same way on P(a=?,b=?). + + +Consistency of clauses +---------------------- + +Functional dependencies only express general dependencies between columns, +without referencing particular values. This assumes that the equality clauses +are in fact consistent with the functional dependency, i.e. that given a +dependency (a=>b), the value in (b=?) clause is the value determined by (a=?). +If that's not the case, the clauses are "inconsistent" with the functional +dependency and the result will be over-estimation. + +This may happen, for example, when using conditions on the ZIP code and city +name with mismatching values (ZIP code for a different city), etc. In such a +case, the result set will be empty, but we'll estimate the selectivity using +the ZIP code condition. + +In this case, the default estimation based on AVIA principle happens to work +better, but mostly by chance. + +This issue is the price for the simplicity of functional dependencies. If the +application frequently constructs queries with clauses inconsistent with +functional dependencies present in the data, the best solution is not to +use functional dependencies, but one of the more complex types of statistics. diff --git a/src/backend/statistics/README.mcv b/src/backend/statistics/README.mcv new file mode 100644 index 0000000..a918fb5 --- /dev/null +++ b/src/backend/statistics/README.mcv @@ -0,0 +1,103 @@ +MCV lists +========= + +Multivariate MCV (most-common values) lists are a straightforward extension of +regular MCV lists, tracking most frequent combinations of values for a group of +attributes. 
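+
+For illustration, a statistics object of this kind might be created like
+this (a minimal sketch; the table and statistics names are hypothetical):
+
+  CREATE TABLE cities (zip TEXT, city TEXT);
+  CREATE STATISTICS cities_mcv (mcv) ON zip, city FROM cities;
+  ANALYZE cities;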
+ +This works particularly well for columns with a small number of distinct values, +as the list may include all the combinations and approximate the distribution +very accurately. + +For columns with a large number of distinct values (e.g. those with continuous +domains), the list will only track the most frequent combinations. If the +distribution is mostly uniform (all combinations about equally frequent), the +MCV list will be empty. + +Estimates of some clauses (e.g. equality) based on MCV lists are more accurate +than when using histograms. + +Also, MCV lists don't necessarily require sorting of the values (the fact that +we use sorting when building them is an implementation detail), but even more +importantly the ordering is not built into the approximation (while histograms +are built on ordering). So MCV lists work well even for attributes where the +ordering of the data type is disconnected from the meaning of the data. For +example we know how to sort strings, but it's unlikely to make much sense for +city names (or other label-like attributes). + + +Selectivity estimation +---------------------- + +The estimation, implemented in mcv_clauselist_selectivity(), is quite simple +in principle - we need to identify MCV items matching all the clauses and sum +frequencies of all those items. + +Currently MCV lists support estimation of the following clause types: + + (a) equality clauses WHERE (a = 1) AND (b = 2) + (b) inequality clauses WHERE (a < 1) AND (b >= 2) + (c) NULL clauses WHERE (a IS NULL) AND (b IS NOT NULL) + (d) OR clauses WHERE (a < 1) OR (b >= 2) + +It's possible to add support for additional clauses, for example: + + (e) multi-var clauses WHERE (a > b) + +and possibly others. These are tasks for the future, not yet implemented. + + +Hashed MCV (not yet implemented) +-------------------------------- + +Regular MCV lists have to include actual values for each item, so if those items +are large the list may be quite large. This is especially true for multivariate +MCV lists, although the current implementation partially mitigates this by +de-duplicating the values before storing them on disk. + +It's possible to only store hashes (32-bit values) instead of the actual values, +significantly reducing the space requirements. Obviously, this would only make +the MCV lists useful for estimating equality conditions (assuming the 32-bit +hashes make the collisions rare enough). + +This might also complicate matching the columns to available stats. + + +TODO Consider implementing hashed MCV list, storing just 32-bit hashes instead + of the actual values. This type of MCV list will be useful only for + estimating equality clauses, and will reduce space requirements for large + varlena types (in such cases we usually only want equality anyway). + + +Inspecting the MCV list +----------------------- + +Inspecting the regular (per-attribute) MCV lists is trivial, as it's enough +to select the columns from pg_stats. The data is encoded as anyarrays, and +all the items have the same data type, so anyarray provides a simple way to +get a text representation. + +With multivariate MCV lists, the columns may use different data types, making +it impossible to use anyarrays. It might be possible to produce a similar +array-like representation, but that would complicate further processing and +analysis of the MCV list. + +So instead the MCV lists are stored in a custom data type (pg_mcv_list), +which however makes it more difficult to inspect the contents. 
To make that +easier, there's a SRF returning detailed information about the MCV lists. + + SELECT m.* FROM pg_statistic_ext s, + pg_statistic_ext_data d, + pg_mcv_list_items(stxdmcv) m + WHERE s.stxname = 'stts2' + AND d.stxoid = s.oid; + +It accepts one parameter - a pg_mcv_list value (which can only be obtained +from pg_statistic_ext_data catalog, to defend against malicious input), and +returns these columns: + + - item index (0, ..., (nitems-1)) + - values (string array) + - nulls only (boolean array) + - frequency (double precision) + - base_frequency (double precision) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c new file mode 100644 index 0000000..edb2e53 --- /dev/null +++ b/src/backend/statistics/dependencies.c @@ -0,0 +1,1831 @@ +/*------------------------------------------------------------------------- + * + * dependencies.c + * POSTGRES functional dependencies + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/dependencies.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "lib/stringinfo.h" +#include "nodes/nodeFuncs.h" +#include "nodes/nodes.h" +#include "nodes/pathnodes.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" +#include "parser/parsetree.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "utils/bytea.h" +#include "utils/fmgroids.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/selfuncs.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* size of the struct header fields (magic, type, ndeps) */ +#define SizeOfHeader (3 * sizeof(uint32)) + +/* size of a serialized dependency (degree, natts, atts) */ +#define SizeOfItem(natts) \ + (sizeof(double) + sizeof(AttrNumber) * (1 + (natts))) + +/* minimal size of a dependency (with two attributes) */ +#define MinSizeOfItem SizeOfItem(2) + +/* minimal size of dependencies, when all deps are minimal */ +#define MinSizeOfItems(ndeps) \ + (SizeOfHeader + (ndeps) * MinSizeOfItem) + +/* + * Internal state for DependencyGenerator of dependencies. Dependencies are similar to + * k-permutations of n elements, except that the order does not matter for the + * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent. 
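+ *
+ * For example, with three attributes and k = 3 the generator produces only
+ * (a,b=>c), (a,c=>b) and (b,c=>a); together with a k = 2 pass this gives
+ * the nine dependencies listed at statext_dependencies_build().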
+ */ +typedef struct DependencyGeneratorData +{ + int k; /* size of the dependency */ + int n; /* number of possible attributes */ + int current; /* next dependency to return (index) */ + AttrNumber ndependencies; /* number of dependencies generated */ + AttrNumber *dependencies; /* array of pre-generated dependencies */ +} DependencyGeneratorData; + +typedef DependencyGeneratorData *DependencyGenerator; + +static void generate_dependencies_recurse(DependencyGenerator state, + int index, AttrNumber start, AttrNumber *current); +static void generate_dependencies(DependencyGenerator state); +static DependencyGenerator DependencyGenerator_init(int n, int k); +static void DependencyGenerator_free(DependencyGenerator state); +static AttrNumber *DependencyGenerator_next(DependencyGenerator state); +static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency); +static bool dependency_is_fully_matched(MVDependency *dependency, + Bitmapset *attnums); +static bool dependency_is_compatible_clause(Node *clause, Index relid, + AttrNumber *attnum); +static bool dependency_is_compatible_expression(Node *clause, Index relid, + List *statlist, Node **expr); +static MVDependency *find_strongest_dependency(MVDependencies **dependencies, + int ndependencies, Bitmapset *attnums); +static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, + int varRelid, JoinType jointype, + SpecialJoinInfo *sjinfo, + MVDependency **dependencies, + int ndependencies, + AttrNumber *list_attnums, + Bitmapset **estimatedclauses); + +static void +generate_dependencies_recurse(DependencyGenerator state, int index, + AttrNumber start, AttrNumber *current) +{ + /* + * The generator handles the first (k-1) elements differently from the + * last element. + */ + if (index < (state->k - 1)) + { + AttrNumber i; + + /* + * The first (k-1) values have to be in ascending order, which we + * generate recursively. + */ + + for (i = start; i < state->n; i++) + { + current[index] = i; + generate_dependencies_recurse(state, (index + 1), (i + 1), current); + } + } + else + { + int i; + + /* + * the last element is the implied value, which does not respect the + * ascending order. We just need to check that the value is not in the + * first (k-1) elements. + */ + + for (i = 0; i < state->n; i++) + { + int j; + bool match = false; + + current[index] = i; + + for (j = 0; j < index; j++) + { + if (current[j] == i) + { + match = true; + break; + } + } + + /* + * If the value is not found in the first part of the dependency, + * we're done. + */ + if (!match) + { + state->dependencies = (AttrNumber *) repalloc(state->dependencies, + state->k * (state->ndependencies + 1) * sizeof(AttrNumber)); + memcpy(&state->dependencies[(state->k * state->ndependencies)], + current, state->k * sizeof(AttrNumber)); + state->ndependencies++; + } + } + } +} + +/* generate all dependencies (k-permutations of n elements) */ +static void +generate_dependencies(DependencyGenerator state) +{ + AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k); + + generate_dependencies_recurse(state, 0, 0, current); + + pfree(current); +} + +/* + * initialize the DependencyGenerator of variations, and prebuild the variations + * + * This pre-builds all the variations. We could also generate them in + * DependencyGenerator_next(), but this seems simpler. 
+ */ +static DependencyGenerator +DependencyGenerator_init(int n, int k) +{ + DependencyGenerator state; + + Assert((n >= k) && (k > 0)); + + /* allocate the DependencyGenerator state */ + state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData)); + state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber)); + + state->ndependencies = 0; + state->current = 0; + state->k = k; + state->n = n; + + /* now actually pre-generate all the variations */ + generate_dependencies(state); + + return state; +} + +/* free the DependencyGenerator state */ +static void +DependencyGenerator_free(DependencyGenerator state) +{ + pfree(state->dependencies); + pfree(state); +} + +/* generate next combination */ +static AttrNumber * +DependencyGenerator_next(DependencyGenerator state) +{ + if (state->current == state->ndependencies) + return NULL; + + return &state->dependencies[state->k * state->current++]; +} + + +/* + * validates functional dependency on the data + * + * An actual work horse of detecting functional dependencies. Given a variation + * of k attributes, it checks that the first (k-1) are sufficient to determine + * the last one. + */ +static double +dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency) +{ + int i, + nitems; + MultiSortSupport mss; + SortItem *items; + AttrNumber *attnums_dep; + + /* counters valid within a group */ + int group_size = 0; + int n_violations = 0; + + /* total number of rows supporting (consistent with) the dependency */ + int n_supporting_rows = 0; + + /* Make sure we have at least two input attributes. */ + Assert(k >= 2); + + /* sort info for all attributes columns */ + mss = multi_sort_init(k); + + /* + * Translate the array of indexes to regular attnums for the dependency + * (we will need this to identify the columns in StatsBuildData). + */ + attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber)); + for (i = 0; i < k; i++) + attnums_dep[i] = data->attnums[dependency[i]]; + + /* + * Verify the dependency (a,b,...)->z, using a rather simple algorithm: + * + * (a) sort the data lexicographically + * + * (b) split the data into groups by first (k-1) columns + * + * (c) for each group count different values in the last column + * + * We use the column data types' default sort operators and collations; + * perhaps at some point it'd be worth using column-specific collations? + */ + + /* prepare the sort function for the dimensions */ + for (i = 0; i < k; i++) + { + VacAttrStats *colstat = data->stats[dependency[i]]; + TypeCacheEntry *type; + + type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for ordering operator for type %u", + colstat->attrtypid); + + /* prepare the sort function for this dimension */ + multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); + } + + /* + * build an array of SortItem(s) sorted using the multi-sort support + * + * XXX This relies on all stats entries pointing to the same tuple + * descriptor. For now that assumption holds, but it might change in the + * future for example if we support statistics on multiple tables. + */ + items = build_sorted_items(data, &nitems, mss, k, attnums_dep); + + /* + * Walk through the sorted array, split it into rows according to the + * first (k-1) columns. If there's a single value in the last column, we + * count the group as 'supporting' the functional dependency. Otherwise we + * count it as contradicting. 
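+ *
+ * As an illustration (a hypothetical example, not taken from the code),
+ * checking (a=>b) on the sorted rows (1,1), (1,1), (1,2), (2,3) yields two
+ * groups: a=1 contains two distinct 'b' values and contradicts the
+ * dependency, while a=2 contains a single 'b' value and supports it,
+ * giving a degree of 1/4.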
+ */ + + /* start with the first row forming a group */ + group_size = 1; + + /* loop 1 beyond the end of the array so that we count the final group */ + for (i = 1; i <= nitems; i++) + { + /* + * Check if the group ended, which may be either because we processed + * all the items (i==nitems), or because the i-th item is not equal to + * the preceding one. + */ + if (i == nitems || + multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0) + { + /* + * If no violations were found in the group then track the rows of + * the group as supporting the functional dependency. + */ + if (n_violations == 0) + n_supporting_rows += group_size; + + /* Reset counters for the new group */ + n_violations = 0; + group_size = 1; + continue; + } + /* first columns match, but the last one does not (so contradicting) */ + else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0) + n_violations++; + + group_size++; + } + + /* Compute the 'degree of validity' as (supporting/total). */ + return (n_supporting_rows * 1.0 / data->numrows); +} + +/* + * detects functional dependencies between groups of columns + * + * Generates all possible subsets of columns (variations) and computes + * the degree of validity for each one. For example when creating statistics + * on three columns (a,b,c) there are 9 possible dependencies + * + * two columns three columns + * ----------- ------------- + * (a) -> b (a,b) -> c + * (a) -> c (a,c) -> b + * (b) -> a (b,c) -> a + * (b) -> c + * (c) -> a + * (c) -> b + */ +MVDependencies * +statext_dependencies_build(StatsBuildData *data) +{ + int i, + k; + + /* result */ + MVDependencies *dependencies = NULL; + MemoryContext cxt; + + Assert(data->nattnums >= 2); + + /* tracks memory allocated by dependency_degree calls */ + cxt = AllocSetContextCreate(CurrentMemoryContext, + "dependency_degree cxt", + ALLOCSET_DEFAULT_SIZES); + + /* + * We'll try build functional dependencies starting from the smallest ones + * covering just 2 columns, to the largest ones, covering all columns + * included in the statistics object. We start from the smallest ones + * because we want to be able to skip already implied ones. 
+ */ + for (k = 2; k <= data->nattnums; k++) + { + AttrNumber *dependency; /* array with k elements */ + + /* prepare a DependencyGenerator of variation */ + DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k); + + /* generate all possible variations of k values (out of n) */ + while ((dependency = DependencyGenerator_next(DependencyGenerator))) + { + double degree; + MVDependency *d; + MemoryContext oldcxt; + + /* release memory used by dependency degree calculation */ + oldcxt = MemoryContextSwitchTo(cxt); + + /* compute how valid the dependency seems */ + degree = dependency_degree(data, k, dependency); + + MemoryContextSwitchTo(oldcxt); + MemoryContextReset(cxt); + + /* + * if the dependency seems entirely invalid, don't store it + */ + if (degree == 0.0) + continue; + + d = (MVDependency *) palloc0(offsetof(MVDependency, attributes) + + k * sizeof(AttrNumber)); + + /* copy the dependency (and keep the indexes into stxkeys) */ + d->degree = degree; + d->nattributes = k; + for (i = 0; i < k; i++) + d->attributes[i] = data->attnums[dependency[i]]; + + /* initialize the list of dependencies */ + if (dependencies == NULL) + { + dependencies + = (MVDependencies *) palloc0(sizeof(MVDependencies)); + + dependencies->magic = STATS_DEPS_MAGIC; + dependencies->type = STATS_DEPS_TYPE_BASIC; + dependencies->ndeps = 0; + } + + dependencies->ndeps++; + dependencies = (MVDependencies *) repalloc(dependencies, + offsetof(MVDependencies, deps) + + dependencies->ndeps * sizeof(MVDependency *)); + + dependencies->deps[dependencies->ndeps - 1] = d; + } + + /* + * we're done with variations of k elements, so free the + * DependencyGenerator + */ + DependencyGenerator_free(DependencyGenerator); + } + + MemoryContextDelete(cxt); + + return dependencies; +} + + +/* + * Serialize list of dependencies into a bytea value. + */ +bytea * +statext_dependencies_serialize(MVDependencies *dependencies) +{ + int i; + bytea *output; + char *tmp; + Size len; + + /* we need to store ndeps, with a number of attributes for each one */ + len = VARHDRSZ + SizeOfHeader; + + /* and also include space for the actual attribute numbers and degrees */ + for (i = 0; i < dependencies->ndeps; i++) + len += SizeOfItem(dependencies->deps[i]->nattributes); + + output = (bytea *) palloc0(len); + SET_VARSIZE(output, len); + + tmp = VARDATA(output); + + /* Store the base struct values (magic, type, ndeps) */ + memcpy(tmp, &dependencies->magic, sizeof(uint32)); + tmp += sizeof(uint32); + memcpy(tmp, &dependencies->type, sizeof(uint32)); + tmp += sizeof(uint32); + memcpy(tmp, &dependencies->ndeps, sizeof(uint32)); + tmp += sizeof(uint32); + + /* store number of attributes and attribute numbers for each dependency */ + for (i = 0; i < dependencies->ndeps; i++) + { + MVDependency *d = dependencies->deps[i]; + + memcpy(tmp, &d->degree, sizeof(double)); + tmp += sizeof(double); + + memcpy(tmp, &d->nattributes, sizeof(AttrNumber)); + tmp += sizeof(AttrNumber); + + memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes); + tmp += sizeof(AttrNumber) * d->nattributes; + + /* protect against overflow */ + Assert(tmp <= ((char *) output + len)); + } + + /* make sure we've produced exactly the right amount of data */ + Assert(tmp == ((char *) output + len)); + + return output; +} + +/* + * Reads serialized dependencies into MVDependencies structure. 
+ */
+MVDependencies *
+statext_dependencies_deserialize(bytea *data)
+{
+    int         i;
+    Size        min_expected_size;
+    MVDependencies *dependencies;
+    char       *tmp;
+
+    if (data == NULL)
+        return NULL;
+
+    if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
+        elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
+             VARSIZE_ANY_EXHDR(data), SizeOfHeader);
+
+    /* read the MVDependencies header */
+    dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
+
+    /* initialize pointer to the data part (skip the varlena header) */
+    tmp = VARDATA_ANY(data);
+
+    /* read the header fields and perform basic sanity checks */
+    memcpy(&dependencies->magic, tmp, sizeof(uint32));
+    tmp += sizeof(uint32);
+    memcpy(&dependencies->type, tmp, sizeof(uint32));
+    tmp += sizeof(uint32);
+    memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
+    tmp += sizeof(uint32);
+
+    if (dependencies->magic != STATS_DEPS_MAGIC)
+        elog(ERROR, "invalid dependency magic %d (expected %d)",
+             dependencies->magic, STATS_DEPS_MAGIC);
+
+    if (dependencies->type != STATS_DEPS_TYPE_BASIC)
+        elog(ERROR, "invalid dependency type %d (expected %d)",
+             dependencies->type, STATS_DEPS_TYPE_BASIC);
+
+    if (dependencies->ndeps == 0)
+        elog(ERROR, "invalid zero-length item array in MVDependencies");
+
+    /* what minimum bytea size do we expect for those parameters */
+    min_expected_size = SizeOfItem(dependencies->ndeps);
+
+    if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
+        elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
+             VARSIZE_ANY_EXHDR(data), min_expected_size);
+
+    /* allocate space for the MCV items */
+    dependencies = repalloc(dependencies, offsetof(MVDependencies, deps) +
+                            (dependencies->ndeps * sizeof(MVDependency *)));
+
+    for (i = 0; i < dependencies->ndeps; i++)
+    {
+        double      degree;
+        AttrNumber  k;
+        MVDependency *d;
+
+        /* degree of validity */
+        memcpy(&degree, tmp, sizeof(double));
+        tmp += sizeof(double);
+
+        /* number of attributes */
+        memcpy(&k, tmp, sizeof(AttrNumber));
+        tmp += sizeof(AttrNumber);
+
+        /* is the number of attributes valid? */
+        Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
+
+        /* now that we know the number of attributes, allocate the dependency */
+        d = (MVDependency *) palloc0(offsetof(MVDependency, attributes) +
+                                     (k * sizeof(AttrNumber)));
+
+        d->degree = degree;
+        d->nattributes = k;
+
+        /* copy attribute numbers */
+        memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
+        tmp += sizeof(AttrNumber) * d->nattributes;
+
+        dependencies->deps[i] = d;
+
+        /* still within the bytea */
+        Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
+    }
+
+    /* we should have consumed the whole bytea exactly */
+    Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
+
+    return dependencies;
+}
+
+/*
+ * dependency_is_fully_matched
+ *      checks that a functional dependency is fully matched given clauses on
+ *      attributes (assuming the clauses are suitable equality clauses)
+ */
+static bool
+dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
+{
+    int         j;
+
+    /*
+     * Check that the dependency actually is fully covered by clauses.
We have + * to translate all attribute numbers, as those are referenced + */ + for (j = 0; j < dependency->nattributes; j++) + { + int attnum = dependency->attributes[j]; + + if (!bms_is_member(attnum, attnums)) + return false; + } + + return true; +} + +/* + * statext_dependencies_load + * Load the functional dependencies for the indicated pg_statistic_ext tuple + */ +MVDependencies * +statext_dependencies_load(Oid mvoid, bool inh) +{ + MVDependencies *result; + bool isnull; + Datum deps; + HeapTuple htup; + + htup = SearchSysCache2(STATEXTDATASTXOID, + ObjectIdGetDatum(mvoid), + BoolGetDatum(inh)); + if (!HeapTupleIsValid(htup)) + elog(ERROR, "cache lookup failed for statistics object %u", mvoid); + + deps = SysCacheGetAttr(STATEXTDATASTXOID, htup, + Anum_pg_statistic_ext_data_stxddependencies, &isnull); + if (isnull) + elog(ERROR, + "requested statistics kind \"%c\" is not yet built for statistics object %u", + STATS_EXT_DEPENDENCIES, mvoid); + + result = statext_dependencies_deserialize(DatumGetByteaPP(deps)); + + ReleaseSysCache(htup); + + return result; +} + +/* + * pg_dependencies_in - input routine for type pg_dependencies. + * + * pg_dependencies is real enough to be a table column, but it has no operations + * of its own, and disallows input too + */ +Datum +pg_dependencies_in(PG_FUNCTION_ARGS) +{ + /* + * pg_node_list stores the data in binary form and parsing text input is + * not needed, so disallow this. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_dependencies"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_dependencies - output routine for type pg_dependencies. + */ +Datum +pg_dependencies_out(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_PP(0); + MVDependencies *dependencies = statext_dependencies_deserialize(data); + int i, + j; + StringInfoData str; + + initStringInfo(&str); + appendStringInfoChar(&str, '{'); + + for (i = 0; i < dependencies->ndeps; i++) + { + MVDependency *dependency = dependencies->deps[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + appendStringInfoChar(&str, '"'); + for (j = 0; j < dependency->nattributes; j++) + { + if (j == dependency->nattributes - 1) + appendStringInfoString(&str, " => "); + else if (j > 0) + appendStringInfoString(&str, ", "); + + appendStringInfo(&str, "%d", dependency->attributes[j]); + } + appendStringInfo(&str, "\": %f", dependency->degree); + } + + appendStringInfoChar(&str, '}'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * pg_dependencies_recv - binary input routine for type pg_dependencies. + */ +Datum +pg_dependencies_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_dependencies"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_dependencies_send - binary output routine for type pg_dependencies. + * + * Functional dependencies are serialized in a bytea value (although the type + * is named differently), so let's just send that. + */ +Datum +pg_dependencies_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * dependency_is_compatible_clause + * Determines if the clause is compatible with functional dependencies + * + * Only clauses that have the form of equality to a pseudoconstant, or can be + * interpreted that way, are currently accepted. 
Furthermore the variable + * part of the clause must be a simple Var belonging to the specified + * relation, whose attribute number we return in *attnum on success. + */ +static bool +dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) +{ + Var *var; + Node *clause_expr; + + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; + + /* Pseudoconstants are not interesting (they couldn't contain a Var) */ + if (rinfo->pseudoconstant) + return false; + + /* Clauses referencing multiple, or no, varnos are incompatible */ + if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) + return false; + + clause = (Node *) rinfo->clause; + } + + if (is_opclause(clause)) + { + /* If it's an opclause, check for Var = Const or Const = Var. */ + OpExpr *expr = (OpExpr *) clause; + + /* Only expressions with two arguments are candidates. */ + if (list_length(expr->args) != 2) + return false; + + /* Make sure non-selected argument is a pseudoconstant. */ + if (is_pseudo_constant_clause(lsecond(expr->args))) + clause_expr = linitial(expr->args); + else if (is_pseudo_constant_clause(linitial(expr->args))) + clause_expr = lsecond(expr->args); + else + return false; + + /* + * If it's not an "=" operator, just ignore the clause, as it's not + * compatible with functional dependencies. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + * + * XXX this is pretty dubious; probably it'd be better to check btree + * or hash opclass membership, so as not to be fooled by custom + * selectivity functions, and to be more consistent with decisions + * elsewhere in the planner. + */ + if (get_oprrest(expr->opno) != F_EQSEL) + return false; + + /* OK to proceed with checking "var" */ + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + /* If it's an scalar array operator, check for Var IN Const. */ + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + + /* + * Reject ALL() variant, we only care about ANY/IN. + * + * XXX Maybe we should check if all the values are the same, and allow + * ALL in that case? Doesn't seem very practical, though. + */ + if (!expr->useOr) + return false; + + /* Only expressions with two arguments are candidates. */ + if (list_length(expr->args) != 2) + return false; + + /* + * We know it's always (Var IN Const), so we assume the var is the + * first argument, and pseudoconstant is the second one. + */ + if (!is_pseudo_constant_clause(lsecond(expr->args))) + return false; + + clause_expr = linitial(expr->args); + + /* + * If it's not an "=" operator, just ignore the clause, as it's not + * compatible with functional dependencies. The operator is identified + * simply by looking at which function it uses to estimate + * selectivity. That's a bit strange, but it's what other similar + * places do. + */ + if (get_oprrest(expr->opno) != F_EQSEL) + return false; + + /* OK to proceed with checking "var" */ + } + else if (is_orclause(clause)) + { + BoolExpr *bool_expr = (BoolExpr *) clause; + ListCell *lc; + + /* start with no attribute number */ + *attnum = InvalidAttrNumber; + + foreach(lc, bool_expr->args) + { + AttrNumber clause_attnum; + + /* + * Had we found incompatible clause in the arguments, treat the + * whole clause as incompatible. 
+ */ + if (!dependency_is_compatible_clause((Node *) lfirst(lc), + relid, &clause_attnum)) + return false; + + if (*attnum == InvalidAttrNumber) + *attnum = clause_attnum; + + /* ensure all the variables are the same (same attnum) */ + if (*attnum != clause_attnum) + return false; + } + + /* the Var is already checked by the recursive call */ + return true; + } + else if (is_notclause(clause)) + { + /* + * "NOT x" can be interpreted as "x = false", so get the argument and + * proceed with seeing if it's a suitable Var. + */ + clause_expr = (Node *) get_notclausearg(clause); + } + else + { + /* + * A boolean expression "x" can be interpreted as "x = true", so + * proceed with seeing if it's a suitable Var. + */ + clause_expr = (Node *) clause; + } + + /* + * We may ignore any RelabelType node above the operand. (There won't be + * more than one, since eval_const_expressions has been applied already.) + */ + if (IsA(clause_expr, RelabelType)) + clause_expr = (Node *) ((RelabelType *) clause_expr)->arg; + + /* We only support plain Vars for now */ + if (!IsA(clause_expr, Var)) + return false; + + /* OK, we know we have a Var */ + var = (Var *) clause_expr; + + /* Ensure Var is from the correct relation */ + if (var->varno != relid) + return false; + + /* We also better ensure the Var is from the current level */ + if (var->varlevelsup != 0) + return false; + + /* Also ignore system attributes (we don't allow stats on those) */ + if (!AttrNumberIsForUserDefinedAttr(var->varattno)) + return false; + + *attnum = var->varattno; + return true; +} + +/* + * find_strongest_dependency + * find the strongest dependency on the attributes + * + * When applying functional dependencies, we start with the strongest + * dependencies. That is, we select the dependency that: + * + * (a) has all attributes covered by equality clauses + * + * (b) has the most attributes + * + * (c) has the highest degree of validity + * + * This guarantees that we eliminate the most redundant conditions first + * (see the comment in dependencies_clauselist_selectivity). + */ +static MVDependency * +find_strongest_dependency(MVDependencies **dependencies, int ndependencies, + Bitmapset *attnums) +{ + int i, + j; + MVDependency *strongest = NULL; + + /* number of attnums in clauses */ + int nattnums = bms_num_members(attnums); + + /* + * Iterate over the MVDependency items and find the strongest one from the + * fully-matched dependencies. We do the cheap checks first, before + * matching it against the attnums. + */ + for (i = 0; i < ndependencies; i++) + { + for (j = 0; j < dependencies[i]->ndeps; j++) + { + MVDependency *dependency = dependencies[i]->deps[j]; + + /* + * Skip dependencies referencing more attributes than available + * clauses, as those can't be fully matched. + */ + if (dependency->nattributes > nattnums) + continue; + + if (strongest) + { + /* skip dependencies on fewer attributes than the strongest. */ + if (dependency->nattributes < strongest->nattributes) + continue; + + /* also skip weaker dependencies when attribute count matches */ + if (strongest->nattributes == dependency->nattributes && + strongest->degree > dependency->degree) + continue; + } + + /* + * this dependency is stronger, but we must still check that it's + * fully matched to these attnums. We perform this check last as + * it's slightly more expensive than the previous checks. 
+ */ + if (dependency_is_fully_matched(dependency, attnums)) + strongest = dependency; /* save new best match */ + } + } + + return strongest; +} + +/* + * clauselist_apply_dependencies + * Apply the specified functional dependencies to a list of clauses and + * return the estimated selectivity of the clauses that are compatible + * with any of the given dependencies. + * + * This will estimate all not-already-estimated clauses that are compatible + * with functional dependencies, and which have an attribute mentioned by any + * of the given dependencies (either as an implying or implied attribute). + * + * Given (lists of) clauses on attributes (a,b) and a functional dependency + * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined + * using the formula + * + * P(a,b) = f * P(a) + (1-f) * P(a) * P(b) + * + * where 'f' is the degree of dependency. This reflects the fact that we + * expect a fraction f of all rows to be consistent with the dependency + * (a=>b), and so have a selectivity of P(a), while the remaining rows are + * treated as independent. + * + * In practice, we use a slightly modified version of this formula, which uses + * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result + * should obviously not exceed either column's individual selectivity. I.e., + * we actually combine selectivities using the formula + * + * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b) + * + * This can make quite a difference if the specific values matching the + * clauses are not consistent with the functional dependency. + */ +static Selectivity +clauselist_apply_dependencies(PlannerInfo *root, List *clauses, + int varRelid, JoinType jointype, + SpecialJoinInfo *sjinfo, + MVDependency **dependencies, int ndependencies, + AttrNumber *list_attnums, + Bitmapset **estimatedclauses) +{ + Bitmapset *attnums; + int i; + int j; + int nattrs; + Selectivity *attr_sel; + int attidx; + int listidx; + ListCell *l; + Selectivity s1; + + /* + * Extract the attnums of all implying and implied attributes from all the + * given dependencies. Each of these attributes is expected to have at + * least 1 not-already-estimated compatible clause that we will estimate + * here. + */ + attnums = NULL; + for (i = 0; i < ndependencies; i++) + { + for (j = 0; j < dependencies[i]->nattributes; j++) + { + AttrNumber attnum = dependencies[i]->attributes[j]; + + attnums = bms_add_member(attnums, attnum); + } + } + + /* + * Compute per-column selectivity estimates for each of these attributes, + * and mark all the corresponding clauses as estimated. + */ + nattrs = bms_num_members(attnums); + attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs); + + attidx = 0; + i = -1; + while ((i = bms_next_member(attnums, i)) >= 0) + { + List *attr_clauses = NIL; + Selectivity simple_sel; + + listidx = -1; + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + + listidx++; + if (list_attnums[listidx] == i) + { + attr_clauses = lappend(attr_clauses, clause); + *estimatedclauses = bms_add_member(*estimatedclauses, listidx); + } + } + + simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid, + jointype, sjinfo, false); + attr_sel[attidx++] = simple_sel; + } + + /* + * Now combine these selectivities using the dependency information. For + * chains of dependencies such as a -> b -> c, the b -> c dependency will + * come before the a -> b dependency in the array, so we traverse the + * array backwards to ensure such chains are computed in the right order. 
+ * + * As explained above, pairs of selectivities are combined using the + * formula + * + * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b) + * + * to ensure that the combined selectivity is never greater than either + * individual selectivity. + * + * Where multiple dependencies apply (e.g., a -> b -> c), we use + * conditional probabilities to compute the overall result as follows: + * + * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a) + * + * so we replace the selectivities of all implied attributes with + * conditional probabilities, that are conditional on all their implying + * attributes. The selectivities of all other non-implied attributes are + * left as they are. + */ + for (i = ndependencies - 1; i >= 0; i--) + { + MVDependency *dependency = dependencies[i]; + AttrNumber attnum; + Selectivity s2; + double f; + + /* Selectivity of all the implying attributes */ + s1 = 1.0; + for (j = 0; j < dependency->nattributes - 1; j++) + { + attnum = dependency->attributes[j]; + attidx = bms_member_index(attnums, attnum); + s1 *= attr_sel[attidx]; + } + + /* Original selectivity of the implied attribute */ + attnum = dependency->attributes[j]; + attidx = bms_member_index(attnums, attnum); + s2 = attr_sel[attidx]; + + /* + * Replace s2 with the conditional probability s2 given s1, computed + * using the formula P(b|a) = P(a,b) / P(a), which simplifies to + * + * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b) + * + * where P(a) = s1, the selectivity of the implying attributes, and + * P(b) = s2, the selectivity of the implied attribute. + */ + f = dependency->degree; + + if (s1 <= s2) + attr_sel[attidx] = f + (1 - f) * s2; + else + attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2; + } + + /* + * The overall selectivity of all the clauses on all these attributes is + * then the product of all the original (non-implied) probabilities and + * the new conditional (implied) probabilities. + */ + s1 = 1.0; + for (i = 0; i < nattrs; i++) + s1 *= attr_sel[i]; + + CLAMP_PROBABILITY(s1); + + pfree(attr_sel); + bms_free(attnums); + + return s1; +} + +/* + * dependency_is_compatible_expression + * Determines if the expression is compatible with functional dependencies + * + * Similar to dependency_is_compatible_clause, but doesn't enforce that the + * expression is a simple Var. On success, return the matching statistics + * expression into *expr. + */ +static bool +dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr) +{ + ListCell *lc, + *lc2; + Node *clause_expr; + + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; + + /* Pseudoconstants are not interesting (they couldn't contain a Var) */ + if (rinfo->pseudoconstant) + return false; + + /* Clauses referencing multiple, or no, varnos are incompatible */ + if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) + return false; + + clause = (Node *) rinfo->clause; + } + + if (is_opclause(clause)) + { + /* If it's an opclause, check for Var = Const or Const = Var. */ + OpExpr *expr = (OpExpr *) clause; + + /* Only expressions with two arguments are candidates. */ + if (list_length(expr->args) != 2) + return false; + + /* Make sure non-selected argument is a pseudoconstant. 
*/ + if (is_pseudo_constant_clause(lsecond(expr->args))) + clause_expr = linitial(expr->args); + else if (is_pseudo_constant_clause(linitial(expr->args))) + clause_expr = lsecond(expr->args); + else + return false; + + /* + * If it's not an "=" operator, just ignore the clause, as it's not + * compatible with functional dependencies. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + * + * XXX this is pretty dubious; probably it'd be better to check btree + * or hash opclass membership, so as not to be fooled by custom + * selectivity functions, and to be more consistent with decisions + * elsewhere in the planner. + */ + if (get_oprrest(expr->opno) != F_EQSEL) + return false; + + /* OK to proceed with checking "var" */ + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + /* If it's an scalar array operator, check for Var IN Const. */ + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + + /* + * Reject ALL() variant, we only care about ANY/IN. + * + * FIXME Maybe we should check if all the values are the same, and + * allow ALL in that case? Doesn't seem very practical, though. + */ + if (!expr->useOr) + return false; + + /* Only expressions with two arguments are candidates. */ + if (list_length(expr->args) != 2) + return false; + + /* + * We know it's always (Var IN Const), so we assume the var is the + * first argument, and pseudoconstant is the second one. + */ + if (!is_pseudo_constant_clause(lsecond(expr->args))) + return false; + + clause_expr = linitial(expr->args); + + /* + * If it's not an "=" operator, just ignore the clause, as it's not + * compatible with functional dependencies. The operator is identified + * simply by looking at which function it uses to estimate + * selectivity. That's a bit strange, but it's what other similar + * places do. + */ + if (get_oprrest(expr->opno) != F_EQSEL) + return false; + + /* OK to proceed with checking "var" */ + } + else if (is_orclause(clause)) + { + BoolExpr *bool_expr = (BoolExpr *) clause; + + /* start with no expression (we'll use the first match) */ + *expr = NULL; + + foreach(lc, bool_expr->args) + { + Node *or_expr = NULL; + + /* + * Had we found incompatible expression in the arguments, treat + * the whole expression as incompatible. + */ + if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid, + statlist, &or_expr)) + return false; + + if (*expr == NULL) + *expr = or_expr; + + /* ensure all the expressions are the same */ + if (!equal(or_expr, *expr)) + return false; + } + + /* the expression is already checked by the recursive call */ + return true; + } + else if (is_notclause(clause)) + { + /* + * "NOT x" can be interpreted as "x = false", so get the argument and + * proceed with seeing if it's a suitable Var. + */ + clause_expr = (Node *) get_notclausearg(clause); + } + else + { + /* + * A boolean expression "x" can be interpreted as "x = true", so + * proceed with seeing if it's a suitable Var. + */ + clause_expr = (Node *) clause; + } + + /* + * We may ignore any RelabelType node above the operand. (There won't be + * more than one, since eval_const_expressions has been applied already.) + */ + if (IsA(clause_expr, RelabelType)) + clause_expr = (Node *) ((RelabelType *) clause_expr)->arg; + + /* + * Search for a matching statistics expression. 
+ */ + foreach(lc, statlist) + { + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + + /* ignore stats without dependencies */ + if (info->kind != STATS_EXT_DEPENDENCIES) + continue; + + foreach(lc2, info->exprs) + { + Node *stat_expr = (Node *) lfirst(lc2); + + if (equal(clause_expr, stat_expr)) + { + *expr = stat_expr; + return true; + } + } + } + + return false; +} + +/* + * dependencies_clauselist_selectivity + * Return the estimated selectivity of (a subset of) the given clauses + * using functional dependency statistics, or 1.0 if no useful functional + * dependency statistic exists. + * + * 'estimatedclauses' is an input/output argument that gets a bit set + * corresponding to the (zero-based) list index of each clause that is included + * in the estimated selectivity. + * + * Given equality clauses on attributes (a,b) we find the strongest dependency + * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected + * dependency, we then combine the per-clause selectivities using the formula + * + * P(a,b) = f * P(a) + (1-f) * P(a) * P(b) + * + * where 'f' is the degree of the dependency. (Actually we use a slightly + * modified version of this formula -- see clauselist_apply_dependencies()). + * + * With clauses on more than two attributes, the dependencies are applied + * recursively, starting with the widest/strongest dependencies. For example + * P(a,b,c) is first split like this: + * + * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c) + * + * assuming (a,b=>c) is the strongest dependency. + */ +Selectivity +dependencies_clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Bitmapset **estimatedclauses) +{ + Selectivity s1 = 1.0; + ListCell *l; + Bitmapset *clauses_attnums = NULL; + AttrNumber *list_attnums; + int listidx; + MVDependencies **func_dependencies; + int nfunc_dependencies; + int total_ndeps; + MVDependency **dependencies; + int ndependencies; + int i; + AttrNumber attnum_offset; + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); + + /* unique expressions */ + Node **unique_exprs; + int unique_exprs_cnt; + + /* check if there's any stats that might be useful for us. */ + if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) + return 1.0; + + list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * + list_length(clauses)); + + /* + * We allocate space as if every clause was a unique expression, although + * that's probably overkill. Some will be simple column references that + * we'll translate to attnums, and there might be duplicates. But it's + * easier and cheaper to just do one allocation than repalloc later. + */ + unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses)); + unique_exprs_cnt = 0; + + /* + * Pre-process the clauses list to extract the attnums seen in each item. + * We need to determine if there's any clauses which will be useful for + * dependency selectivity estimations. Along the way we'll record all of + * the attnums for each clause in a list which we'll reference later so we + * don't need to repeat the same work again. We'll also keep track of all + * attnums seen. + * + * We also skip clauses that we already estimated using different types of + * statistics (we treat them as incompatible). + * + * To handle expressions, we assign them negative attnums, as if it was a + * system attribute (this is fine, as we only allow extended stats on user + * attributes). 
And then we offset everything by the number of expressions, so that
+	 * we can store the values in a bitmapset.
+	 */
+	listidx = 0;
+	foreach(l, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(l);
+		AttrNumber	attnum;
+		Node	   *expr = NULL;
+
+		/* ignore clause by default */
+		list_attnums[listidx] = InvalidAttrNumber;
+
+		if (!bms_is_member(listidx, *estimatedclauses))
+		{
+			/*
+			 * If it's a simple column reference, just extract the attnum. If
+			 * it's an expression, assign a negative attnum as if it was a
+			 * system attribute.
+			 */
+			if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
+			{
+				list_attnums[listidx] = attnum;
+			}
+			else if (dependency_is_compatible_expression(clause, rel->relid,
+														 rel->statlist,
+														 &expr))
+			{
+				/* special attnum assigned to this expression */
+				attnum = InvalidAttrNumber;
+
+				Assert(expr != NULL);
+
+				/* If the expression is a duplicate, use the same attnum. */
+				for (i = 0; i < unique_exprs_cnt; i++)
+				{
+					if (equal(unique_exprs[i], expr))
+					{
+						/* negative attribute number to expression */
+						attnum = -(i + 1);
+						break;
+					}
+				}
+
+				/* not found in the list, so add it */
+				if (attnum == InvalidAttrNumber)
+				{
+					unique_exprs[unique_exprs_cnt++] = expr;
+
+					/* after incrementing the value, to get -1, -2, ... */
+					attnum = (-unique_exprs_cnt);
+				}
+
+				/* remember which attnum was assigned to this clause */
+				list_attnums[listidx] = attnum;
+			}
+		}
+
+		listidx++;
+	}
+
+	Assert(listidx == list_length(clauses));
+
+	/*
+	 * How much do we need to offset the attnums? If there are no
+	 * expressions, then no offset is needed. Otherwise we need to offset
+	 * enough for the lowest value (-unique_exprs_cnt) to become 1.
+	 */
+	if (unique_exprs_cnt > 0)
+		attnum_offset = (unique_exprs_cnt + 1);
+	else
+		attnum_offset = 0;
+
+	/*
+	 * Now that we know how many expressions there are, we can offset the
+	 * values just enough to build the bitmapset.
+	 */
+	for (i = 0; i < list_length(clauses); i++)
+	{
+		AttrNumber	attnum;
+
+		/* ignore incompatible or already estimated clauses */
+		if (list_attnums[i] == InvalidAttrNumber)
+			continue;
+
+		/* make sure the attnum is in the expected range */
+		Assert(list_attnums[i] >= (-unique_exprs_cnt));
+		Assert(list_attnums[i] <= MaxHeapAttributeNumber);
+
+		/* make sure the attnum is positive (valid AttrNumber) */
+		attnum = list_attnums[i] + attnum_offset;
+
+		/*
+		 * Either it's a regular attribute, or it's an expression, in which
+		 * case we must not have seen it before (expressions are unique).
+		 *
+		 * XXX The check whether it's a regular attribute has to be done
+		 * using the original attnum, while the second check has to use the
+		 * value with an offset.
+		 */
+		Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
+			   !bms_is_member(attnum, clauses_attnums));
+
+		/*
+		 * Remember the offset attnum, both for attributes and expressions.
+		 * We'll pass list_attnums to clauselist_apply_dependencies, which
+		 * uses it to identify clauses in a bitmap. We could also pass the
+		 * offset, but this is more convenient.
+		 */
+		list_attnums[i] = attnum;
+
+		clauses_attnums = bms_add_member(clauses_attnums, attnum);
+	}
+
+	/*
+	 * If there are not at least two distinct attnums and expressions, then
+	 * reject the whole list of clauses. We must return 1.0 so the calling
+	 * function's selectivity is unaffected.
+	 */
+	if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
+	{
+		bms_free(clauses_attnums);
+		pfree(list_attnums);
+		return 1.0;
+	}
+
+	/*
+	 * Load all functional dependencies matching at least two parameters.
We can simply consider all dependencies at once, without
+	 * having to search for the best statistics object.
+	 *
+	 * To not waste cycles and memory, we deserialize dependencies only for
+	 * statistics that match at least two attributes. The array is allocated
+	 * with the assumption that all objects match - we could grow the array
+	 * to make it just the right size, but it's likely wasteful anyway thanks
+	 * to moving the freed chunks to freelists etc.
+	 */
+	func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
+												   list_length(rel->statlist));
+	nfunc_dependencies = 0;
+	total_ndeps = 0;
+
+	foreach(l, rel->statlist)
+	{
+		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
+		int			nmatched;
+		int			nexprs;
+		int			k;
+		MVDependencies *deps;
+
+		/* skip statistics that are not of the correct type */
+		if (stat->kind != STATS_EXT_DEPENDENCIES)
+			continue;
+
+		/* skip statistics with mismatching stxdinherit value */
+		if (stat->inherit != rte->inh)
+			continue;
+
+		/*
+		 * Count matching attributes - we have to undo the attnum offsets.
+		 * The input attribute numbers are not offset (expressions are not
+		 * included in stat->keys, so it's not necessary). But we need to
+		 * offset them before checking against clauses_attnums.
+		 */
+		nmatched = 0;
+		k = -1;
+		while ((k = bms_next_member(stat->keys, k)) >= 0)
+		{
+			AttrNumber	attnum = (AttrNumber) k;
+
+			/* skip expressions */
+			if (!AttrNumberIsForUserDefinedAttr(attnum))
+				continue;
+
+			/* apply the same offset as above */
+			attnum += attnum_offset;
+
+			if (bms_is_member(attnum, clauses_attnums))
+				nmatched++;
+		}
+
+		/* count matching expressions */
+		nexprs = 0;
+		for (i = 0; i < unique_exprs_cnt; i++)
+		{
+			ListCell   *lc;
+
+			foreach(lc, stat->exprs)
+			{
+				Node	   *stat_expr = (Node *) lfirst(lc);
+
+				/* try to match it */
+				if (equal(stat_expr, unique_exprs[i]))
+					nexprs++;
+			}
+		}
+
+		/*
+		 * Skip objects matching fewer than two attributes/expressions from
+		 * clauses.
+		 */
+		if (nmatched + nexprs < 2)
+			continue;
+
+		deps = statext_dependencies_load(stat->statOid, rte->inh);
+
+		/*
+		 * The expressions may be represented by different attnums in the
+		 * stats, so we need to remap them to be consistent with the clauses.
+		 * That will make the later steps (e.g. picking the strongest item
+		 * and so on) much simpler and cheaper, because it won't need to care
+		 * about the offset at all.
+		 *
+		 * While we're at it, we can ignore dependencies that are not fully
+		 * matched by clauses (i.e. referencing attributes or expressions
+		 * that are not in the clauses).
+		 *
+		 * We have to do this for all statistics, as long as there are any
+		 * expressions - we need to shift the attnums in all dependencies.
+		 *
+		 * XXX Maybe we should do this always, because it also eliminates
+		 * some of the dependencies early. It might be cheaper than having to
+		 * walk the longer list in find_strongest_dependency later,
+		 * especially as we need to do that repeatedly?
+		 *
+		 * XXX We have to do this even when there are no expressions in
+		 * clauses, otherwise find_strongest_dependency may fail for stats
+		 * with expressions (due to lookup of negative value in bitmap). So
+		 * we need to at least filter out those dependencies. Maybe we could
+		 * do it in a cheaper way (if there are no expr clauses, we can just
+		 * discard all negative attnums without any lookups).
+ */ + if (unique_exprs_cnt > 0 || stat->exprs != NIL) + { + int ndeps = 0; + + for (i = 0; i < deps->ndeps; i++) + { + bool skip = false; + MVDependency *dep = deps->deps[i]; + int j; + + for (j = 0; j < dep->nattributes; j++) + { + int idx; + Node *expr; + AttrNumber unique_attnum = InvalidAttrNumber; + AttrNumber attnum; + + /* undo the per-statistics offset */ + attnum = dep->attributes[j]; + + /* + * For regular attributes we can simply check if it + * matches any clause. If there's no matching clause, we + * can just ignore it. We need to offset the attnum + * though. + */ + if (AttrNumberIsForUserDefinedAttr(attnum)) + { + dep->attributes[j] = attnum + attnum_offset; + + if (!bms_is_member(dep->attributes[j], clauses_attnums)) + { + skip = true; + break; + } + + continue; + } + + /* + * the attnum should be a valid system attnum (-1, -2, + * ...) + */ + Assert(AttributeNumberIsValid(attnum)); + + /* + * For expressions, we need to do two translations. First + * we have to translate the negative attnum to index in + * the list of expressions (in the statistics object). + * Then we need to see if there's a matching clause. The + * index of the unique expression determines the attnum + * (and we offset it). + */ + idx = -(1 + attnum); + + /* Is the expression index is valid? */ + Assert((idx >= 0) && (idx < list_length(stat->exprs))); + + expr = (Node *) list_nth(stat->exprs, idx); + + /* try to find the expression in the unique list */ + for (int m = 0; m < unique_exprs_cnt; m++) + { + /* + * found a matching unique expression, use the attnum + * (derived from index of the unique expression) + */ + if (equal(unique_exprs[m], expr)) + { + unique_attnum = -(m + 1) + attnum_offset; + break; + } + } + + /* + * Found no matching expression, so we can simply skip + * this dependency, because there's no chance it will be + * fully covered. + */ + if (unique_attnum == InvalidAttrNumber) + { + skip = true; + break; + } + + /* otherwise remap it to the new attnum */ + dep->attributes[j] = unique_attnum; + } + + /* if found a matching dependency, keep it */ + if (!skip) + { + /* maybe we've skipped something earlier, so move it */ + if (ndeps != i) + deps->deps[ndeps] = deps->deps[i]; + + ndeps++; + } + } + + deps->ndeps = ndeps; + } + + /* + * It's possible we've removed all dependencies, in which case we + * don't bother adding it to the list. + */ + if (deps->ndeps > 0) + { + func_dependencies[nfunc_dependencies] = deps; + total_ndeps += deps->ndeps; + nfunc_dependencies++; + } + } + + /* if no matching stats could be found then we've nothing to do */ + if (nfunc_dependencies == 0) + { + pfree(func_dependencies); + bms_free(clauses_attnums); + pfree(list_attnums); + pfree(unique_exprs); + return 1.0; + } + + /* + * Work out which dependencies we can apply, starting with the + * widest/strongest ones, and proceeding to smaller/weaker ones. 
+ */ + dependencies = (MVDependency **) palloc(sizeof(MVDependency *) * + total_ndeps); + ndependencies = 0; + + while (true) + { + MVDependency *dependency; + AttrNumber attnum; + + /* the widest/strongest dependency, fully matched by clauses */ + dependency = find_strongest_dependency(func_dependencies, + nfunc_dependencies, + clauses_attnums); + if (!dependency) + break; + + dependencies[ndependencies++] = dependency; + + /* Ignore dependencies using this implied attribute in later loops */ + attnum = dependency->attributes[dependency->nattributes - 1]; + clauses_attnums = bms_del_member(clauses_attnums, attnum); + } + + /* + * If we found applicable dependencies, use them to estimate all + * compatible clauses on attributes that they refer to. + */ + if (ndependencies != 0) + s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype, + sjinfo, dependencies, ndependencies, + list_attnums, estimatedclauses); + + /* free deserialized functional dependencies (and then the array) */ + for (i = 0; i < nfunc_dependencies; i++) + pfree(func_dependencies[i]); + + pfree(dependencies); + pfree(func_dependencies); + bms_free(clauses_attnums); + pfree(list_attnums); + pfree(unique_exprs); + + return s1; +} diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c new file mode 100644 index 0000000..28b52d8 --- /dev/null +++ b/src/backend/statistics/extended_stats.c @@ -0,0 +1,2662 @@ +/*------------------------------------------------------------------------- + * + * extended_stats.c + * POSTGRES extended statistics + * + * Generic code supporting statistics objects created via CREATE STATISTICS. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/extended_stats.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/detoast.h" +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "executor/executor.h" +#include "commands/defrem.h" +#include "commands/progress.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" +#include "parser/parsetree.h" +#include "pgstat.h" +#include "postmaster/autovacuum.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/attoptcache.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/selfuncs.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* + * To avoid consuming too much memory during analysis and/or too much space + * in the resulting pg_statistic rows, we ignore varlena datums that are wider + * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV + * and distinct-value calculations since a wide value is unlikely to be + * duplicated at all, much less be a most-common value. For the same reason, + * ignoring wide values will not affect our estimates of histogram bin + * boundaries very much. 
+ */ +#define WIDTH_THRESHOLD 1024 + +/* + * Used internally to refer to an individual statistics object, i.e., + * a pg_statistic_ext entry. + */ +typedef struct StatExtEntry +{ + Oid statOid; /* OID of pg_statistic_ext entry */ + char *schema; /* statistics object's schema */ + char *name; /* statistics object's name */ + Bitmapset *columns; /* attribute numbers covered by the object */ + List *types; /* 'char' list of enabled statistics kinds */ + int stattarget; /* statistics target (-1 for default) */ + List *exprs; /* expressions */ +} StatExtEntry; + + +static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); +static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs, + int nvacatts, VacAttrStats **vacatts); +static void statext_store(Oid statOid, bool inh, + MVNDistinct *ndistinct, MVDependencies *dependencies, + MCVList *mcv, Datum exprs, VacAttrStats **stats); +static int statext_compute_stattarget(int stattarget, + int nattrs, VacAttrStats **stats); + +/* Information needed to analyze a single simple expression. */ +typedef struct AnlExprData +{ + Node *expr; /* expression to analyze */ + VacAttrStats *vacattrstat; /* statistics attrs to analyze */ +} AnlExprData; + +static void compute_expr_stats(Relation onerel, double totalrows, + AnlExprData *exprdata, int nexprs, + HeapTuple *rows, int numrows); +static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs); +static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); +static AnlExprData *build_expr_data(List *exprs, int stattarget); + +static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat, + int numrows, HeapTuple *rows, + VacAttrStats **stats, int stattarget); + + +/* + * Compute requested extended stats, using the rows sampled for the plain + * (single-column) stats. + * + * This fetches a list of stats types from pg_statistic_ext, computes the + * requested stats, and serializes them back into the catalog. + */ +void +BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats) +{ + Relation pg_stext; + ListCell *lc; + List *statslist; + MemoryContext cxt; + MemoryContext oldcxt; + int64 ext_cnt; + + /* Do nothing if there are no columns to analyze. */ + if (!natts) + return; + + /* the list of stats has to be allocated outside the memory context */ + pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); + statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + + /* memory context for building each statistics object */ + cxt = AllocSetContextCreate(CurrentMemoryContext, + "BuildRelationExtStatistics", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(cxt); + + /* report this phase */ + if (statslist != NIL) + { + const int index[] = { + PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_EXT_STATS_TOTAL + }; + const int64 val[] = { + PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS, + list_length(statslist) + }; + + pgstat_progress_update_multi_param(2, index, val); + } + + ext_cnt = 0; + foreach(lc, statslist) + { + StatExtEntry *stat = (StatExtEntry *) lfirst(lc); + MVNDistinct *ndistinct = NULL; + MVDependencies *dependencies = NULL; + MCVList *mcv = NULL; + Datum exprstats = (Datum) 0; + VacAttrStats **stats; + ListCell *lc2; + int stattarget; + StatsBuildData *data; + + /* + * Check if we can build these stats based on the column analyzed. If + * not, report this fact (except in autovacuum) and move on. 
+ */ + stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs, + natts, vacattrstats); + if (!stats) + { + if (!IsAutoVacuumWorkerProcess()) + ereport(WARNING, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"", + stat->schema, stat->name, + get_namespace_name(onerel->rd_rel->relnamespace), + RelationGetRelationName(onerel)), + errtable(onerel))); + continue; + } + + /* compute statistics target for this statistics object */ + stattarget = statext_compute_stattarget(stat->stattarget, + bms_num_members(stat->columns), + stats); + + /* + * Don't rebuild statistics objects with statistics target set to 0 + * (we just leave the existing values around, just like we do for + * regular per-column statistics). + */ + if (stattarget == 0) + continue; + + /* evaluate expressions (if the statistics object has any) */ + data = make_build_data(onerel, stat, numrows, rows, stats, stattarget); + + /* compute statistic of each requested type */ + foreach(lc2, stat->types) + { + char t = (char) lfirst_int(lc2); + + if (t == STATS_EXT_NDISTINCT) + ndistinct = statext_ndistinct_build(totalrows, data); + else if (t == STATS_EXT_DEPENDENCIES) + dependencies = statext_dependencies_build(data); + else if (t == STATS_EXT_MCV) + mcv = statext_mcv_build(data, totalrows, stattarget); + else if (t == STATS_EXT_EXPRESSIONS) + { + AnlExprData *exprdata; + int nexprs; + + /* should not happen, thanks to checks when defining stats */ + if (!stat->exprs) + elog(ERROR, "requested expression stats, but there are no expressions"); + + exprdata = build_expr_data(stat->exprs, stattarget); + nexprs = list_length(stat->exprs); + + compute_expr_stats(onerel, totalrows, + exprdata, nexprs, + rows, numrows); + + exprstats = serialize_expr_stats(exprdata, nexprs); + } + } + + /* store the statistics in the catalog */ + statext_store(stat->statOid, inh, + ndistinct, dependencies, mcv, exprstats, stats); + + /* for reporting progress */ + pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED, + ++ext_cnt); + + /* free the data used for building this statistics object */ + MemoryContextReset(cxt); + } + + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(cxt); + + list_free(statslist); + + table_close(pg_stext, RowExclusiveLock); +} + +/* + * ComputeExtStatisticsRows + * Compute number of rows required by extended statistics on a table. + * + * Computes number of rows we need to sample to build extended statistics on a + * table. This only looks at statistics we can actually build - for example + * when analyzing only some of the columns, this will skip statistics objects + * that would require additional columns. + * + * See statext_compute_stattarget for details about how we compute the + * statistics target for a statistics object (from the object target, + * attribute targets and default statistics target). + */ +int +ComputeExtStatisticsRows(Relation onerel, + int natts, VacAttrStats **vacattrstats) +{ + Relation pg_stext; + ListCell *lc; + List *lstats; + MemoryContext cxt; + MemoryContext oldcxt; + int result = 0; + + /* If there are no columns to analyze, just return 0. 
 */
+	if (!natts)
+		return 0;
+
+	cxt = AllocSetContextCreate(CurrentMemoryContext,
+								"ComputeExtStatisticsRows",
+								ALLOCSET_DEFAULT_SIZES);
+	oldcxt = MemoryContextSwitchTo(cxt);
+
+	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
+	lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+
+	foreach(lc, lstats)
+	{
+		StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
+		int			stattarget;
+		VacAttrStats **stats;
+		int			nattrs = bms_num_members(stat->columns);
+
+		/*
+		 * Check if we can build this statistics object based on the columns
+		 * analyzed. If not, ignore it (don't report anything, we'll do that
+		 * during the actual build in BuildRelationExtStatistics).
+		 */
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
+									  natts, vacattrstats);
+
+		if (!stats)
+			continue;
+
+		/*
+		 * Compute the statistics target, based on what's set for the
+		 * statistics object itself, and for its attributes.
+		 */
+		stattarget = statext_compute_stattarget(stat->stattarget,
+												nattrs, stats);
+
+		/* Use the largest value for all statistics objects. */
+		if (stattarget > result)
+			result = stattarget;
+	}
+
+	table_close(pg_stext, RowExclusiveLock);
+
+	MemoryContextSwitchTo(oldcxt);
+	MemoryContextDelete(cxt);
+
+	/* compute sample size based on the statistics target */
+	return (300 * result);
+}
+
+/*
+ * statext_compute_stattarget
+ *		compute the statistics target for an extended statistic
+ *
+ * When computing the target for extended statistics objects, we consider
+ * three places where the target may be set - the statistics object itself,
+ * attributes the statistics object is defined on, and then the default
+ * statistics target.
+ *
+ * First we look at what's set for the statistics object itself, using the
+ * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
+ * there (i.e. not -1), we're done. Otherwise we look at targets set for any
+ * of the attributes the statistic is defined on, and if there are columns
+ * with a defined target, we use the maximum value. We do this mostly for
+ * backwards compatibility, because this is what we did before having
+ * statistics targets for extended statistics.
+ *
+ * And finally, if we still don't have a statistics target, we use the value
+ * set in default_statistics_target.
+ */
+static int
+statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
+{
+	int			i;
+
+	/*
+	 * If there's a statistics target set for the statistics object, use it.
+	 * It may be set to 0 which disables building of that statistic.
+	 */
+	if (stattarget >= 0)
+		return stattarget;
+
+	/*
+	 * The target for the statistics object is set to -1, in which case we
+	 * look at the maximum target set for any of the attributes the object is
+	 * defined on.
+	 */
+	for (i = 0; i < nattrs; i++)
+	{
+		/* keep the maximum statistics target */
+		if (stats[i]->attr->attstattarget > stattarget)
+			stattarget = stats[i]->attr->attstattarget;
+	}
+
+	/*
+	 * If the value is still negative (so neither the statistics object nor
+	 * any of the columns have a custom statistics target set), use the
+	 * global default target.
+	 */
+	if (stattarget < 0)
+		stattarget = default_statistics_target;
+
+	/* At this point we should have a valid statistics target. */
+	Assert((stattarget >= 0) && (stattarget <= 10000));
+
+	return stattarget;
+}
+
+/*
+ * statext_is_kind_built
+ *		Is this stat kind built in the given pg_statistic_ext_data tuple?
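+ *
+ * For example, statext_is_kind_built(htup, STATS_EXT_MCV) returns true only
+ * if the stxdmcv column of the given tuple is not null.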
+ */ +bool +statext_is_kind_built(HeapTuple htup, char type) +{ + AttrNumber attnum; + + switch (type) + { + case STATS_EXT_NDISTINCT: + attnum = Anum_pg_statistic_ext_data_stxdndistinct; + break; + + case STATS_EXT_DEPENDENCIES: + attnum = Anum_pg_statistic_ext_data_stxddependencies; + break; + + case STATS_EXT_MCV: + attnum = Anum_pg_statistic_ext_data_stxdmcv; + break; + + case STATS_EXT_EXPRESSIONS: + attnum = Anum_pg_statistic_ext_data_stxdexpr; + break; + + default: + elog(ERROR, "unexpected statistics type requested: %d", type); + } + + return !heap_attisnull(htup, attnum, NULL); +} + +/* + * Return a list (of StatExtEntry) of statistics objects for the given relation. + */ +static List * +fetch_statentries_for_relation(Relation pg_statext, Oid relid) +{ + SysScanDesc scan; + ScanKeyData skey; + HeapTuple htup; + List *result = NIL; + + /* + * Prepare to scan pg_statistic_ext for entries having stxrelid = this + * rel. + */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_stxrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(scan))) + { + StatExtEntry *entry; + Datum datum; + bool isnull; + int i; + ArrayType *arr; + char *enabled; + Form_pg_statistic_ext staForm; + List *exprs = NIL; + + entry = palloc0(sizeof(StatExtEntry)); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + entry->statOid = staForm->oid; + entry->schema = get_namespace_name(staForm->stxnamespace); + entry->name = pstrdup(NameStr(staForm->stxname)); + entry->stattarget = staForm->stxstattarget; + for (i = 0; i < staForm->stxkeys.dim1; i++) + { + entry->columns = bms_add_member(entry->columns, + staForm->stxkeys.values[i]); + } + + /* decode the stxkind char array into a list of chars */ + datum = SysCacheGetAttrNotNull(STATEXTOID, htup, + Anum_pg_statistic_ext_stxkind); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "stxkind is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + Assert((enabled[i] == STATS_EXT_NDISTINCT) || + (enabled[i] == STATS_EXT_DEPENDENCIES) || + (enabled[i] == STATS_EXT_MCV) || + (enabled[i] == STATS_EXT_EXPRESSIONS)); + entry->types = lappend_int(entry->types, (int) enabled[i]); + } + + /* decode expression (if any) */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_stxexprs, &isnull); + + if (!isnull) + { + char *exprsString; + + exprsString = TextDatumGetCString(datum); + exprs = (List *) stringToNode(exprsString); + + pfree(exprsString); + + /* + * Run the expressions through eval_const_expressions. This is not + * just an optimization, but is necessary, because the planner + * will be comparing them to similarly-processed qual clauses, and + * may fail to detect valid matches without this. We must not use + * canonicalize_qual, however, since these aren't qual + * expressions. + */ + exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) exprs); + } + + entry->exprs = exprs; + + result = lappend(result, entry); + } + + systable_endscan(scan); + + return result; +} + +/* + * examine_attribute -- pre-analysis of a single column + * + * Determine whether the column is analyzable; if so, create and initialize + * a VacAttrStats struct for it. If not, return NULL. 
+ */ +static VacAttrStats * +examine_attribute(Node *expr) +{ + HeapTuple typtuple; + VacAttrStats *stats; + int i; + bool ok; + + /* + * Create the VacAttrStats struct. Note that we only have a copy of the + * fixed fields of the pg_attribute tuple. + */ + stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); + + /* fake the attribute */ + stats->attr = (Form_pg_attribute) palloc0(ATTRIBUTE_FIXED_PART_SIZE); + stats->attr->attstattarget = -1; + + /* + * When analyzing an expression, believe the expression tree's type not + * the column datatype --- the latter might be the opckeytype storage type + * of the opclass, which is not interesting for our purposes. (Note: if + * we did anything with non-expression statistics columns, we'd need to + * figure out where to get the correct type info from, but for now that's + * not a problem.) It's not clear whether anyone will care about the + * typmod, but we store that too just in case. + */ + stats->attrtypid = exprType(expr); + stats->attrtypmod = exprTypmod(expr); + stats->attrcollid = exprCollation(expr); + + typtuple = SearchSysCacheCopy1(TYPEOID, + ObjectIdGetDatum(stats->attrtypid)); + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); + stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + + /* + * We don't actually analyze individual attributes, so no need to set the + * memory context. + */ + stats->anl_context = NULL; + stats->tupattnum = InvalidAttrNumber; + + /* + * The fields describing the stats->stavalues[n] element types default to + * the type of the data being analyzed, but the type-specific typanalyze + * function can change them if it wants to store something else. + */ + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + stats->statypid[i] = stats->attrtypid; + stats->statyplen[i] = stats->attrtype->typlen; + stats->statypbyval[i] = stats->attrtype->typbyval; + stats->statypalign[i] = stats->attrtype->typalign; + } + + /* + * Call the type-specific typanalyze function. If none is specified, use + * std_typanalyze(). + */ + if (OidIsValid(stats->attrtype->typanalyze)) + ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, + PointerGetDatum(stats))); + else + ok = std_typanalyze(stats); + + if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) + { + heap_freetuple(typtuple); + pfree(stats->attr); + pfree(stats); + return NULL; + } + + return stats; +} + +/* + * examine_expression -- pre-analysis of a single expression + * + * Determine whether the expression is analyzable; if so, create and initialize + * a VacAttrStats struct for it. If not, return NULL. + */ +static VacAttrStats * +examine_expression(Node *expr, int stattarget) +{ + HeapTuple typtuple; + VacAttrStats *stats; + int i; + bool ok; + + Assert(expr != NULL); + + /* + * Create the VacAttrStats struct. + */ + stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); + + /* + * When analyzing an expression, believe the expression tree's type. + */ + stats->attrtypid = exprType(expr); + stats->attrtypmod = exprTypmod(expr); + + /* + * We don't allow collation to be specified in CREATE STATISTICS, so we + * have to use the collation specified for the expression. It's possible + * to specify the collation in the expression "(col COLLATE "en_US")" in + * which case exprCollation() does the right thing. 
+ */ + stats->attrcollid = exprCollation(expr); + + /* + * We don't have any pg_attribute for expressions, so let's fake something + * reasonable into attstattarget, which is the only thing std_typanalyze + * needs. + */ + stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); + + /* + * We can't have statistics target specified for the expression, so we + * could use either the default_statistics_target, or the target computed + * for the extended statistics. The second option seems more reasonable. + */ + stats->attr->attstattarget = stattarget; + + /* initialize some basic fields */ + stats->attr->attrelid = InvalidOid; + stats->attr->attnum = InvalidAttrNumber; + stats->attr->atttypid = stats->attrtypid; + + typtuple = SearchSysCacheCopy1(TYPEOID, + ObjectIdGetDatum(stats->attrtypid)); + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); + + stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + stats->anl_context = CurrentMemoryContext; /* XXX should be using + * something else? */ + stats->tupattnum = InvalidAttrNumber; + + /* + * The fields describing the stats->stavalues[n] element types default to + * the type of the data being analyzed, but the type-specific typanalyze + * function can change them if it wants to store something else. + */ + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + stats->statypid[i] = stats->attrtypid; + stats->statyplen[i] = stats->attrtype->typlen; + stats->statypbyval[i] = stats->attrtype->typbyval; + stats->statypalign[i] = stats->attrtype->typalign; + } + + /* + * Call the type-specific typanalyze function. If none is specified, use + * std_typanalyze(). + */ + if (OidIsValid(stats->attrtype->typanalyze)) + ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, + PointerGetDatum(stats))); + else + ok = std_typanalyze(stats); + + if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) + { + heap_freetuple(typtuple); + pfree(stats); + return NULL; + } + + return stats; +} + +/* + * Using 'vacatts' of size 'nvacatts' as input data, return a newly-built + * VacAttrStats array which includes only the items corresponding to + * attributes indicated by 'attrs'. If we don't have all of the per-column + * stats available to compute the extended stats, then we return NULL to + * indicate to the caller that the stats should not be built. + */ +static VacAttrStats ** +lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs, + int nvacatts, VacAttrStats **vacatts) +{ + int i = 0; + int x = -1; + int natts; + VacAttrStats **stats; + ListCell *lc; + + natts = bms_num_members(attrs) + list_length(exprs); + + stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *)); + + /* lookup VacAttrStats info for the requested columns (same attnum) */ + while ((x = bms_next_member(attrs, x)) >= 0) + { + int j; + + stats[i] = NULL; + for (j = 0; j < nvacatts; j++) + { + if (x == vacatts[j]->tupattnum) + { + stats[i] = vacatts[j]; + break; + } + } + + if (!stats[i]) + { + /* + * Looks like stats were not gathered for one of the columns + * required. We'll be unable to build the extended stats without + * this column. + */ + pfree(stats); + return NULL; + } + + /* + * Sanity check that the column is not dropped - stats should have + * been removed in this case. 
+ */ + Assert(!stats[i]->attr->attisdropped); + + i++; + } + + /* also add info for expressions */ + foreach(lc, exprs) + { + Node *expr = (Node *) lfirst(lc); + + stats[i] = examine_attribute(expr); + + /* + * XXX We need tuple descriptor later, and we just grab it from + * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded + * examine_attribute does not set that, so just grab it from the first + * vacatts element. + */ + stats[i]->tupDesc = vacatts[0]->tupDesc; + + i++; + } + + return stats; +} + +/* + * statext_store + * Serializes the statistics and stores them into the pg_statistic_ext_data + * tuple. + */ +static void +statext_store(Oid statOid, bool inh, + MVNDistinct *ndistinct, MVDependencies *dependencies, + MCVList *mcv, Datum exprs, VacAttrStats **stats) +{ + Relation pg_stextdata; + HeapTuple stup; + Datum values[Natts_pg_statistic_ext_data]; + bool nulls[Natts_pg_statistic_ext_data]; + + pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock); + + memset(nulls, true, sizeof(nulls)); + memset(values, 0, sizeof(values)); + + /* basic info */ + values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statOid); + nulls[Anum_pg_statistic_ext_data_stxoid - 1] = false; + + values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inh); + nulls[Anum_pg_statistic_ext_data_stxdinherit - 1] = false; + + /* + * Construct a new pg_statistic_ext_data tuple, replacing the calculated + * stats. + */ + if (ndistinct != NULL) + { + bytea *data = statext_ndistinct_serialize(ndistinct); + + nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL); + values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data); + } + + if (dependencies != NULL) + { + bytea *data = statext_dependencies_serialize(dependencies); + + nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL); + values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data); + } + if (mcv != NULL) + { + bytea *data = statext_mcv_serialize(mcv, stats); + + nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL); + values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data); + } + if (exprs != (Datum) 0) + { + nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = false; + values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs; + } + + /* + * Delete the old tuple if it exists, and insert a new one. It's easier + * than trying to update or insert, based on various conditions. 
+ */ + RemoveStatisticsDataById(statOid, inh); + + /* form and insert a new tuple */ + stup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls); + CatalogTupleInsert(pg_stextdata, stup); + + heap_freetuple(stup); + + table_close(pg_stextdata, RowExclusiveLock); +} + +/* initialize multi-dimensional sort */ +MultiSortSupport +multi_sort_init(int ndims) +{ + MultiSortSupport mss; + + Assert(ndims >= 2); + + mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup) + + sizeof(SortSupportData) * ndims); + + mss->ndims = ndims; + + return mss; +} + +/* + * Prepare sort support info using the given sort operator and collation + * at the position 'sortdim' + */ +void +multi_sort_add_dimension(MultiSortSupport mss, int sortdim, + Oid oper, Oid collation) +{ + SortSupport ssup = &mss->ssup[sortdim]; + + ssup->ssup_cxt = CurrentMemoryContext; + ssup->ssup_collation = collation; + ssup->ssup_nulls_first = false; + + PrepareSortSupportFromOrderingOp(oper, ssup); +} + +/* compare all the dimensions in the selected order */ +int +multi_sort_compare(const void *a, const void *b, void *arg) +{ + MultiSortSupport mss = (MultiSortSupport) arg; + SortItem *ia = (SortItem *) a; + SortItem *ib = (SortItem *) b; + int i; + + for (i = 0; i < mss->ndims; i++) + { + int compare; + + compare = ApplySortComparator(ia->values[i], ia->isnull[i], + ib->values[i], ib->isnull[i], + &mss->ssup[i]); + + if (compare != 0) + return compare; + } + + /* equal by default */ + return 0; +} + +/* compare selected dimension */ +int +multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + return ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); +} + +int +multi_sort_compare_dims(int start, int end, + const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + int dim; + + for (dim = start; dim <= end; dim++) + { + int r = ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); + + if (r != 0) + return r; + } + + return 0; +} + +int +compare_scalars_simple(const void *a, const void *b, void *arg) +{ + return compare_datums_simple(*(Datum *) a, + *(Datum *) b, + (SortSupport) arg); +} + +int +compare_datums_simple(Datum a, Datum b, SortSupport ssup) +{ + return ApplySortComparator(a, false, b, false, ssup); +} + +/* + * build_attnums_array + * Transforms a bitmap into an array of AttrNumber values. + * + * This is used for extended statistics only, so all the attributes must be + * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber + * is not necessary here (and when querying the bitmap). + */ +AttrNumber * +build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs) +{ + int i, + j; + AttrNumber *attnums; + int num = bms_num_members(attrs); + + if (numattrs) + *numattrs = num; + + /* build attnums from the bitmapset */ + attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num); + i = 0; + j = -1; + while ((j = bms_next_member(attrs, j)) >= 0) + { + int attnum = (j - nexprs); + + /* + * Make sure the bitmap contains only user-defined attributes. As + * bitmaps can't contain negative values, this can be violated in two + * ways. Firstly, the bitmap might contain 0 as a member, and secondly + * the integer value might be larger than MaxAttrNumber. 
+ */ + Assert(AttributeNumberIsValid(attnum)); + Assert(attnum <= MaxAttrNumber); + Assert(attnum >= (-nexprs)); + + attnums[i++] = (AttrNumber) attnum; + + /* protect against overflows */ + Assert(i <= num); + } + + return attnums; +} + +/* + * build_sorted_items + * build a sorted array of SortItem with values from rows + * + * Note: All the memory is allocated in a single chunk, so that the caller + * can simply pfree the return value to release all of it. + */ +SortItem * +build_sorted_items(StatsBuildData *data, int *nitems, + MultiSortSupport mss, + int numattrs, AttrNumber *attnums) +{ + int i, + j, + len, + nrows; + int nvalues = data->numrows * numattrs; + + SortItem *items; + Datum *values; + bool *isnull; + char *ptr; + int *typlen; + + /* Compute the total amount of memory we need (both items and values). */ + len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool)); + + /* Allocate the memory and split it into the pieces. */ + ptr = palloc0(len); + + /* items to sort */ + items = (SortItem *) ptr; + ptr += data->numrows * sizeof(SortItem); + + /* values and null flags */ + values = (Datum *) ptr; + ptr += nvalues * sizeof(Datum); + + isnull = (bool *) ptr; + ptr += nvalues * sizeof(bool); + + /* make sure we consumed the whole buffer exactly */ + Assert((ptr - (char *) items) == len); + + /* fix the pointers to Datum and bool arrays */ + nrows = 0; + for (i = 0; i < data->numrows; i++) + { + items[nrows].values = &values[nrows * numattrs]; + items[nrows].isnull = &isnull[nrows * numattrs]; + + nrows++; + } + + /* build a local cache of typlen for all attributes */ + typlen = (int *) palloc(sizeof(int) * data->nattnums); + for (i = 0; i < data->nattnums; i++) + typlen[i] = get_typlen(data->stats[i]->attrtypid); + + nrows = 0; + for (i = 0; i < data->numrows; i++) + { + bool toowide = false; + + /* load the values/null flags from sample rows */ + for (j = 0; j < numattrs; j++) + { + Datum value; + bool isnull; + int attlen; + AttrNumber attnum = attnums[j]; + + int idx; + + /* match attnum to the pre-calculated data */ + for (idx = 0; idx < data->nattnums; idx++) + { + if (attnum == data->attnums[idx]) + break; + } + + Assert(idx < data->nattnums); + + value = data->values[idx][i]; + isnull = data->nulls[idx][i]; + attlen = typlen[idx]; + + /* + * If this is a varlena value, check if it's too wide and if yes + * then skip the whole item. Otherwise detoast the value. + * + * XXX It may happen that we've already detoasted some preceding + * values for the current item. We don't bother to cleanup those + * on the assumption that those are small (below WIDTH_THRESHOLD) + * and will be discarded at the end of analyze. 
+ */ + if ((!isnull) && (attlen == -1)) + { + if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) + { + toowide = true; + break; + } + + value = PointerGetDatum(PG_DETOAST_DATUM(value)); + } + + items[nrows].values[j] = value; + items[nrows].isnull[j] = isnull; + } + + if (toowide) + continue; + + nrows++; + } + + /* store the actual number of items (ignoring the too-wide ones) */ + *nitems = nrows; + + /* all items were too wide */ + if (nrows == 0) + { + /* everything is allocated as a single chunk */ + pfree(items); + return NULL; + } + + /* do the sort, using the multi-sort */ + qsort_interruptible(items, nrows, sizeof(SortItem), + multi_sort_compare, mss); + + return items; +} + +/* + * has_stats_of_kind + * Check whether the list contains statistic of a given kind + */ +bool +has_stats_of_kind(List *stats, char requiredkind) +{ + ListCell *l; + + foreach(l, stats) + { + StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l); + + if (stat->kind == requiredkind) + return true; + } + + return false; +} + +/* + * stat_find_expression + * Search for an expression in statistics object's list of expressions. + * + * Returns the index of the expression in the statistics object's list of + * expressions, or -1 if not found. + */ +static int +stat_find_expression(StatisticExtInfo *stat, Node *expr) +{ + ListCell *lc; + int idx; + + idx = 0; + foreach(lc, stat->exprs) + { + Node *stat_expr = (Node *) lfirst(lc); + + if (equal(stat_expr, expr)) + return idx; + idx++; + } + + /* Expression not found */ + return -1; +} + +/* + * stat_covers_expressions + * Test whether a statistics object covers all expressions in a list. + * + * Returns true if all expressions are covered. If expr_idxs is non-NULL, it + * is populated with the indexes of the expressions found. + */ +static bool +stat_covers_expressions(StatisticExtInfo *stat, List *exprs, + Bitmapset **expr_idxs) +{ + ListCell *lc; + + foreach(lc, exprs) + { + Node *expr = (Node *) lfirst(lc); + int expr_idx; + + expr_idx = stat_find_expression(stat, expr); + if (expr_idx == -1) + return false; + + if (expr_idxs != NULL) + *expr_idxs = bms_add_member(*expr_idxs, expr_idx); + } + + /* If we reach here, all expressions are covered */ + return true; +} + +/* + * choose_best_statistics + * Look for and return statistics with the specified 'requiredkind' which + * have keys that match at least two of the given attnums. Return NULL if + * there's no match. + * + * The current selection criteria is very simple - we choose the statistics + * object referencing the most attributes in covered (and still unestimated + * clauses), breaking ties in favor of objects with fewer keys overall. + * + * The clause_attnums is an array of bitmaps, storing attnums for individual + * clauses. A NULL element means the clause is either incompatible or already + * estimated. + * + * XXX If multiple statistics objects tie on both criteria, then which object + * is chosen depends on the order that they appear in the stats list. Perhaps + * further tiebreakers are needed. 
+ */ +StatisticExtInfo * +choose_best_statistics(List *stats, char requiredkind, bool inh, + Bitmapset **clause_attnums, List **clause_exprs, + int nclauses) +{ + ListCell *lc; + StatisticExtInfo *best_match = NULL; + int best_num_matched = 2; /* goal #1: maximize */ + int best_match_keys = (STATS_MAX_DIMENSIONS + 1); /* goal #2: minimize */ + + foreach(lc, stats) + { + int i; + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + Bitmapset *matched_attnums = NULL; + Bitmapset *matched_exprs = NULL; + int num_matched; + int numkeys; + + /* skip statistics that are not of the correct type */ + if (info->kind != requiredkind) + continue; + + /* skip statistics with mismatching inheritance flag */ + if (info->inherit != inh) + continue; + + /* + * Collect attributes and expressions in remaining (unestimated) + * clauses fully covered by this statistic object. + * + * We know already estimated clauses have both clause_attnums and + * clause_exprs set to NULL. We leave the pointers NULL if already + * estimated, or we reset them to NULL after estimating the clause. + */ + for (i = 0; i < nclauses; i++) + { + Bitmapset *expr_idxs = NULL; + + /* ignore incompatible/estimated clauses */ + if (!clause_attnums[i] && !clause_exprs[i]) + continue; + + /* ignore clauses that are not covered by this object */ + if (!bms_is_subset(clause_attnums[i], info->keys) || + !stat_covers_expressions(info, clause_exprs[i], &expr_idxs)) + continue; + + /* record attnums and indexes of expressions covered */ + matched_attnums = bms_add_members(matched_attnums, clause_attnums[i]); + matched_exprs = bms_add_members(matched_exprs, expr_idxs); + } + + num_matched = bms_num_members(matched_attnums) + bms_num_members(matched_exprs); + + bms_free(matched_attnums); + bms_free(matched_exprs); + + /* + * save the actual number of keys in the stats so that we can choose + * the narrowest stats with the most matching keys. + */ + numkeys = bms_num_members(info->keys) + list_length(info->exprs); + + /* + * Use this object when it increases the number of matched attributes + * and expressions or when it matches the same number of attributes + * and expressions but these stats have fewer keys than any previous + * match. + */ + if (num_matched > best_num_matched || + (num_matched == best_num_matched && numkeys < best_match_keys)) + { + best_match = info; + best_num_matched = num_matched; + best_match_keys = numkeys; + } + } + + return best_match; +} + +/* + * statext_is_compatible_clause_internal + * Determines if the clause is compatible with MCV lists. + * + * To be compatible, the given clause must be a combination of supported + * clauses built from Vars or sub-expressions (where a sub-expression is + * something that exactly matches an expression found in statistics objects). + * This function recursively examines the clause and extracts any + * sub-expressions that will need to be matched against statistics. + * + * Currently, we only support the following types of clauses: + * + * (a) OpExprs of the form (Var/Expr op Const), or (Const op Var/Expr), where + * the op is one of ("=", "<", ">", ">=", "<=") + * + * (b) (Var/Expr IS [NOT] NULL) + * + * (c) combinations using AND/OR/NOT + * + * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or + * (Var/Expr op ALL (Const)) + * + * In the future, the range of supported clauses may be expanded to more + * complex cases, for example (Var op Var). 
+ * + * Arguments: + * clause: (sub)clause to be inspected (bare clause, not a RestrictInfo) + * relid: rel that all Vars in clause must belong to + * *attnums: input/output parameter collecting attribute numbers of all + * mentioned Vars. Note that we do not offset the attribute numbers, + * so we can't cope with system columns. + * *exprs: input/output parameter collecting primitive subclauses within + * the clause tree + * + * Returns false if there is something we definitively can't handle. + * On true return, we can proceed to match the *exprs against statistics. + */ +static bool +statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, + Index relid, Bitmapset **attnums, + List **exprs) +{ + /* Look inside any binary-compatible relabeling (as in examine_variable) */ + if (IsA(clause, RelabelType)) + clause = (Node *) ((RelabelType *) clause)->arg; + + /* plain Var references (boolean Vars or recursive checks) */ + if (IsA(clause, Var)) + { + Var *var = (Var *) clause; + + /* Ensure var is from the correct relation */ + if (var->varno != relid) + return false; + + /* we also better ensure the Var is from the current level */ + if (var->varlevelsup > 0) + return false; + + /* + * Also reject system attributes and whole-row Vars (we don't allow + * stats on those). + */ + if (!AttrNumberIsForUserDefinedAttr(var->varattno)) + return false; + + /* OK, record the attnum for later permissions checks. */ + *attnums = bms_add_member(*attnums, var->varattno); + + return true; + } + + /* (Var/Expr op Const) or (Const op Var/Expr) */ + if (is_opclause(clause)) + { + RangeTblEntry *rte = root->simple_rte_array[relid]; + OpExpr *expr = (OpExpr *) clause; + Node *clause_expr; + + /* Only expressions with two arguments are considered compatible. */ + if (list_length(expr->args) != 2) + return false; + + /* Check if the expression has the right shape */ + if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL)) + return false; + + /* + * If it's not one of the supported operators ("=", "<", ">", etc.), + * just ignore the clause, as it's not compatible with MCV lists. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + */ + switch (get_oprrest(expr->opno)) + { + case F_EQSEL: + case F_NEQSEL: + case F_SCALARLTSEL: + case F_SCALARLESEL: + case F_SCALARGTSEL: + case F_SCALARGESEL: + /* supported, will continue with inspection of the Var/Expr */ + break; + + default: + /* other estimators are considered unknown/unsupported */ + return false; + } + + /* + * If there are any securityQuals on the RTE from security barrier + * views or RLS policies, then the user may not have access to all the + * table's data, and we must check that the operator is leak-proof. + * + * If the operator is leaky, then we must ignore this clause for the + * purposes of estimating with MCV lists, otherwise the operator might + * reveal values from the MCV list that the user doesn't have + * permission to see. + */ + if (rte->securityQuals != NIL && + !get_func_leakproof(get_opcode(expr->opno))) + return false; + + /* Check (Var op Const) or (Const op Var) clauses by recursing. */ + if (IsA(clause_expr, Var)) + return statext_is_compatible_clause_internal(root, clause_expr, + relid, attnums, exprs); + + /* Otherwise we have (Expr op Const) or (Const op Expr). 
*/ + *exprs = lappend(*exprs, clause_expr); + return true; + } + + /* Var/Expr IN Array */ + if (IsA(clause, ScalarArrayOpExpr)) + { + RangeTblEntry *rte = root->simple_rte_array[relid]; + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + Node *clause_expr; + bool expronleft; + + /* Only expressions with two arguments are considered compatible. */ + if (list_length(expr->args) != 2) + return false; + + /* Check if the expression has the right shape (one Var, one Const) */ + if (!examine_opclause_args(expr->args, &clause_expr, NULL, &expronleft)) + return false; + + /* We only support Var on left, Const on right */ + if (!expronleft) + return false; + + /* + * If it's not one of the supported operators ("=", "<", ">", etc.), + * just ignore the clause, as it's not compatible with MCV lists. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + */ + switch (get_oprrest(expr->opno)) + { + case F_EQSEL: + case F_NEQSEL: + case F_SCALARLTSEL: + case F_SCALARLESEL: + case F_SCALARGTSEL: + case F_SCALARGESEL: + /* supported, will continue with inspection of the Var/Expr */ + break; + + default: + /* other estimators are considered unknown/unsupported */ + return false; + } + + /* + * If there are any securityQuals on the RTE from security barrier + * views or RLS policies, then the user may not have access to all the + * table's data, and we must check that the operator is leak-proof. + * + * If the operator is leaky, then we must ignore this clause for the + * purposes of estimating with MCV lists, otherwise the operator might + * reveal values from the MCV list that the user doesn't have + * permission to see. + */ + if (rte->securityQuals != NIL && + !get_func_leakproof(get_opcode(expr->opno))) + return false; + + /* Check Var IN Array clauses by recursing. */ + if (IsA(clause_expr, Var)) + return statext_is_compatible_clause_internal(root, clause_expr, + relid, attnums, exprs); + + /* Otherwise we have Expr IN Array. */ + *exprs = lappend(*exprs, clause_expr); + return true; + } + + /* AND/OR/NOT clause */ + if (is_andclause(clause) || + is_orclause(clause) || + is_notclause(clause)) + { + /* + * AND/OR/NOT-clauses are supported if all sub-clauses are supported + * + * Perhaps we could improve this by handling mixed cases, when some of + * the clauses are supported and some are not. Selectivity for the + * supported subclauses would be computed using extended statistics, + * and the remaining clauses would be estimated using the traditional + * algorithm (product of selectivities). + * + * It however seems overly complex, and in a way we already do that + * because if we reject the whole clause as unsupported here, it will + * be eventually passed to clauselist_selectivity() which does exactly + * this (split into supported/unsupported clauses etc). + */ + BoolExpr *expr = (BoolExpr *) clause; + ListCell *lc; + + foreach(lc, expr->args) + { + /* + * If we find an incompatible clause in the arguments, treat the + * whole clause as incompatible. + */ + if (!statext_is_compatible_clause_internal(root, + (Node *) lfirst(lc), + relid, attnums, exprs)) + return false; + } + + return true; + } + + /* Var/Expr IS NULL */ + if (IsA(clause, NullTest)) + { + NullTest *nt = (NullTest *) clause; + + /* Check Var IS NULL clauses by recursing. */ + if (IsA(nt->arg, Var)) + return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), + relid, attnums, exprs); + + /* Otherwise we have Expr IS NULL. 
*/ + *exprs = lappend(*exprs, nt->arg); + return true; + } + + /* + * Treat any other expressions as bare expressions to be matched against + * expressions in statistics objects. + */ + *exprs = lappend(*exprs, clause); + return true; +} + +/* + * statext_is_compatible_clause + * Determines if the clause is compatible with MCV lists. + * + * See statext_is_compatible_clause_internal, above, for the basic rules. + * This layer deals with RestrictInfo superstructure and applies permissions + * checks to verify that it's okay to examine all mentioned Vars. + * + * Arguments: + * clause: clause to be inspected (in RestrictInfo form) + * relid: rel that all Vars in clause must belong to + * *attnums: input/output parameter collecting attribute numbers of all + * mentioned Vars. Note that we do not offset the attribute numbers, + * so we can't cope with system columns. + * *exprs: input/output parameter collecting primitive subclauses within + * the clause tree + * + * Returns false if there is something we definitively can't handle. + * On true return, we can proceed to match the *exprs against statistics. + */ +static bool +statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, + Bitmapset **attnums, List **exprs) +{ + RangeTblEntry *rte = root->simple_rte_array[relid]; + RelOptInfo *rel = root->simple_rel_array[relid]; + RestrictInfo *rinfo; + int clause_relid; + Oid userid; + + /* + * Special-case handling for bare BoolExpr AND clauses, because the + * restrictinfo machinery doesn't build RestrictInfos on top of AND + * clauses. + */ + if (is_andclause(clause)) + { + BoolExpr *expr = (BoolExpr *) clause; + ListCell *lc; + + /* + * Check that each sub-clause is compatible. We expect these to be + * RestrictInfos. + */ + foreach(lc, expr->args) + { + if (!statext_is_compatible_clause(root, (Node *) lfirst(lc), + relid, attnums, exprs)) + return false; + } + + return true; + } + + /* Otherwise it must be a RestrictInfo. */ + if (!IsA(clause, RestrictInfo)) + return false; + rinfo = (RestrictInfo *) clause; + + /* Pseudoconstants are not really interesting here. */ + if (rinfo->pseudoconstant) + return false; + + /* Clauses referencing other varnos are incompatible. */ + if (!bms_get_singleton_member(rinfo->clause_relids, &clause_relid) || + clause_relid != relid) + return false; + + /* Check the clause and determine what attributes it references. */ + if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause, + relid, attnums, exprs)) + return false; + + /* + * Check that the user has permission to read all required attributes. + */ + userid = OidIsValid(rel->userid) ? rel->userid : GetUserId(); + + /* Table-level SELECT privilege is sufficient for all columns */ + if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK) + { + Bitmapset *clause_attnums = NULL; + int attnum = -1; + + /* + * We have to check per-column privileges. *attnums has the attnums + * for individual Vars we saw, but there may also be Vars within + * subexpressions in *exprs. We can use pull_varattnos() to extract + * those, but there's an impedance mismatch: attnums returned by + * pull_varattnos() are offset by FirstLowInvalidHeapAttributeNumber, + * while attnums within *attnums aren't. Convert *attnums to the + * offset style so we can combine the results. 
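+		 *
+		 * For example, a user attribute with attnum 2 in *attnums is added
+		 * to clause_attnums as (2 - FirstLowInvalidHeapAttributeNumber),
+		 * which is the representation pull_varattnos() produces.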
+ */
+ while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
+ {
+ clause_attnums =
+ bms_add_member(clause_attnums,
+ attnum - FirstLowInvalidHeapAttributeNumber);
+ }
+
+ /* Now merge attnums from *exprs into clause_attnums */
+ if (*exprs != NIL)
+ pull_varattnos((Node *) *exprs, relid, &clause_attnums);
+
+ attnum = -1;
+ while ((attnum = bms_next_member(clause_attnums, attnum)) >= 0)
+ {
+ /* Undo the offset */
+ AttrNumber attno = attnum + FirstLowInvalidHeapAttributeNumber;
+
+ if (attno == InvalidAttrNumber)
+ {
+ /* Whole-row reference, so must have access to all columns */
+ if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
+ ACLMASK_ALL) != ACLCHECK_OK)
+ return false;
+ }
+ else
+ {
+ if (pg_attribute_aclcheck(rte->relid, attno, userid,
+ ACL_SELECT) != ACLCHECK_OK)
+ return false;
+ }
+ }
+ }
+
+ /* If we reach here, the clause is OK */
+ return true;
+}
+
+/*
+ * statext_mcv_clauselist_selectivity
+ * Estimate clauses using the best multi-column statistics.
+ *
+ * Applies available extended (multi-column) statistics on a table. There may
+ * be multiple applicable statistics (with respect to the clauses), in which
+ * case we use a greedy approach. In each round we select the best statistic
+ * on a table (measured by the number of attributes extracted from the clauses
+ * and covered by it), and compute the selectivity for the supplied clauses.
+ * We repeat this process with the remaining clauses (if any), until none of
+ * the available statistics can be used.
+ *
+ * One of the main challenges with using MCV lists is how to extrapolate the
+ * estimate to the data not covered by the MCV list. To do that, we compute
+ * not only the "MCV selectivity" (selectivities for MCV items matching the
+ * supplied clauses), but also the following related selectivities:
+ *
+ * - simple selectivity: Computed without extended statistics, i.e. as if the
+ * columns/clauses were independent.
+ *
+ * - base selectivity: Similar to simple selectivity, but is computed using
+ * the extended statistic by adding up the base frequencies (that we compute
+ * and store for each MCV item) of matching MCV items.
+ *
+ * - total selectivity: Selectivity covered by the whole MCV list.
+ *
+ * These are passed to mcv_combine_selectivities() which combines them to
+ * produce a selectivity estimate that makes use of both per-column statistics
+ * and the multi-column MCV statistics.
+ *
+ * 'estimatedclauses' is an input/output parameter. We set bits for the
+ * 0-based 'clauses' indexes we estimate for and also skip clause items that
+ * already have a bit set.
+ */
+static Selectivity
+statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel, Bitmapset **estimatedclauses,
+ bool is_or)
+{
+ ListCell *l;
+ Bitmapset **list_attnums; /* attnums extracted from the clause */
+ List **list_exprs; /* expressions matched to any statistic */
+ int listidx;
+ Selectivity sel = (is_or) ? 0.0 : 1.0;
+ RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
+
+ /* check if there are any stats that might be useful for us. */
+ if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
+ return sel;
+
+ list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
+ list_length(clauses));
+
+ /* expressions extracted from complex expressions */
+ list_exprs = (List **) palloc(sizeof(List *) * list_length(clauses));
+
+ /*
+ * Pre-process the clauses list to extract the attnums and expressions
+ * seen in each item.
+ * We need to determine if there are any clauses which
+ * will be useful for selectivity estimations with extended stats. Along
+ * the way we'll record all of the attnums and expressions for each clause
+ * in lists which we'll reference later so we don't need to repeat the
+ * same work again.
+ *
+ * We also skip clauses that we already estimated using different types of
+ * statistics (we treat them as incompatible).
+ */
+ listidx = 0;
+ foreach(l, clauses)
+ {
+ Node *clause = (Node *) lfirst(l);
+ Bitmapset *attnums = NULL;
+ List *exprs = NIL;
+
+ if (!bms_is_member(listidx, *estimatedclauses) &&
+ statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
+ {
+ list_attnums[listidx] = attnums;
+ list_exprs[listidx] = exprs;
+ }
+ else
+ {
+ list_attnums[listidx] = NULL;
+ list_exprs[listidx] = NIL;
+ }
+
+ listidx++;
+ }
+
+ /* apply as many extended statistics as possible */
+ while (true)
+ {
+ StatisticExtInfo *stat;
+ List *stat_clauses;
+ Bitmapset *simple_clauses;
+
+ /* find the best suited statistics object for these attnums */
+ stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh,
+ list_attnums, list_exprs,
+ list_length(clauses));
+
+ /*
+ * if no (additional) matching stats could be found then we've nothing
+ * to do
+ */
+ if (!stat)
+ break;
+
+ /* Ensure choose_best_statistics produced an expected stats type. */
+ Assert(stat->kind == STATS_EXT_MCV);
+
+ /* now filter the clauses to be estimated using the selected MCV */
+ stat_clauses = NIL;
+
+ /* record which clauses are simple (single column or expression) */
+ simple_clauses = NULL;
+
+ listidx = -1;
+ foreach(l, clauses)
+ {
+ /* Increment the index before we decide whether to skip the clause. */
+ listidx++;
+
+ /*
+ * Ignore clauses from which we did not extract any attnums or
+ * expressions (this needs to be consistent with what we do in
+ * choose_best_statistics).
+ *
+ * This also eliminates already estimated clauses - both those
+ * estimated before and during applying extended statistics.
+ *
+ * XXX This check is needed because both bms_is_subset and
+ * stat_covers_expressions return true for empty attnums and
+ * expressions.
+ */
+ if (!list_attnums[listidx] && !list_exprs[listidx])
+ continue;
+
+ /*
+ * The clause was not estimated yet, and we've extracted either
+ * attnums or expressions from it. Ignore it if it's not fully
+ * covered by the chosen statistics object.
+ *
+ * We need to check both attributes and expressions, and reject if
+ * either is not covered.
+ */
+ if (!bms_is_subset(list_attnums[listidx], stat->keys) ||
+ !stat_covers_expressions(stat, list_exprs[listidx], NULL))
+ continue;
+
+ /*
+ * Now we know the clause is compatible (we have either attnums or
+ * expressions extracted from it), and was not estimated yet.
+ */
+
+ /* record simple clauses (single column or expression) */
+ if ((list_attnums[listidx] == NULL &&
+ list_length(list_exprs[listidx]) == 1) ||
+ (list_exprs[listidx] == NIL &&
+ bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
+ simple_clauses = bms_add_member(simple_clauses,
+ list_length(stat_clauses));
+
+ /* add clause to list and mark it as estimated */
+ stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+ *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
+
+ /*
+ * Reset the pointers, so that choose_best_statistics knows this
+ * clause was estimated and does not consider it again.
+ */ + bms_free(list_attnums[listidx]); + list_attnums[listidx] = NULL; + + list_free(list_exprs[listidx]); + list_exprs[listidx] = NULL; + } + + if (is_or) + { + bool *or_matches = NULL; + Selectivity simple_or_sel = 0.0, + stat_sel = 0.0; + MCVList *mcv_list; + + /* Load the MCV list stored in the statistics object */ + mcv_list = statext_mcv_load(stat->statOid, rte->inh); + + /* + * Compute the selectivity of the ORed list of clauses covered by + * this statistics object by estimating each in turn and combining + * them using the formula P(A OR B) = P(A) + P(B) - P(A AND B). + * This allows us to use the multivariate MCV stats to better + * estimate the individual terms and their overlap. + * + * Each time we iterate this formula, the clause "A" above is + * equal to all the clauses processed so far, combined with "OR". + */ + listidx = 0; + foreach(l, stat_clauses) + { + Node *clause = (Node *) lfirst(l); + Selectivity simple_sel, + overlap_simple_sel, + mcv_sel, + mcv_basesel, + overlap_mcvsel, + overlap_basesel, + mcv_totalsel, + clause_sel, + overlap_sel; + + /* + * "Simple" selectivity of the next clause and its overlap + * with any of the previous clauses. These are our initial + * estimates of P(B) and P(A AND B), assuming independence of + * columns/clauses. + */ + simple_sel = clause_selectivity_ext(root, clause, varRelid, + jointype, sjinfo, false); + + overlap_simple_sel = simple_or_sel * simple_sel; + + /* + * New "simple" selectivity of all clauses seen so far, + * assuming independence. + */ + simple_or_sel += simple_sel - overlap_simple_sel; + CLAMP_PROBABILITY(simple_or_sel); + + /* + * Multi-column estimate of this clause using MCV statistics, + * along with base and total selectivities, and corresponding + * selectivities for the overlap term P(A AND B). + */ + mcv_sel = mcv_clause_selectivity_or(root, stat, mcv_list, + clause, &or_matches, + &mcv_basesel, + &overlap_mcvsel, + &overlap_basesel, + &mcv_totalsel); + + /* + * Combine the simple and multi-column estimates. + * + * If this clause is a simple single-column clause, then we + * just use the simple selectivity estimate for it, since the + * multi-column statistics are unlikely to improve on that + * (and in fact could make it worse). For the overlap, we + * always make use of the multi-column statistics. + */ + if (bms_is_member(listidx, simple_clauses)) + clause_sel = simple_sel; + else + clause_sel = mcv_combine_selectivities(simple_sel, + mcv_sel, + mcv_basesel, + mcv_totalsel); + + overlap_sel = mcv_combine_selectivities(overlap_simple_sel, + overlap_mcvsel, + overlap_basesel, + mcv_totalsel); + + /* Factor these into the result for this statistics object */ + stat_sel += clause_sel - overlap_sel; + CLAMP_PROBABILITY(stat_sel); + + listidx++; + } + + /* + * Factor the result for this statistics object into the overall + * result. We treat the results from each separate statistics + * object as independent of one another. + */ + sel = sel + stat_sel - sel * stat_sel; + } + else /* Implicitly-ANDed list of clauses */ + { + Selectivity simple_sel, + mcv_sel, + mcv_basesel, + mcv_totalsel, + stat_sel; + + /* + * "Simple" selectivity, i.e. without any extended statistics, + * essentially assuming independence of the columns/clauses. + */ + simple_sel = clauselist_selectivity_ext(root, stat_clauses, + varRelid, jointype, + sjinfo, false); + + /* + * Multi-column estimate using MCV statistics, along with base and + * total selectivities. 
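+ *
+ * In essence (a rough sketch of what mcv_combine_selectivities() in
+ * mcv.c does with these inputs):
+ *
+ * sel = mcv_sel + Min(Max(simple_sel - mcv_basesel, 0), 1 - mcv_totalsel)
+ *
+ * i.e. matches within the MCV list are counted exactly, and the simple
+ * estimate is kept only for the part of the data the MCV list does not
+ * cover.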
+ */
+ mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses,
+ varRelid, jointype, sjinfo,
+ rel, &mcv_basesel,
+ &mcv_totalsel);
+
+ /* Combine the simple and multi-column estimates. */
+ stat_sel = mcv_combine_selectivities(simple_sel,
+ mcv_sel,
+ mcv_basesel,
+ mcv_totalsel);
+
+ /* Factor this into the overall result */
+ sel *= stat_sel;
+ }
+ }
+
+ return sel;
+}
+
+/*
+ * statext_clauselist_selectivity
+ * Estimate clauses using the best multi-column statistics.
+ */
+Selectivity
+statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel, Bitmapset **estimatedclauses,
+ bool is_or)
+{
+ Selectivity sel;
+
+ /* First, try estimating clauses using a multivariate MCV list. */
+ sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
+ sjinfo, rel, estimatedclauses, is_or);
+
+ /*
+ * Functional dependencies only work for clauses connected by AND, so for
+ * OR clauses we're done.
+ */
+ if (is_or)
+ return sel;
+
+ /*
+ * Then, apply functional dependencies to the remaining clauses by calling
+ * dependencies_clauselist_selectivity. Pass 'estimatedclauses' so the
+ * function can properly skip clauses already estimated above.
+ *
+ * The reasoning for applying dependencies last is that the more complex
+ * stats can track more complex correlations between the attributes, and
+ * so may be considered more reliable.
+ *
+ * For example, an MCV list can give us an exact selectivity for values in
+ * two columns, while functional dependencies can only provide information
+ * about the overall strength of the dependency.
+ */
+ sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
+ jointype, sjinfo, rel,
+ estimatedclauses);
+
+ return sel;
+}
+
+/*
+ * examine_opclause_args
+ * Split an operator expression's arguments into Expr and Const parts.
+ *
+ * Attempts to match the arguments to either (Expr op Const) or (Const op
+ * Expr), possibly with a RelabelType on top. When the expression matches this
+ * form, returns true; otherwise returns false.
+ *
+ * Optionally returns pointers to the extracted Expr/Const nodes, when passed
+ * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
+ * specifies on which side of the operator we found the expression node.
+ */
+bool
+examine_opclause_args(List *args, Node **exprp, Const **cstp,
+ bool *expronleftp)
+{
+ Node *expr;
+ Const *cst;
+ bool expronleft;
+ Node *leftop,
+ *rightop;
+
+ /* enforced by statext_is_compatible_clause_internal */
+ Assert(list_length(args) == 2);
+
+ leftop = linitial(args);
+ rightop = lsecond(args);
+
+ /* strip RelabelType from either side of the expression */
+ if (IsA(leftop, RelabelType))
+ leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ if (IsA(rightop, Const))
+ {
+ expr = (Node *) leftop;
+ cst = (Const *) rightop;
+ expronleft = true;
+ }
+ else if (IsA(leftop, Const))
+ {
+ expr = (Node *) rightop;
+ cst = (Const *) leftop;
+ expronleft = false;
+ }
+ else
+ return false;
+
+ /* return pointers to the extracted parts if requested */
+ if (exprp)
+ *exprp = expr;
+
+ if (cstp)
+ *cstp = cst;
+
+ if (expronleftp)
+ *expronleftp = expronleft;
+
+ return true;
+}
+
+
+/*
+ * Compute statistics about expressions of a relation.
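+ *
+ * For each expression we evaluate it once per sampled row and then feed
+ * the resulting values to the regular per-column analysis machinery
+ * (compute_stats), as if the expression were an ordinary attribute.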
+ */ +static void +compute_expr_stats(Relation onerel, double totalrows, + AnlExprData *exprdata, int nexprs, + HeapTuple *rows, int numrows) +{ + MemoryContext expr_context, + old_context; + int ind, + i; + + expr_context = AllocSetContextCreate(CurrentMemoryContext, + "Analyze Expression", + ALLOCSET_DEFAULT_SIZES); + old_context = MemoryContextSwitchTo(expr_context); + + for (ind = 0; ind < nexprs; ind++) + { + AnlExprData *thisdata = &exprdata[ind]; + VacAttrStats *stats = thisdata->vacattrstat; + Node *expr = thisdata->expr; + TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + Datum *exprvals; + bool *exprnulls; + ExprState *exprstate; + int tcnt; + + /* Are we still in the main context? */ + Assert(CurrentMemoryContext == expr_context); + + /* + * Need an EState for evaluation of expressions. Create it in the + * per-expression context to be sure it gets cleaned up at the bottom + * of the loop. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + + /* Set up expression evaluation state */ + exprstate = ExecPrepareExpr((Expr *) expr, estate); + + /* Need a slot to hold the current heap tuple, too */ + slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel), + &TTSOpsHeapTuple); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Compute and save expression values */ + exprvals = (Datum *) palloc(numrows * sizeof(Datum)); + exprnulls = (bool *) palloc(numrows * sizeof(bool)); + + tcnt = 0; + for (i = 0; i < numrows; i++) + { + Datum datum; + bool isnull; + + /* + * Reset the per-tuple context each time, to reclaim any cruft + * left behind by evaluating the statistics expressions. + */ + ResetExprContext(econtext); + + /* Set up for expression evaluation */ + ExecStoreHeapTuple(rows[i], slot, false); + + /* + * Evaluate the expression. We do this in the per-tuple context so + * as not to leak memory, and then copy the result into the + * context created at the beginning of this function. + */ + datum = ExecEvalExprSwitchContext(exprstate, + GetPerTupleExprContext(estate), + &isnull); + if (isnull) + { + exprvals[tcnt] = (Datum) 0; + exprnulls[tcnt] = true; + } + else + { + /* Make sure we copy the data into the context. */ + Assert(CurrentMemoryContext == expr_context); + + exprvals[tcnt] = datumCopy(datum, + stats->attrtype->typbyval, + stats->attrtype->typlen); + exprnulls[tcnt] = false; + } + + tcnt++; + } + + /* + * Now we can compute the statistics for the expression columns. + * + * XXX Unlike compute_index_stats we don't need to switch and reset + * memory contexts here, because we're only computing stats for a + * single expression (and not iterating over many indexes), so we just + * do it in expr_context. Note that compute_stats copies the result + * into stats->anl_context, so it does not disappear. + */ + if (tcnt > 0) + { + AttributeOpts *aopt = + get_attribute_options(stats->attr->attrelid, + stats->attr->attnum); + + stats->exprvals = exprvals; + stats->exprnulls = exprnulls; + stats->rowstride = 1; + stats->compute_stats(stats, + expr_fetch_func, + tcnt, + tcnt); + + /* + * If the n_distinct option is specified, it overrides the above + * computation. 
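+ *
+ * (n_distinct is the per-attribute option set with
+ * ALTER TABLE ... ALTER COLUMN ... SET (n_distinct = ...).)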
+ */
+ if (aopt != NULL && aopt->n_distinct != 0.0)
+ stats->stadistinct = aopt->n_distinct;
+ }
+
+ /* And clean up */
+ MemoryContextSwitchTo(expr_context);
+
+ ExecDropSingleTupleTableSlot(slot);
+ FreeExecutorState(estate);
+ MemoryContextResetAndDeleteChildren(expr_context);
+ }
+
+ MemoryContextSwitchTo(old_context);
+ MemoryContextDelete(expr_context);
+}
+
+
+/*
+ * Fetch function for analyzing statistics object expressions.
+ *
+ * We have not bothered to construct tuples from the data; instead, the data
+ * is just in Datum arrays.
+ */
+static Datum
+expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+ int i;
+
+ /* exprvals and exprnulls are already offset for proper column */
+ i = rownum * stats->rowstride;
+ *isNull = stats->exprnulls[i];
+ return stats->exprvals[i];
+}
+
+/*
+ * Build analyze data for a list of expressions. As this is not tied
+ * directly to a relation (table or index), we have to fake some of
+ * the fields in examine_expression().
+ */
+static AnlExprData *
+build_expr_data(List *exprs, int stattarget)
+{
+ int idx;
+ int nexprs = list_length(exprs);
+ AnlExprData *exprdata;
+ ListCell *lc;
+
+ exprdata = (AnlExprData *) palloc0(nexprs * sizeof(AnlExprData));
+
+ idx = 0;
+ foreach(lc, exprs)
+ {
+ Node *expr = (Node *) lfirst(lc);
+ AnlExprData *thisdata = &exprdata[idx];
+
+ thisdata->expr = expr;
+ thisdata->vacattrstat = examine_expression(expr, stattarget);
+ idx++;
+ }
+
+ return exprdata;
+}
+
+/* form an array of pg_statistic rows (per update_attstats) */
+static Datum
+serialize_expr_stats(AnlExprData *exprdata, int nexprs)
+{
+ int exprno;
+ Oid typOid;
+ Relation sd;
+
+ ArrayBuildState *astate = NULL;
+
+ sd = table_open(StatisticRelationId, RowExclusiveLock);
+
+ /* lookup OID of composite type for pg_statistic */
+ typOid = get_rel_type_id(StatisticRelationId);
+ if (!OidIsValid(typOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("relation \"%s\" does not have a composite type",
+ "pg_statistic")));
+
+ for (exprno = 0; exprno < nexprs; exprno++)
+ {
+ int i,
+ k;
+ VacAttrStats *stats = exprdata[exprno].vacattrstat;
+
+ Datum values[Natts_pg_statistic];
+ bool nulls[Natts_pg_statistic];
+ HeapTuple stup;
+
+ if (!stats->stats_valid)
+ {
+ astate = accumArrayResult(astate,
+ (Datum) 0,
+ true,
+ typOid,
+ CurrentMemoryContext);
+ continue;
+ }
+
+ /*
+ * Construct a new pg_statistic tuple
+ */
+ for (i = 0; i < Natts_pg_statistic; ++i)
+ {
+ nulls[i] = false;
+ }
+
+ values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
+ values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
+ values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false);
+ values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
+ values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
+ values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
+ i = Anum_pg_statistic_stakind1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+ }
+ i = Anum_pg_statistic_staop1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+ }
+ i = Anum_pg_statistic_stacoll1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */
+ }
+ i = Anum_pg_statistic_stanumbers1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ int nnum = stats->numnumbers[k];
+
+ if (nnum > 0)
+ {
+ int n;
+ Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+ ArrayType *arry;
+
+ for (n = 0; n < nnum; n++)
+ numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+ arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
+ values[i++] = PointerGetDatum(arry); /* stanumbersN */
+ }
+ else
+ {
+ nulls[i] = true;
+ values[i++] = (Datum) 0;
+ }
+ }
+ i = Anum_pg_statistic_stavalues1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ if (stats->numvalues[k] > 0)
+ {
+ ArrayType *arry;
+
+ arry = construct_array(stats->stavalues[k],
+ stats->numvalues[k],
+ stats->statypid[k],
+ stats->statyplen[k],
+ stats->statypbyval[k],
+ stats->statypalign[k]);
+ values[i++] = PointerGetDatum(arry); /* stavaluesN */
+ }
+ else
+ {
+ nulls[i] = true;
+ values[i++] = (Datum) 0;
+ }
+ }
+
+ stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
+
+ astate = accumArrayResult(astate,
+ heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
+ false,
+ typOid,
+ CurrentMemoryContext);
+ }
+
+ table_close(sd, RowExclusiveLock);
+
+ return makeArrayResult(astate, CurrentMemoryContext);
+}
+
+/*
+ * Loads pg_statistic record from expression statistics for expression
+ * identified by the supplied index.
+ */
+HeapTuple
+statext_expressions_load(Oid stxoid, bool inh, int idx)
+{
+ bool isnull;
+ Datum value;
+ HeapTuple htup;
+ ExpandedArrayHeader *eah;
+ HeapTupleHeader td;
+ HeapTupleData tmptup;
+ HeapTuple tup;
+
+ htup = SearchSysCache2(STATEXTDATASTXOID,
+ ObjectIdGetDatum(stxoid), BoolGetDatum(inh));
+ if (!HeapTupleIsValid(htup))
+ elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
+
+ value = SysCacheGetAttr(STATEXTDATASTXOID, htup,
+ Anum_pg_statistic_ext_data_stxdexpr, &isnull);
+ if (isnull)
+ elog(ERROR,
+ "requested statistics kind \"%c\" is not yet built for statistics object %u",
+ STATS_EXT_EXPRESSIONS, stxoid);
+
+ eah = DatumGetExpandedArray(value);
+
+ deconstruct_expanded_array(eah);
+
+ td = DatumGetHeapTupleHeader(eah->dvalues[idx]);
+
+ /* Build a temporary HeapTuple control structure */
+ tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
+ ItemPointerSetInvalid(&(tmptup.t_self));
+ tmptup.t_tableOid = InvalidOid;
+ tmptup.t_data = td;
+
+ tup = heap_copytuple(&tmptup);
+
+ ReleaseSysCache(htup);
+
+ return tup;
+}
+
+/*
+ * Evaluate the expressions, so that we can use the results to build
+ * all the requested statistics types. This matters especially for
+ * expensive expressions, of course.
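+ *
+ * For example, given a (hypothetical) definition like
+ *
+ * CREATE STATISTICS s (mcv) ON (a + b), upper(c) FROM t;
+ *
+ * both expressions are evaluated once per sampled row, and the computed
+ * values are then reused for every statistics kind being built.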
+ */
+static StatsBuildData *
+make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
+ VacAttrStats **stats, int stattarget)
+{
+ /* evaluated expressions */
+ StatsBuildData *result;
+ char *ptr;
+ Size len;
+
+ int i;
+ int k;
+ int idx;
+ TupleTableSlot *slot;
+ EState *estate;
+ ExprContext *econtext;
+ List *exprstates = NIL;
+ int nkeys = bms_num_members(stat->columns) + list_length(stat->exprs);
+ ListCell *lc;
+
+ /* allocate everything as a single chunk, so we can free it easily */
+ len = MAXALIGN(sizeof(StatsBuildData));
+ len += MAXALIGN(sizeof(AttrNumber) * nkeys); /* attnums */
+ len += MAXALIGN(sizeof(VacAttrStats *) * nkeys); /* stats */
+
+ /* values */
+ len += MAXALIGN(sizeof(Datum *) * nkeys);
+ len += nkeys * MAXALIGN(sizeof(Datum) * numrows);
+
+ /* nulls */
+ len += MAXALIGN(sizeof(bool *) * nkeys);
+ len += nkeys * MAXALIGN(sizeof(bool) * numrows);
+
+ ptr = palloc(len);
+
+ /* set the pointers */
+ result = (StatsBuildData *) ptr;
+ ptr += MAXALIGN(sizeof(StatsBuildData));
+
+ /* attnums */
+ result->attnums = (AttrNumber *) ptr;
+ ptr += MAXALIGN(sizeof(AttrNumber) * nkeys);
+
+ /* stats */
+ result->stats = (VacAttrStats **) ptr;
+ ptr += MAXALIGN(sizeof(VacAttrStats *) * nkeys);
+
+ /* values */
+ result->values = (Datum **) ptr;
+ ptr += MAXALIGN(sizeof(Datum *) * nkeys);
+
+ /* nulls */
+ result->nulls = (bool **) ptr;
+ ptr += MAXALIGN(sizeof(bool *) * nkeys);
+
+ for (i = 0; i < nkeys; i++)
+ {
+ result->values[i] = (Datum *) ptr;
+ ptr += MAXALIGN(sizeof(Datum) * numrows);
+
+ result->nulls[i] = (bool *) ptr;
+ ptr += MAXALIGN(sizeof(bool) * numrows);
+ }
+
+ Assert((ptr - (char *) result) == len);
+
+ /* we have it allocated, so let's fill the values */
+ result->nattnums = nkeys;
+ result->numrows = numrows;
+
+ /* fill the attribute info - first attributes, then expressions */
+ idx = 0;
+ k = -1;
+ while ((k = bms_next_member(stat->columns, k)) >= 0)
+ {
+ result->attnums[idx] = k;
+ result->stats[idx] = stats[idx];
+
+ idx++;
+ }
+
+ k = -1;
+ foreach(lc, stat->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc);
+
+ result->attnums[idx] = k;
+ result->stats[idx] = examine_expression(expr, stattarget);
+
+ idx++;
+ k--;
+ }
+
+ /* first extract values for all the regular attributes */
+ for (i = 0; i < numrows; i++)
+ {
+ idx = 0;
+ k = -1;
+ while ((k = bms_next_member(stat->columns, k)) >= 0)
+ {
+ result->values[idx][i] = heap_getattr(rows[i], k,
+ result->stats[idx]->tupDesc,
+ &result->nulls[idx][i]);
+
+ idx++;
+ }
+ }
+
+ /* Need an EState for evaluation of expressions. */
+ estate = CreateExecutorState();
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Need a slot to hold the current heap tuple, too */
+ slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
+ &TTSOpsHeapTuple);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Set up expression evaluation state */
+ exprstates = ExecPrepareExprList(stat->exprs, estate);
+
+ for (i = 0; i < numrows; i++)
+ {
+ /*
+ * Reset the per-tuple context each time, to reclaim any cruft left
+ * behind by evaluating the statistics object expressions.
+ */
+ ResetExprContext(econtext);
+
+ /* Set up for expression evaluation */
+ ExecStoreHeapTuple(rows[i], slot, false);
+
+ idx = bms_num_members(stat->columns);
+ foreach(lc, exprstates)
+ {
+ Datum datum;
+ bool isnull;
+ ExprState *exprstate = (ExprState *) lfirst(lc);
+
+ /*
+ * XXX This probably leaks memory.
Maybe we should use + * ExecEvalExprSwitchContext but then we need to copy the result + * somewhere else. + */ + datum = ExecEvalExpr(exprstate, + GetPerTupleExprContext(estate), + &isnull); + if (isnull) + { + result->values[idx][i] = (Datum) 0; + result->nulls[idx][i] = true; + } + else + { + result->values[idx][i] = (Datum) datum; + result->nulls[idx][i] = false; + } + + idx++; + } + } + + ExecDropSingleTupleTableSlot(slot); + FreeExecutorState(estate); + + return result; +} diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c new file mode 100644 index 0000000..03b9f04 --- /dev/null +++ b/src/backend/statistics/mcv.c @@ -0,0 +1,2179 @@ +/*------------------------------------------------------------------------- + * + * mcv.c + * POSTGRES multivariate MCV lists + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/mcv.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "fmgr.h" +#include "funcapi.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgroids.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* + * Computes size of a serialized MCV item, depending on the number of + * dimensions (columns) the statistic is defined on. The datum values are + * stored in a separate array (deduplicated, to minimize the size), and + * so the serialized items only store uint16 indexes into that array. + * + * Each serialized item stores (in this order): + * + * - indexes to values (ndim * sizeof(uint16)) + * - null flags (ndim * sizeof(bool)) + * - frequency (sizeof(double)) + * - base_frequency (sizeof(double)) + * + * There is no alignment padding within an MCV item. + * So in total each MCV item requires this many bytes: + * + * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double) + */ +#define ITEM_SIZE(ndims) \ + ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)) + +/* + * Used to compute size of serialized MCV list representation. + */ +#define MinSizeOfMCVList \ + (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber)) + +/* + * Size of the serialized MCV list, excluding the space needed for + * deduplicated per-dimension values. The macro is meant to be used + * when it's not yet safe to access the serialized info about amount + * of data for each column. 
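+ *
+ * For example, with ndims = 2 each serialized item takes ITEM_SIZE(2) =
+ * 2 * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double) = 22 bytes,
+ * and SizeOfMCVList(2, nitems) adds the shared header, the type OIDs and
+ * the DimensionInfo entries on top of the items.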
+ */ +#define SizeOfMCVList(ndims,nitems) \ + ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \ + ((ndims) * sizeof(DimensionInfo)) + \ + ((nitems) * ITEM_SIZE(ndims))) + +static MultiSortSupport build_mss(StatsBuildData *data); + +static SortItem *build_distinct_groups(int numrows, SortItem *items, + MultiSortSupport mss, int *ndistinct); + +static SortItem **build_column_frequencies(SortItem *groups, int ngroups, + MultiSortSupport mss, int *ncounts); + +static int count_distinct_groups(int numrows, SortItem *items, + MultiSortSupport mss); + +/* + * Compute new value for bitmap item, considering whether it's used for + * clauses connected by AND/OR. + */ +#define RESULT_MERGE(value, is_or, match) \ + ((is_or) ? ((value) || (match)) : ((value) && (match))) + +/* + * When processing a list of clauses, the bitmap item may get set to a value + * such that additional clauses can't change it. For example, when processing + * a list of clauses connected to AND, as soon as the item gets set to 'false' + * then it'll remain like that. Similarly clauses connected by OR and 'true'. + * + * Returns true when the value in the bitmap can't change no matter how the + * remaining clauses are evaluated. + */ +#define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value))) + +/* + * get_mincount_for_mcv_list + * Determine the minimum number of times a value needs to appear in + * the sample for it to be included in the MCV list. + * + * We want to keep only values that appear sufficiently often in the + * sample that it is reasonable to extrapolate their sample frequencies to + * the entire table. We do this by placing an upper bound on the relative + * standard error of the sample frequency, so that any estimates the + * planner generates from the MCV statistics can be expected to be + * reasonably accurate. + * + * Since we are sampling without replacement, the sample frequency of a + * particular value is described by a hypergeometric distribution. A + * common rule of thumb when estimating errors in this situation is to + * require at least 10 instances of the value in the sample, in which case + * the distribution can be approximated by a normal distribution, and + * standard error analysis techniques can be applied. Given a sample size + * of n, a population size of N, and a sample frequency of p=cnt/n, the + * standard error of the proportion p is given by + * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1)) + * where the second term is the finite population correction. To get + * reasonably accurate planner estimates, we impose an upper bound on the + * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative + * error bound is fairly arbitrary, but has been found empirically to work + * well. Rearranging this formula gives a lower bound on the number of + * instances of the value seen: + * cnt > n*(N-n) / (N-n+0.04*n*(N-1)) + * This bound is at most 25, and approaches 0 as n approaches 0 or N. The + * case where n approaches 0 cannot happen in practice, since the sample + * size is at least 300. The case where n approaches N corresponds to + * sampling the whole table, in which case it is reasonable to keep + * the whole MCV list (have no lower bound), so it makes sense to apply + * this formula for all inputs, even though the above derivation is + * technically only valid when the right hand side is at least around 10. 
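+ *
+ * For example, with a sample of n = 30000 rows from a table of
+ * N = 1000000 rows, the formula gives cnt > 2.91e10 / 1.2e9, i.e. a value
+ * has to be seen roughly 24 times in the sample to make it into the MCV
+ * list.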
+ * + * An alternative way to look at this formula is as follows -- assume that + * the number of instances of the value seen scales up to the entire + * table, so that the population count is K=N*cnt/n. Then the distribution + * in the sample is a hypergeometric distribution parameterised by N, n + * and K, and the bound above is mathematically equivalent to demanding + * that the standard deviation of that distribution is less than 20% of + * its mean. Thus the relative errors in any planner estimates produced + * from the MCV statistics are likely to be not too large. + */ +static double +get_mincount_for_mcv_list(int samplerows, double totalrows) +{ + double n = samplerows; + double N = totalrows; + double numer, + denom; + + numer = n * (N - n); + denom = N - n + 0.04 * n * (N - 1); + + /* Guard against division by zero (possible if n = N = 1) */ + if (denom == 0.0) + return 0.0; + + return numer / denom; +} + +/* + * Builds MCV list from the set of sampled rows. + * + * The algorithm is quite simple: + * + * (1) sort the data (default collation, '<' for the data type) + * + * (2) count distinct groups, decide how many to keep + * + * (3) build the MCV list using the threshold determined in (2) + * + * (4) remove rows represented by the MCV from the sample + * + */ +MCVList * +statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget) +{ + int i, + numattrs, + numrows, + ngroups, + nitems; + double mincount; + SortItem *items; + SortItem *groups; + MCVList *mcvlist = NULL; + MultiSortSupport mss; + + /* comparator for all the columns */ + mss = build_mss(data); + + /* sort the rows */ + items = build_sorted_items(data, &nitems, mss, + data->nattnums, data->attnums); + + if (!items) + return NULL; + + /* for convenience */ + numattrs = data->nattnums; + numrows = data->numrows; + + /* transform the sorted rows into groups (sorted by frequency) */ + groups = build_distinct_groups(nitems, items, mss, &ngroups); + + /* + * The maximum number of MCV items to store, based on the statistics + * target we computed for the statistics object (from the target set for + * the object itself, attributes and the system default). In any case, we + * can't keep more groups than we have available. + */ + nitems = stattarget; + if (nitems > ngroups) + nitems = ngroups; + + /* + * Decide how many items to keep in the MCV list. We can't use the same + * algorithm as per-column MCV lists, because that only considers the + * actual group frequency - but we're primarily interested in how the + * actual frequency differs from the base frequency (product of simple + * per-column frequencies, as if the columns were independent). + * + * Using the same algorithm might exclude items that are close to the + * "average" frequency of the sample. But that does not say whether the + * observed frequency is close to the base frequency or not. We also need + * to consider unexpectedly uncommon items (again, compared to the base + * frequency), and the single-column algorithm does not have to. + * + * We simply decide how many items to keep by computing the minimum count + * using get_mincount_for_mcv_list() and then keep all items that seem to + * be more common than that. + */ + mincount = get_mincount_for_mcv_list(numrows, totalrows); + + /* + * Walk the groups until we find the first group with a count below the + * mincount threshold (the index of that group is the number of groups we + * want to keep). 
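+ *
+ * For example, with mincount = 24.2 and groups sorted by descending count
+ * (500, 120, 30, 20, ...), we keep the first three groups.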
+ */ + for (i = 0; i < nitems; i++) + { + if (groups[i].count < mincount) + { + nitems = i; + break; + } + } + + /* + * At this point, we know the number of items for the MCV list. There + * might be none (for uniform distribution with many groups), and in that + * case, there will be no MCV list. Otherwise, construct the MCV list. + */ + if (nitems > 0) + { + int j; + SortItem key; + MultiSortSupport tmp; + + /* frequencies for values in each attribute */ + SortItem **freqs; + int *nfreqs; + + /* used to search values */ + tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup) + + sizeof(SortSupportData)); + + /* compute frequencies for values in each column */ + nfreqs = (int *) palloc0(sizeof(int) * numattrs); + freqs = build_column_frequencies(groups, ngroups, mss, nfreqs); + + /* + * Allocate the MCV list structure, set the global parameters. + */ + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + + sizeof(MCVItem) * nitems); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->ndimensions = numattrs; + mcvlist->nitems = nitems; + + /* store info about data type OIDs */ + for (i = 0; i < numattrs; i++) + mcvlist->types[i] = data->stats[i]->attrtypid; + + /* Copy the first chunk of groups into the result. */ + for (i = 0; i < nitems; i++) + { + /* just point to the proper place in the list */ + MCVItem *item = &mcvlist->items[i]; + + item->values = (Datum *) palloc(sizeof(Datum) * numattrs); + item->isnull = (bool *) palloc(sizeof(bool) * numattrs); + + /* copy values for the group */ + memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs); + memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs); + + /* groups should be sorted by frequency in descending order */ + Assert((i == 0) || (groups[i - 1].count >= groups[i].count)); + + /* group frequency */ + item->frequency = (double) groups[i].count / numrows; + + /* base frequency, if the attributes were independent */ + item->base_frequency = 1.0; + for (j = 0; j < numattrs; j++) + { + SortItem *freq; + + /* single dimension */ + tmp->ndims = 1; + tmp->ssup[0] = mss->ssup[j]; + + /* fill search key */ + key.values = &groups[i].values[j]; + key.isnull = &groups[i].isnull[j]; + + freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j], + sizeof(SortItem), + multi_sort_compare, tmp); + + item->base_frequency *= ((double) freq->count) / numrows; + } + } + + pfree(nfreqs); + pfree(freqs); + } + + pfree(items); + pfree(groups); + + return mcvlist; +} + +/* + * build_mss + * Build a MultiSortSupport for the given StatsBuildData. + */ +static MultiSortSupport +build_mss(StatsBuildData *data) +{ + int i; + int numattrs = data->nattnums; + + /* Sort by multiple columns (using array of SortSupport) */ + MultiSortSupport mss = multi_sort_init(numattrs); + + /* prepare the sort functions for all the attributes */ + for (i = 0; i < numattrs; i++) + { + VacAttrStats *colstat = data->stats[i]; + TypeCacheEntry *type; + + type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for ordering operator for type %u", + colstat->attrtypid); + + multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); + } + + return mss; +} + +/* + * count_distinct_groups + * Count distinct combinations of SortItems in the array. + * + * The array is assumed to be sorted according to the MultiSortSupport. 
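+ *
+ * For example, the sorted items (1,a), (1,a), (1,b), (2,a) contain three
+ * distinct groups.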
+ */
+static int
+count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
+{
+ int i;
+ int ndistinct;
+
+ ndistinct = 1;
+ for (i = 1; i < numrows; i++)
+ {
+ /* make sure the array really is sorted */
+ Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
+
+ if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+ ndistinct += 1;
+ }
+
+ return ndistinct;
+}
+
+/*
+ * compare_sort_item_count
+ * Comparator for sorting items by count (frequencies) in descending
+ * order.
+ */
+static int
+compare_sort_item_count(const void *a, const void *b, void *arg)
+{
+ SortItem *ia = (SortItem *) a;
+ SortItem *ib = (SortItem *) b;
+
+ if (ia->count == ib->count)
+ return 0;
+ else if (ia->count > ib->count)
+ return -1;
+
+ return 1;
+}
+
+/*
+ * build_distinct_groups
+ * Build an array of SortItems for distinct groups and count the matching
+ * items.
+ *
+ * The 'items' array is assumed to be sorted.
+ */
+static SortItem *
+build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
+ int *ndistinct)
+{
+ int i,
+ j;
+ int ngroups = count_distinct_groups(numrows, items, mss);
+
+ SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
+
+ j = 0;
+ groups[0] = items[0];
+ groups[0].count = 1;
+
+ for (i = 1; i < numrows; i++)
+ {
+ /* Assume sorted in ascending order. */
+ Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
+
+ /* New distinct group detected. */
+ if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+ {
+ groups[++j] = items[i];
+ groups[j].count = 0;
+ }
+
+ groups[j].count++;
+ }
+
+ /* ensure we filled the expected number of distinct groups */
+ Assert(j + 1 == ngroups);
+
+ /* Sort the distinct groups by frequency (in descending order). */
+ qsort_interruptible(groups, ngroups, sizeof(SortItem),
+ compare_sort_item_count, NULL);
+
+ *ndistinct = ngroups;
+ return groups;
+}
+
+/* compare sort items (single dimension) */
+static int
+sort_item_compare(const void *a, const void *b, void *arg)
+{
+ SortSupport ssup = (SortSupport) arg;
+ SortItem *ia = (SortItem *) a;
+ SortItem *ib = (SortItem *) b;
+
+ return ApplySortComparator(ia->values[0], ia->isnull[0],
+ ib->values[0], ib->isnull[0],
+ ssup);
+}
+
+/*
+ * build_column_frequencies
+ * Compute frequencies of values in each column.
+ *
+ * This returns an array of SortItems for each attribute the MCV is built
+ * on, with a frequency (number of occurrences) for each value. This is
+ * then used to compute "base" frequency of MCV items.
+ *
+ * All the memory is allocated in a single chunk, so that a single pfree
+ * is enough to release it. We do not allocate space for values/isnull
+ * arrays in the SortItems, because we can simply point into the input
+ * groups directly.
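+ *
+ * For example, for groups {(1,a): 10, (1,b): 5, (2,a): 3} this computes
+ * per-column frequencies {1: 15, 2: 3} for the first column and
+ * {a: 13, b: 5} for the second one.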
+ */
+static SortItem **
+build_column_frequencies(SortItem *groups, int ngroups,
+ MultiSortSupport mss, int *ncounts)
+{
+ int i,
+ dim;
+ SortItem **result;
+ char *ptr;
+
+ Assert(groups);
+ Assert(ncounts);
+
+ /* allocate arrays for all columns as a single chunk */
+ ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
+ mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
+
+ /* initial array of pointers */
+ result = (SortItem **) ptr;
+ ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
+
+ for (dim = 0; dim < mss->ndims; dim++)
+ {
+ SortSupport ssup = &mss->ssup[dim];
+
+ /* array of values for a single column */
+ result[dim] = (SortItem *) ptr;
+ ptr += MAXALIGN(sizeof(SortItem) * ngroups);
+
+ /* extract data for the dimension */
+ for (i = 0; i < ngroups; i++)
+ {
+ /* point into the input groups */
+ result[dim][i].values = &groups[i].values[dim];
+ result[dim][i].isnull = &groups[i].isnull[dim];
+ result[dim][i].count = groups[i].count;
+ }
+
+ /* sort the values, deduplicate */
+ qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
+ sort_item_compare, ssup);
+
+ /*
+ * Identify distinct values, compute frequency (there might be
+ * multiple MCV items containing this value, so we need to sum counts
+ * from all of them).
+ */
+ ncounts[dim] = 1;
+ for (i = 1; i < ngroups; i++)
+ {
+ if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
+ {
+ result[dim][ncounts[dim] - 1].count += result[dim][i].count;
+ continue;
+ }
+
+ result[dim][ncounts[dim]] = result[dim][i];
+
+ ncounts[dim]++;
+ }
+ }
+
+ return result;
+}
+
+/*
+ * statext_mcv_load
+ * Load the MCV list for the indicated pg_statistic_ext_data tuple.
+ */
+MCVList *
+statext_mcv_load(Oid mvoid, bool inh)
+{
+ MCVList *result;
+ bool isnull;
+ Datum mcvlist;
+ HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID,
+ ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
+
+ if (!HeapTupleIsValid(htup))
+ elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
+
+ mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
+ Anum_pg_statistic_ext_data_stxdmcv, &isnull);
+
+ if (isnull)
+ elog(ERROR,
+ "requested statistics kind \"%c\" is not yet built for statistics object %u",
+ STATS_EXT_MCV, mvoid);
+
+ result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
+
+ ReleaseSysCache(htup);
+
+ return result;
+}
+
+
+/*
+ * statext_mcv_serialize
+ * Serialize MCV list into a pg_mcv_list value.
+ *
+ * The MCV items may include values of various data types, and it's reasonable
+ * to expect redundancy (values for a given attribute, repeated for multiple
+ * MCV list items). So we deduplicate the values into arrays, and then replace
+ * the values by indexes into those arrays.
+ *
+ * The overall structure of the serialized representation looks like this:
+ *
+ * +---------------+----------------+---------------------+-------+
+ * | header fields | dimension info | deduplicated values | items |
+ * +---------------+----------------+---------------------+-------+
+ *
+ * Where dimension info stores information about the type of the K-th
+ * attribute (e.g. typlen, typbyval and length of deduplicated values).
+ * Deduplicated values store deduplicated values for each attribute. And
+ * items store the actual MCV list items, with values replaced by indexes into
+ * the arrays.
+ *
+ * When serializing the items, we use uint16 indexes. The number of MCV items
+ * is limited by the statistics target (which is capped to 10k at the moment).
+ * We might increase this to 65k and still fit into uint16, so there's a bit of
+ * slack. Furthermore, this limit is on the number of distinct values per column,
+ * and we usually have few of those (and various combinations of them for the
+ * MCV list). So uint16 seems fine for now.
+ *
+ * We don't really expect the serialization to save as much space as for
+ * histograms, as we are not doing any bucket splits (which is the source
+ * of high redundancy in histograms).
+ *
+ * TODO: Consider packing boolean flags (NULL) for each item into a single char
+ * (or a longer type) instead of using an array of bool items.
+ */
+bytea *
+statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
+{
+ int i;
+ int dim;
+ int ndims = mcvlist->ndimensions;
+
+ SortSupport ssup;
+ DimensionInfo *info;
+
+ Size total_length;
+
+ /* serialized items (indexes into arrays, etc.) */
+ bytea *raw;
+ char *ptr;
+ char *endptr PG_USED_FOR_ASSERTS_ONLY;
+
+ /* values per dimension (and number of non-NULL values) */
+ Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
+ int *counts = (int *) palloc0(sizeof(int) * ndims);
+
+ /*
+ * We'll include some rudimentary information about the attribute types
+ * (length, by-val flag), so that we don't have to look them up while
+ * deserializing the MCV list (we already have the type OID in the
+ * header). This is safe because when changing the type of the attribute
+ * the statistics get dropped automatically. We need to store the info
+ * about the arrays of deduplicated values anyway.
+ */
+ info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
+
+ /* sort support data for all attributes included in the MCV list */
+ ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
+
+ /* collect and deduplicate values for each dimension (attribute) */
+ for (dim = 0; dim < ndims; dim++)
+ {
+ int ndistinct;
+ TypeCacheEntry *typentry;
+
+ /*
+ * Lookup the LT operator (can't get it from stats extra_data, as we
+ * don't know how to interpret that - scalar vs. array etc.).
+ */
+ typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
+
+ /* copy important info about the data type (length, by-value) */
+ info[dim].typlen = stats[dim]->attrtype->typlen;
+ info[dim].typbyval = stats[dim]->attrtype->typbyval;
+
+ /* allocate space for values in the attribute and collect them */
+ values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
+
+ for (i = 0; i < mcvlist->nitems; i++)
+ {
+ /* skip NULL values - we don't need to deduplicate those */
+ if (mcvlist->items[i].isnull[dim])
+ continue;
+
+ /* append the value at the end */
+ values[dim][counts[dim]] = mcvlist->items[i].values[dim];
+ counts[dim] += 1;
+ }
+
+ /* if there are just NULL values in this dimension, we're done */
+ if (counts[dim] == 0)
+ continue;
+
+ /* sort and deduplicate the data */
+ ssup[dim].ssup_cxt = CurrentMemoryContext;
+ ssup[dim].ssup_collation = stats[dim]->attrcollid;
+ ssup[dim].ssup_nulls_first = false;
+
+ PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
+
+ qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
+ compare_scalars_simple, &ssup[dim]);
+
+ /*
+ * Walk through the array and eliminate duplicate values, but keep the
+ * ordering (so that we can do a binary search later). We know there's
+ * at least one item as (counts[dim] != 0), so we can skip the first
+ * element.
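+ *
+ * For example, the sorted values (1, 1, 2, 5, 5, 5) deduplicate to
+ * (1, 2, 5) with ndistinct = 3.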
+ */ + ndistinct = 1; /* number of distinct values */ + for (i = 1; i < counts[dim]; i++) + { + /* expect sorted array */ + Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0); + + /* if the value is the same as the previous one, we can skip it */ + if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim])) + continue; + + values[dim][ndistinct] = values[dim][i]; + ndistinct += 1; + } + + /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */ + Assert(ndistinct <= PG_UINT16_MAX); + + /* + * Store additional info about the attribute - number of deduplicated + * values, and also size of the serialized data. For fixed-length data + * types this is trivial to compute, for varwidth types we need to + * actually walk the array and sum the sizes. + */ + info[dim].nvalues = ndistinct; + + if (info[dim].typbyval) /* by-value data types */ + { + info[dim].nbytes = info[dim].nvalues * info[dim].typlen; + + /* + * We copy the data into the MCV item during deserialization, so + * we don't need to allocate any extra space. + */ + info[dim].nbytes_aligned = 0; + } + else if (info[dim].typlen > 0) /* fixed-length by-ref */ + { + /* + * We don't care about alignment in the serialized data, so we + * pack the data as much as possible. But we also track how much + * data will be needed after deserialization, and in that case we + * need to account for alignment of each item. + * + * Note: As the items are fixed-length, we could easily compute + * this during deserialization, but we do it here anyway. + */ + info[dim].nbytes = info[dim].nvalues * info[dim].typlen; + info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen); + } + else if (info[dim].typlen == -1) /* varlena */ + { + info[dim].nbytes = 0; + info[dim].nbytes_aligned = 0; + for (i = 0; i < info[dim].nvalues; i++) + { + Size len; + + /* + * For varlena values, we detoast the values and store the + * length and data separately. We don't bother with alignment + * here, which means that during deserialization we need to + * copy the fields and only access the copies. + */ + values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); + + /* serialized length (uint32 length + data) */ + len = VARSIZE_ANY_EXHDR(values[dim][i]); + info[dim].nbytes += sizeof(uint32); /* length */ + info[dim].nbytes += len; /* value (no header) */ + + /* + * During deserialization we'll build regular varlena values + * with full headers, and we need to align them properly. + */ + info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len); + } + } + else if (info[dim].typlen == -2) /* cstring */ + { + info[dim].nbytes = 0; + info[dim].nbytes_aligned = 0; + for (i = 0; i < info[dim].nvalues; i++) + { + Size len; + + /* + * cstring is handled similar to varlena - first we store the + * length as uint32 and then the data. We don't care about + * alignment, which means that during deserialization we need + * to copy the fields and only access the copies. 
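+ *
+ * For example, the cstring "abc" is serialized as the uint32 length 4
+ * followed by the four bytes 'a', 'b', 'c' and '\0'.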
+ */ + + /* c-strings include terminator, so +1 byte */ + len = strlen(DatumGetCString(values[dim][i])) + 1; + info[dim].nbytes += sizeof(uint32); /* length */ + info[dim].nbytes += len; /* value */ + + /* space needed for properly aligned deserialized copies */ + info[dim].nbytes_aligned += MAXALIGN(len); + } + } + + /* we know (count>0) so there must be some data */ + Assert(info[dim].nbytes > 0); + } + + /* + * Now we can finally compute how much space we'll actually need for the + * whole serialized MCV list (varlena header, MCV header, dimension info + * for each attribute, deduplicated values and items). + */ + total_length = (3 * sizeof(uint32)) /* magic + type + nitems */ + + sizeof(AttrNumber) /* ndimensions */ + + (ndims * sizeof(Oid)); /* attribute types */ + + /* dimension info */ + total_length += ndims * sizeof(DimensionInfo); + + /* add space for the arrays of deduplicated values */ + for (i = 0; i < ndims; i++) + total_length += info[i].nbytes; + + /* + * And finally account for the items (those are fixed-length, thanks to + * replacing values with uint16 indexes into the deduplicated arrays). + */ + total_length += mcvlist->nitems * ITEM_SIZE(dim); + + /* + * Allocate space for the whole serialized MCV list (we'll skip bytes, so + * we set them to zero to make the result more compressible). + */ + raw = (bytea *) palloc0(VARHDRSZ + total_length); + SET_VARSIZE(raw, VARHDRSZ + total_length); + + ptr = VARDATA(raw); + endptr = ptr + total_length; + + /* copy the MCV list header fields, one by one */ + memcpy(ptr, &mcvlist->magic, sizeof(uint32)); + ptr += sizeof(uint32); + + memcpy(ptr, &mcvlist->type, sizeof(uint32)); + ptr += sizeof(uint32); + + memcpy(ptr, &mcvlist->nitems, sizeof(uint32)); + ptr += sizeof(uint32); + + memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber)); + ptr += sizeof(AttrNumber); + + memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims); + ptr += (sizeof(Oid) * ndims); + + /* store information about the attributes (data amounts, ...) */ + memcpy(ptr, info, sizeof(DimensionInfo) * ndims); + ptr += sizeof(DimensionInfo) * ndims; + + /* Copy the deduplicated values for all attributes to the output. */ + for (dim = 0; dim < ndims; dim++) + { + /* remember the starting point for Asserts later */ + char *start PG_USED_FOR_ASSERTS_ONLY = ptr; + + for (i = 0; i < info[dim].nvalues; i++) + { + Datum value = values[dim][i]; + + if (info[dim].typbyval) /* passed by value */ + { + Datum tmp; + + /* + * For byval types, we need to copy just the significant bytes + * - we can't use memcpy directly, as that assumes + * little-endian behavior. store_att_byval does almost what + * we need, but it requires a properly aligned buffer - the + * output buffer does not guarantee that. So we simply use a + * local Datum variable (which guarantees proper alignment), + * and then copy the value from it. 
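+ *
+ * For example, an int4 value is stored as just its 4 significant bytes,
+ * even though sizeof(Datum) is typically 8.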
+ */ + store_att_byval(&tmp, value, info[dim].typlen); + + memcpy(ptr, &tmp, info[dim].typlen); + ptr += info[dim].typlen; + } + else if (info[dim].typlen > 0) /* passed by reference */ + { + /* no special alignment needed, treated as char array */ + memcpy(ptr, DatumGetPointer(value), info[dim].typlen); + ptr += info[dim].typlen; + } + else if (info[dim].typlen == -1) /* varlena */ + { + uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); + + /* copy the length */ + memcpy(ptr, &len, sizeof(uint32)); + ptr += sizeof(uint32); + + /* data from the varlena value (without the header) */ + memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len); + ptr += len; + } + else if (info[dim].typlen == -2) /* cstring */ + { + uint32 len = (uint32) strlen(DatumGetCString(value)) + 1; + + /* copy the length */ + memcpy(ptr, &len, sizeof(uint32)); + ptr += sizeof(uint32); + + /* value */ + memcpy(ptr, DatumGetCString(value), len); + ptr += len; + } + + /* no underflows or overflows */ + Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes)); + } + + /* we should get exactly nbytes of data for this dimension */ + Assert((ptr - start) == info[dim].nbytes); + } + + /* Serialize the items, with uint16 indexes instead of the values. */ + for (i = 0; i < mcvlist->nitems; i++) + { + MCVItem *mcvitem = &mcvlist->items[i]; + + /* don't write beyond the allocated space */ + Assert(ptr <= (endptr - ITEM_SIZE(dim))); + + /* copy NULL and frequency flags into the serialized MCV */ + memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims); + ptr += sizeof(bool) * ndims; + + memcpy(ptr, &mcvitem->frequency, sizeof(double)); + ptr += sizeof(double); + + memcpy(ptr, &mcvitem->base_frequency, sizeof(double)); + ptr += sizeof(double); + + /* store the indexes last */ + for (dim = 0; dim < ndims; dim++) + { + uint16 index = 0; + Datum *value; + + /* do the lookup only for non-NULL values */ + if (!mcvitem->isnull[dim]) + { + value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim], + info[dim].nvalues, sizeof(Datum), + compare_scalars_simple, &ssup[dim]); + + Assert(value != NULL); /* serialization or deduplication + * error */ + + /* compute index within the deduplicated array */ + index = (uint16) (value - values[dim]); + + /* check the index is within expected bounds */ + Assert(index < info[dim].nvalues); + } + + /* copy the index into the serialized MCV */ + memcpy(ptr, &index, sizeof(uint16)); + ptr += sizeof(uint16); + } + + /* make sure we don't overflow the allocated value */ + Assert(ptr <= endptr); + } + + /* at this point we expect to match the total_length exactly */ + Assert(ptr == endptr); + + pfree(values); + pfree(counts); + + return raw; +} + +/* + * statext_mcv_deserialize + * Reads serialized MCV list into MCVList structure. + * + * All the memory needed by the MCV list is allocated as a single chunk, so + * it's possible to simply pfree() it at once. + */ +MCVList * +statext_mcv_deserialize(bytea *data) +{ + int dim, + i; + Size expected_size; + MCVList *mcvlist; + char *raw; + char *ptr; + char *endptr PG_USED_FOR_ASSERTS_ONLY; + + int ndims, + nitems; + DimensionInfo *info = NULL; + + /* local allocation buffer (used only for deserialization) */ + Datum **map = NULL; + + /* MCV list */ + Size mcvlen; + + /* buffer used for the result */ + Size datalen; + char *dataptr; + char *valuesptr; + char *isnullptr; + + if (data == NULL) + return NULL; + + /* + * We can't possibly deserialize a MCV list if there's not even a complete + * header. 
We need an explicit formula here, because we serialize the + * header fields one by one, so we need to ignore struct alignment. + */ + if (VARSIZE_ANY(data) < MinSizeOfMCVList) + elog(ERROR, "invalid MCV size %zu (expected at least %zu)", + VARSIZE_ANY(data), MinSizeOfMCVList); + + /* read the MCV list header */ + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items)); + + /* pointer to the data part (skip the varlena header) */ + raw = (char *) data; + ptr = VARDATA_ANY(raw); + endptr = (char *) raw + VARSIZE_ANY(data); + + /* get the header and perform further sanity checks */ + memcpy(&mcvlist->magic, ptr, sizeof(uint32)); + ptr += sizeof(uint32); + + memcpy(&mcvlist->type, ptr, sizeof(uint32)); + ptr += sizeof(uint32); + + memcpy(&mcvlist->nitems, ptr, sizeof(uint32)); + ptr += sizeof(uint32); + + memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber)); + ptr += sizeof(AttrNumber); + + if (mcvlist->magic != STATS_MCV_MAGIC) + elog(ERROR, "invalid MCV magic %u (expected %u)", + mcvlist->magic, STATS_MCV_MAGIC); + + if (mcvlist->type != STATS_MCV_TYPE_BASIC) + elog(ERROR, "invalid MCV type %u (expected %u)", + mcvlist->type, STATS_MCV_TYPE_BASIC); + + if (mcvlist->ndimensions == 0) + elog(ERROR, "invalid zero-length dimension array in MCVList"); + else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) || + (mcvlist->ndimensions < 0)) + elog(ERROR, "invalid length (%d) dimension array in MCVList", + mcvlist->ndimensions); + + if (mcvlist->nitems == 0) + elog(ERROR, "invalid zero-length item array in MCVList"); + else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS) + elog(ERROR, "invalid length (%u) item array in MCVList", + mcvlist->nitems); + + nitems = mcvlist->nitems; + ndims = mcvlist->ndimensions; + + /* + * Check amount of data including DimensionInfo for all dimensions and + * also the serialized items (including uint16 indexes). Also, walk + * through the dimension information and add it to the sum. + */ + expected_size = SizeOfMCVList(ndims, nitems); + + /* + * Check that we have at least the dimension and info records, along with + * the items. We don't know the size of the serialized values yet. We need + * to do this check first, before accessing the dimension info. + */ + if (VARSIZE_ANY(data) < expected_size) + elog(ERROR, "invalid MCV size %zu (expected %zu)", + VARSIZE_ANY(data), expected_size); + + /* Now copy the array of type Oids. */ + memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims); + ptr += (sizeof(Oid) * ndims); + + /* Now it's safe to access the dimension info. */ + info = palloc(ndims * sizeof(DimensionInfo)); + + memcpy(info, ptr, ndims * sizeof(DimensionInfo)); + ptr += (ndims * sizeof(DimensionInfo)); + + /* account for the value arrays */ + for (dim = 0; dim < ndims; dim++) + { + /* + * XXX I wonder if we can/should rely on asserts here. Maybe those + * checks should be done every time? + */ + Assert(info[dim].nvalues >= 0); + Assert(info[dim].nbytes >= 0); + + expected_size += info[dim].nbytes; + } + + /* + * Now we know the total expected MCV size, including all the pieces + * (header, dimension info. items and deduplicated data). So do the final + * check on size. + */ + if (VARSIZE_ANY(data) != expected_size) + elog(ERROR, "invalid MCV size %zu (expected %zu)", + VARSIZE_ANY(data), expected_size); + + /* + * We need an array of Datum values for each dimension, so that we can + * easily translate the uint16 indexes later. We also need a top-level + * array of pointers to those per-dimension arrays. 
+ * + * While allocating the arrays for dimensions, compute how much space we + * need for a copy of the by-ref data, as we can't simply point to the + * original values (it might go away). + */ + datalen = 0; /* space for by-ref data */ + map = (Datum **) palloc(ndims * sizeof(Datum *)); + + for (dim = 0; dim < ndims; dim++) + { + map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues); + + /* space needed for a copy of data for by-ref types */ + datalen += info[dim].nbytes_aligned; + } + + /* + * Now resize the MCV list so that the allocation includes all the data. + * + * Allocate space for a copy of the data, as we can't simply reference the + * serialized data - it's not aligned properly, and it may disappear while + * we're still using the MCV list, e.g. due to catcache release. + * + * We do care about alignment here, because we will allocate all the + * pieces at once, but then use pointers to different parts. + */ + mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); + + /* arrays of values and isnull flags for all MCV items */ + mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims); + mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims); + + /* we don't quite need to align this, but it makes some asserts easier */ + mcvlen += MAXALIGN(datalen); + + /* now resize the deserialized MCV list, and compute pointers to parts */ + mcvlist = repalloc(mcvlist, mcvlen); + + /* pointer to the beginning of values/isnull arrays */ + valuesptr = (char *) mcvlist + + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); + + isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims)); + + dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims)); + + /* + * Build mapping (index => value) for translating the serialized data into + * the in-memory representation. + */ + for (dim = 0; dim < ndims; dim++) + { + /* remember start position in the input array */ + char *start PG_USED_FOR_ASSERTS_ONLY = ptr; + + if (info[dim].typbyval) + { + /* for by-val types we simply copy data into the mapping */ + for (i = 0; i < info[dim].nvalues; i++) + { + Datum v = 0; + + memcpy(&v, ptr, info[dim].typlen); + ptr += info[dim].typlen; + + map[dim][i] = fetch_att(&v, true, info[dim].typlen); + + /* no under/overflow of input array */ + Assert(ptr <= (start + info[dim].nbytes)); + } + } + else + { + /* for by-ref types we need to also make a copy of the data */ + + /* passed by reference, but fixed length (name, tid, ...) 
*/
+			if (info[dim].typlen > 0)
+			{
+				for (i = 0; i < info[dim].nvalues; i++)
+				{
+					memcpy(dataptr, ptr, info[dim].typlen);
+					ptr += info[dim].typlen;
+
+					/* just point into the array */
+					map[dim][i] = PointerGetDatum(dataptr);
+					dataptr += MAXALIGN(info[dim].typlen);
+				}
+			}
+			else if (info[dim].typlen == -1)
+			{
+				/* varlena */
+				for (i = 0; i < info[dim].nvalues; i++)
+				{
+					uint32		len;
+
+					/* read the uint32 length */
+					memcpy(&len, ptr, sizeof(uint32));
+					ptr += sizeof(uint32);
+
+					/* the length is data-only */
+					SET_VARSIZE(dataptr, len + VARHDRSZ);
+					memcpy(VARDATA(dataptr), ptr, len);
+					ptr += len;
+
+					/* just point into the array */
+					map[dim][i] = PointerGetDatum(dataptr);
+
+					/* skip to place of the next deserialized value */
+					dataptr += MAXALIGN(len + VARHDRSZ);
+				}
+			}
+			else if (info[dim].typlen == -2)
+			{
+				/* cstring */
+				for (i = 0; i < info[dim].nvalues; i++)
+				{
+					uint32		len;
+
+					memcpy(&len, ptr, sizeof(uint32));
+					ptr += sizeof(uint32);
+
+					memcpy(dataptr, ptr, len);
+					ptr += len;
+
+					/* just point into the array */
+					map[dim][i] = PointerGetDatum(dataptr);
+					dataptr += MAXALIGN(len);
+				}
+			}
+
+			/* no under/overflow of input array */
+			Assert(ptr <= (start + info[dim].nbytes));
+
+			/* no overflow of the output mcv value */
+			Assert(dataptr <= ((char *) mcvlist + mcvlen));
+		}
+
+		/* check we consumed input data for this dimension exactly */
+		Assert(ptr == (start + info[dim].nbytes));
+	}
+
+	/* we should have also filled the MCV list exactly */
+	Assert(dataptr == ((char *) mcvlist + mcvlen));
+
+	/* deserialize the MCV items and translate the indexes to Datums */
+	for (i = 0; i < nitems; i++)
+	{
+		MCVItem    *item = &mcvlist->items[i];
+
+		item->values = (Datum *) valuesptr;
+		valuesptr += MAXALIGN(sizeof(Datum) * ndims);
+
+		item->isnull = (bool *) isnullptr;
+		isnullptr += MAXALIGN(sizeof(bool) * ndims);
+
+		memcpy(item->isnull, ptr, sizeof(bool) * ndims);
+		ptr += sizeof(bool) * ndims;
+
+		memcpy(&item->frequency, ptr, sizeof(double));
+		ptr += sizeof(double);
+
+		memcpy(&item->base_frequency, ptr, sizeof(double));
+		ptr += sizeof(double);
+
+		/* finally translate the indexes (for non-NULL only) */
+		for (dim = 0; dim < ndims; dim++)
+		{
+			uint16		index;
+
+			memcpy(&index, ptr, sizeof(uint16));
+			ptr += sizeof(uint16);
+
+			if (item->isnull[dim])
+				continue;
+
+			item->values[dim] = map[dim][index];
+		}
+
+		/* check we're not overflowing the input */
+		Assert(ptr <= endptr);
+	}
+
+	/* check that we processed all the data */
+	Assert(ptr == endptr);
+
+	/* release the buffers used for mapping */
+	for (dim = 0; dim < ndims; dim++)
+		pfree(map[dim]);
+
+	pfree(map);
+
+	return mcvlist;
+}
+
+/*
+ * SRF with details about the items of an MCV list:
+ *
+ * - item ID (0...nitems)
+ * - values (string array)
+ * - nulls only (boolean array)
+ * - frequency (double precision)
+ * - base_frequency (double precision)
+ *
+ * The input is the serialized MCV list (a pg_mcv_list value), and no rows
+ * are returned if there is no MCV list to show.
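+ *
+ * The list can be inspected from SQL via the pg_mcv_list_items() wrapper
+ * around this function, e.g. (the statistics name 'stts' is illustrative):
+ *
+ *	  SELECT m.* FROM pg_statistic_ext
+ *	         JOIN pg_statistic_ext_data ON (oid = stxoid),
+ *	         pg_mcv_list_items(stxdmcv) m
+ *	  WHERE stxname = 'stts';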
+ */ +Datum +pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + MCVList *mcvlist; + TupleDesc tupdesc; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0)); + + funcctx->user_fctx = mcvlist; + + /* total number of tuples to be returned */ + funcctx->max_calls = 0; + if (funcctx->user_fctx != NULL) + funcctx->max_calls = mcvlist->nitems; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + tupdesc = BlessTupleDesc(tupdesc); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more + * left to send */ + { + Datum values[5]; + bool nulls[5]; + HeapTuple tuple; + Datum result; + ArrayBuildState *astate_values = NULL; + ArrayBuildState *astate_nulls = NULL; + + int i; + MCVList *mcvlist; + MCVItem *item; + + mcvlist = (MCVList *) funcctx->user_fctx; + + Assert(funcctx->call_cntr < mcvlist->nitems); + + item = &mcvlist->items[funcctx->call_cntr]; + + for (i = 0; i < mcvlist->ndimensions; i++) + { + + astate_nulls = accumArrayResult(astate_nulls, + BoolGetDatum(item->isnull[i]), + false, + BOOLOID, + CurrentMemoryContext); + + if (!item->isnull[i]) + { + bool isvarlena; + Oid outfunc; + FmgrInfo fmgrinfo; + Datum val; + text *txt; + + /* lookup output func for the type */ + getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena); + fmgr_info(outfunc, &fmgrinfo); + + val = FunctionCall1(&fmgrinfo, item->values[i]); + txt = cstring_to_text(DatumGetPointer(val)); + + astate_values = accumArrayResult(astate_values, + PointerGetDatum(txt), + false, + TEXTOID, + CurrentMemoryContext); + } + else + astate_values = accumArrayResult(astate_values, + (Datum) 0, + true, + TEXTOID, + CurrentMemoryContext); + } + + values[0] = Int32GetDatum(funcctx->call_cntr); + values[1] = makeArrayResult(astate_values, CurrentMemoryContext); + values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext); + values[3] = Float8GetDatum(item->frequency); + values[4] = Float8GetDatum(item->base_frequency); + + /* no NULLs in the tuple */ + memset(nulls, 0, sizeof(nulls)); + + /* build a tuple */ + tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + SRF_RETURN_NEXT(funcctx, result); + } + else /* do when there is no more left */ + { + SRF_RETURN_DONE(funcctx); + } +} + +/* + * pg_mcv_list_in - input routine for type pg_mcv_list. + * + * pg_mcv_list is real enough to be a table column, but it has no operations + * of its own, and disallows input too + */ +Datum +pg_mcv_list_in(PG_FUNCTION_ARGS) +{ + /* + * pg_mcv_list stores the data in binary form and parsing text input is + * not needed, so disallow this. 
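+ *
+ * So, for example, an attempted cast such as
+ *
+ *	  SELECT '\x1234'::pg_mcv_list;
+ *
+ * fails with the "cannot accept a value of type pg_mcv_list" error below.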
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_mcv_list"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + + +/* + * pg_mcv_list_out - output routine for type pg_mcv_list. + * + * MCV lists are serialized into a bytea value, so we simply call byteaout() + * to serialize the value into text. But it'd be nice to serialize that into + * a meaningful representation (e.g. for inspection by people). + * + * XXX This should probably return something meaningful, similar to what + * pg_dependencies_out does. Not sure how to deal with the deduplicated + * values, though - do we want to expand that or not? + */ +Datum +pg_mcv_list_out(PG_FUNCTION_ARGS) +{ + return byteaout(fcinfo); +} + +/* + * pg_mcv_list_recv - binary input routine for type pg_mcv_list. + */ +Datum +pg_mcv_list_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_mcv_list"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_mcv_list_send - binary output routine for type pg_mcv_list. + * + * MCV lists are serialized in a bytea value (although the type is named + * differently), so let's just send that. + */ +Datum +pg_mcv_list_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * match the attribute/expression to a dimension of the statistic + * + * Returns the zero-based index of the matching statistics dimension. + * Optionally determines the collation. + */ +static int +mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid) +{ + int idx; + + if (IsA(expr, Var)) + { + /* simple Var, so just lookup using varattno */ + Var *var = (Var *) expr; + + if (collid) + *collid = var->varcollid; + + idx = bms_member_index(keys, var->varattno); + + if (idx < 0) + elog(ERROR, "variable not found in statistics object"); + } + else + { + /* expression - lookup in stats expressions */ + ListCell *lc; + + if (collid) + *collid = exprCollation(expr); + + /* expressions are stored after the simple columns */ + idx = bms_num_members(keys); + foreach(lc, exprs) + { + Node *stat_expr = (Node *) lfirst(lc); + + if (equal(expr, stat_expr)) + break; + + idx++; + } + + if (lc == NULL) + elog(ERROR, "expression not found in statistics object"); + } + + return idx; +} + +/* + * mcv_get_match_bitmap + * Evaluate clauses using the MCV list, and update the match bitmap. + * + * A match bitmap keeps match/mismatch status for each MCV item, and we + * update it based on additional clauses. We also use it to skip items + * that can't possibly match (e.g. item marked as "mismatch" can't change + * to "match" when evaluating AND clause list). + * + * The function also returns a flag indicating whether there was an + * equality condition for all attributes, the minimum frequency in the MCV + * list, and a total MCV frequency (sum of frequencies for all items). + * + * XXX Currently the match bitmap uses a bool for each MCV item, which is + * somewhat wasteful as we could do with just a single bit, thus reducing + * the size to ~1/8. It would also allow us to combine bitmaps simply using + * & and |, which should be faster than min/max. The bitmaps are fairly + * small, though (thanks to the cap on the MCV list size). + */ +static bool * +mcv_get_match_bitmap(PlannerInfo *root, List *clauses, + Bitmapset *keys, List *exprs, + MCVList *mcvlist, bool is_or) +{ + ListCell *l; + bool *matches; + + /* The bitmap may be partially built. 
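+ *
+ * (Editor's note: RESULT_MERGE and RESULT_IS_FINAL are macros defined
+ * earlier in this file; roughly,
+ *
+ *	  RESULT_MERGE(value, is_or, match)
+ *		  = (is_or) ? (value || match) : (value && match)
+ *
+ *	  RESULT_IS_FINAL(value, is_or)
+ *		  = (is_or) ? (value) : (!value)
+ *
+ * so once an OR entry becomes true, or an AND entry becomes false, no
+ * later clause can change it.)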
*/ + Assert(clauses != NIL); + Assert(mcvlist != NULL); + Assert(mcvlist->nitems > 0); + Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS); + + matches = palloc(sizeof(bool) * mcvlist->nitems); + memset(matches, !is_or, sizeof(bool) * mcvlist->nitems); + + /* + * Loop through the list of clauses, and for each of them evaluate all the + * MCV items not yet eliminated by the preceding clauses. + */ + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + + /* if it's a RestrictInfo, then extract the clause */ + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + /* + * Handle the various types of clauses - OpClause, NullTest and + * AND/OR/NOT + */ + if (is_opclause(clause)) + { + OpExpr *expr = (OpExpr *) clause; + FmgrInfo opproc; + + /* valid only after examine_opclause_args returns true */ + Node *clause_expr; + Const *cst; + bool expronleft; + int idx; + Oid collid; + + fmgr_info(get_opcode(expr->opno), &opproc); + + /* extract the var/expr and const from the expression */ + if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) + elog(ERROR, "incompatible clause"); + + /* match the attribute/expression to a dimension of the statistic */ + idx = mcv_match_expression(clause_expr, keys, exprs, &collid); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. + */ + for (int i = 0; i < mcvlist->nitems; i++) + { + bool match = true; + MCVItem *item = &mcvlist->items[i]; + + Assert(idx >= 0); + + /* + * When the MCV item or the Const value is NULL we can treat + * this as a mismatch. We must not call the operator because + * of strictness. + */ + if (item->isnull[idx] || cst->constisnull) + { + matches[i] = RESULT_MERGE(matches[i], is_or, false); + continue; + } + + /* + * Skip MCV items that can't change result in the bitmap. Once + * the value gets false for AND-lists, or true for OR-lists, + * we don't need to look at more clauses. + */ + if (RESULT_IS_FINAL(matches[i], is_or)) + continue; + + /* + * First check whether the constant is below the lower + * boundary (in that case we can skip the bucket, because + * there's no overlap). + * + * We don't store collations used to build the statistics, but + * we can use the collation for the attribute itself, as + * stored in varcollid. We do reset the statistics after a + * type change (including collation change), so this is OK. + * For expressions, we use the collation extracted from the + * expression itself. 
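+ *
+ * The argument order passed to the operator matches the original clause:
+ * for (a < 10) we evaluate opproc(a_value, 10), while for the commuted
+ * form (10 < a) we evaluate opproc(10, a_value), as done below.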
+ */ + if (expronleft) + match = DatumGetBool(FunctionCall2Coll(&opproc, + collid, + item->values[idx], + cst->constvalue)); + else + match = DatumGetBool(FunctionCall2Coll(&opproc, + collid, + cst->constvalue, + item->values[idx])); + + /* update the match bitmap with the result */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + FmgrInfo opproc; + + /* valid only after examine_opclause_args returns true */ + Node *clause_expr; + Const *cst; + bool expronleft; + Oid collid; + int idx; + + /* array evaluation */ + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + + fmgr_info(get_opcode(expr->opno), &opproc); + + /* extract the var/expr and const from the expression */ + if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) + elog(ERROR, "incompatible clause"); + + /* We expect Var on left */ + if (!expronleft) + elog(ERROR, "incompatible clause"); + + /* + * Deconstruct the array constant, unless it's NULL (we'll cover + * that case below) + */ + if (!cst->constisnull) + { + arrayval = DatumGetArrayTypeP(cst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); + } + + /* match the attribute/expression to a dimension of the statistic */ + idx = mcv_match_expression(clause_expr, keys, exprs, &collid); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. + */ + for (int i = 0; i < mcvlist->nitems; i++) + { + int j; + bool match = !expr->useOr; + MCVItem *item = &mcvlist->items[i]; + + /* + * When the MCV item or the Const value is NULL we can treat + * this as a mismatch. We must not call the operator because + * of strictness. + */ + if (item->isnull[idx] || cst->constisnull) + { + matches[i] = RESULT_MERGE(matches[i], is_or, false); + continue; + } + + /* + * Skip MCV items that can't change result in the bitmap. Once + * the value gets false for AND-lists, or true for OR-lists, + * we don't need to look at more clauses. + */ + if (RESULT_IS_FINAL(matches[i], is_or)) + continue; + + for (j = 0; j < num_elems; j++) + { + Datum elem_value = elem_values[j]; + bool elem_isnull = elem_nulls[j]; + bool elem_match; + + /* NULL values always evaluate as not matching. */ + if (elem_isnull) + { + match = RESULT_MERGE(match, expr->useOr, false); + continue; + } + + /* + * Stop evaluating the array elements once we reach a + * matching value that can't change - ALL() is the same as + * AND-list, ANY() is the same as OR-list. 
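+ *
+ * For example, (a = ANY (ARRAY[1, 2, 3])) is evaluated like the OR-list
+ * (a = 1 OR a = 2 OR a = 3), while (a < ALL (ARRAY[1, 2, 3])) behaves
+ * like the corresponding AND-list.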
+ */ + if (RESULT_IS_FINAL(match, expr->useOr)) + break; + + elem_match = DatumGetBool(FunctionCall2Coll(&opproc, + collid, + item->values[idx], + elem_value)); + + match = RESULT_MERGE(match, expr->useOr, elem_match); + } + + /* update the match bitmap with the result */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + else if (IsA(clause, NullTest)) + { + NullTest *expr = (NullTest *) clause; + Node *clause_expr = (Node *) (expr->arg); + + /* match the attribute/expression to a dimension of the statistic */ + int idx = mcv_match_expression(clause_expr, keys, exprs, NULL); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. + */ + for (int i = 0; i < mcvlist->nitems; i++) + { + bool match = false; /* assume mismatch */ + MCVItem *item = &mcvlist->items[i]; + + /* if the clause mismatches the MCV item, update the bitmap */ + switch (expr->nulltesttype) + { + case IS_NULL: + match = (item->isnull[idx]) ? true : match; + break; + + case IS_NOT_NULL: + match = (!item->isnull[idx]) ? true : match; + break; + } + + /* now, update the match bitmap, depending on OR/AND type */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + else if (is_orclause(clause) || is_andclause(clause)) + { + /* AND/OR clause, with all subclauses being compatible */ + + int i; + BoolExpr *bool_clause = ((BoolExpr *) clause); + List *bool_clauses = bool_clause->args; + + /* match/mismatch bitmap for each MCV item */ + bool *bool_matches = NULL; + + Assert(bool_clauses != NIL); + Assert(list_length(bool_clauses) >= 2); + + /* build the match bitmap for the OR-clauses */ + bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs, + mcvlist, is_orclause(clause)); + + /* + * Merge the bitmap produced by mcv_get_match_bitmap into the + * current one. We need to consider if we're evaluating AND or OR + * condition when merging the results. + */ + for (i = 0; i < mcvlist->nitems; i++) + matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]); + + pfree(bool_matches); + } + else if (is_notclause(clause)) + { + /* NOT clause, with all subclauses compatible */ + + int i; + BoolExpr *not_clause = ((BoolExpr *) clause); + List *not_args = not_clause->args; + + /* match/mismatch bitmap for each MCV item */ + bool *not_matches = NULL; + + Assert(not_args != NIL); + Assert(list_length(not_args) == 1); + + /* build the match bitmap for the NOT-clause */ + not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs, + mcvlist, false); + + /* + * Merge the bitmap produced by mcv_get_match_bitmap into the + * current one. We're handling a NOT clause, so invert the result + * before merging it into the global bitmap. + */ + for (i = 0; i < mcvlist->nitems; i++) + matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]); + + pfree(not_matches); + } + else if (IsA(clause, Var)) + { + /* Var (has to be a boolean Var, possibly from below NOT) */ + + Var *var = (Var *) (clause); + + /* match the attribute to a dimension of the statistic */ + int idx = bms_member_index(keys, var->varattno); + + Assert(var->vartype == BOOLOID); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. 
+ */ + for (int i = 0; i < mcvlist->nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + bool match = false; + + /* if the item is NULL, it's a mismatch */ + if (!item->isnull[idx] && DatumGetBool(item->values[idx])) + match = true; + + /* update the result bitmap */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + else + { + /* Otherwise, it must be a bare boolean-returning expression */ + int idx; + + /* match the expression to a dimension of the statistic */ + idx = mcv_match_expression(clause, keys, exprs, NULL); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. + */ + for (int i = 0; i < mcvlist->nitems; i++) + { + bool match; + MCVItem *item = &mcvlist->items[i]; + + /* "match" just means it's bool TRUE */ + match = !item->isnull[idx] && DatumGetBool(item->values[idx]); + + /* now, update the match bitmap, depending on OR/AND type */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + } + + return matches; +} + + +/* + * mcv_combine_selectivities + * Combine per-column and multi-column MCV selectivity estimates. + * + * simple_sel is a "simple" selectivity estimate (produced without using any + * extended statistics, essentially assuming independence of columns/clauses). + * + * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of + * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then + * essentially interpreted as a correction to be added to simple_sel, as + * described below. + * + * mcv_totalsel is the sum of the frequencies of all MCV items (not just the + * matching ones). This is used as an upper bound on the portion of the + * selectivity estimates not covered by the MCV statistics. + * + * Note: While simple and base selectivities are defined in a quite similar + * way, the values are computed differently and are not therefore equal. The + * simple selectivity is computed as a product of per-clause estimates, while + * the base selectivity is computed by adding up base frequencies of matching + * items of the multi-column MCV list. So the values may differ for two main + * reasons - (a) the MCV list may not cover 100% of the data and (b) some of + * the MCV items did not match the estimated clauses. + * + * As both (a) and (b) reduce the base selectivity value, it generally holds + * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the + * values may be equal. + * + * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not + * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a + * correction for the part covered by the MCV list. Those two statements are + * actually equivalent. 
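+ *
+ * A worked example (editor's illustration): if the MCV list covers 80% of
+ * the data (mcv_totalsel = 0.8), the matching items have mcv_sel = 0.3
+ * and mcv_basesel = 0.2, and the independence-based estimate is
+ * simple_sel = 0.25, then
+ *
+ *	  other_sel = simple_sel - mcv_basesel = 0.05
+ *
+ * (below the 1 - mcv_totalsel = 0.2 cap), and the combined estimate is
+ *
+ *	  sel = mcv_sel + other_sel = 0.35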
+ */ +Selectivity +mcv_combine_selectivities(Selectivity simple_sel, + Selectivity mcv_sel, + Selectivity mcv_basesel, + Selectivity mcv_totalsel) +{ + Selectivity other_sel; + Selectivity sel; + + /* estimated selectivity of values not covered by MCV matches */ + other_sel = simple_sel - mcv_basesel; + CLAMP_PROBABILITY(other_sel); + + /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */ + if (other_sel > 1.0 - mcv_totalsel) + other_sel = 1.0 - mcv_totalsel; + + /* overall selectivity is the sum of the MCV and non-MCV parts */ + sel = mcv_sel + other_sel; + CLAMP_PROBABILITY(sel); + + return sel; +} + + +/* + * mcv_clauselist_selectivity + * Use MCV statistics to estimate the selectivity of an implicitly-ANDed + * list of clauses. + * + * This determines which MCV items match every clause in the list and returns + * the sum of the frequencies of those items. + * + * In addition, it returns the sum of the base frequencies of each of those + * items (that is the sum of the selectivities that each item would have if + * the columns were independent of one another), and the total selectivity of + * all the MCV items (not just the matching ones). These are expected to be + * used together with a "simple" selectivity estimate (one based only on + * per-column statistics) to produce an overall selectivity estimate that + * makes use of both per-column and multi-column statistics --- see + * mcv_combine_selectivities(). + */ +Selectivity +mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, + List *clauses, int varRelid, + JoinType jointype, SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Selectivity *basesel, Selectivity *totalsel) +{ + int i; + MCVList *mcv; + Selectivity s = 0.0; + RangeTblEntry *rte = root->simple_rte_array[rel->relid]; + + /* match/mismatch bitmap for each MCV item */ + bool *matches = NULL; + + /* load the MCV list stored in the statistics object */ + mcv = statext_mcv_load(stat->statOid, rte->inh); + + /* build a match bitmap for the clauses */ + matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs, + mcv, false); + + /* sum frequencies for all the matching MCV items */ + *basesel = 0.0; + *totalsel = 0.0; + for (i = 0; i < mcv->nitems; i++) + { + *totalsel += mcv->items[i].frequency; + + if (matches[i] != false) + { + *basesel += mcv->items[i].base_frequency; + s += mcv->items[i].frequency; + } + } + + return s; +} + + +/* + * mcv_clause_selectivity_or + * Use MCV statistics to estimate the selectivity of a clause that + * appears in an ORed list of clauses. + * + * As with mcv_clauselist_selectivity() this determines which MCV items match + * the clause and returns both the sum of the frequencies and the sum of the + * base frequencies of those items, as well as the sum of the frequencies of + * all MCV items (not just the matching ones) so that this information can be + * used by mcv_combine_selectivities() to produce a selectivity estimate that + * makes use of both per-column and multi-column statistics. + * + * Additionally, we return information to help compute the overall selectivity + * of the ORed list of clauses assumed to contain this clause. This function + * is intended to be called for each clause in the ORed list of clauses, + * allowing the overall selectivity to be computed using the following + * algorithm: + * + * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity + * of the first n clauses in the list. 
Then the combined selectivity taking + * into account the next clause C[n+1] can be written as + * + * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1]) + * + * The final term above represents the overlap between the clauses examined so + * far and the (n+1)'th clause. To estimate its selectivity, we track the + * match bitmap for the ORed list of clauses examined so far and examine its + * intersection with the match bitmap for the (n+1)'th clause. + * + * We then also return the sums of the MCV item frequencies and base + * frequencies for the match bitmap intersection corresponding to the overlap + * term above, so that they can be combined with a simple selectivity estimate + * for that term. + * + * The parameter "or_matches" is an in/out parameter tracking the match bitmap + * for the clauses examined so far. The caller is expected to set it to NULL + * the first time it calls this function. + */ +Selectivity +mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, + MCVList *mcv, Node *clause, bool **or_matches, + Selectivity *basesel, Selectivity *overlap_mcvsel, + Selectivity *overlap_basesel, Selectivity *totalsel) +{ + Selectivity s = 0.0; + bool *new_matches; + int i; + + /* build the OR-matches bitmap, if not built already */ + if (*or_matches == NULL) + *or_matches = palloc0(sizeof(bool) * mcv->nitems); + + /* build the match bitmap for the new clause */ + new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys, + stat->exprs, mcv, false); + + /* + * Sum the frequencies for all the MCV items matching this clause and also + * those matching the overlap between this clause and any of the preceding + * clauses as described above. + */ + *basesel = 0.0; + *overlap_mcvsel = 0.0; + *overlap_basesel = 0.0; + *totalsel = 0.0; + for (i = 0; i < mcv->nitems; i++) + { + *totalsel += mcv->items[i].frequency; + + if (new_matches[i]) + { + s += mcv->items[i].frequency; + *basesel += mcv->items[i].base_frequency; + + if ((*or_matches)[i]) + { + *overlap_mcvsel += mcv->items[i].frequency; + *overlap_basesel += mcv->items[i].base_frequency; + } + } + + /* update the OR-matches bitmap for the next clause */ + (*or_matches)[i] = (*or_matches)[i] || new_matches[i]; + } + + pfree(new_matches); + + return s; +} diff --git a/src/backend/statistics/meson.build b/src/backend/statistics/meson.build new file mode 100644 index 0000000..e12737b --- /dev/null +++ b/src/backend/statistics/meson.build @@ -0,0 +1,8 @@ +# Copyright (c) 2022-2023, PostgreSQL Global Development Group + +backend_sources += files( + 'dependencies.c', + 'extended_stats.c', + 'mcv.c', + 'mvdistinct.c', +) diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c new file mode 100644 index 0000000..6d25c14 --- /dev/null +++ b/src/backend/statistics/mvdistinct.c @@ -0,0 +1,700 @@ +/*------------------------------------------------------------------------- + * + * mvdistinct.c + * POSTGRES multivariate ndistinct coefficients + * + * Estimating number of groups in a combination of columns (e.g. for GROUP BY) + * is tricky, and the estimation error is often significant. + + * The multivariate ndistinct coefficients address this by storing ndistinct + * estimates for combinations of the user-specified columns. So for example + * given a statistics object on three columns (a,b,c), this module estimates + * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column + * estimates are already available in pg_statistic. 
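+ *
+ * For example (editor's illustration; object and table names are made up):
+ *
+ *	  CREATE STATISTICS s1 (ndistinct) ON a, b, c FROM t1;
+ *	  ANALYZE t1;
+ *
+ * then allows a query with GROUP BY a, b to use the stored (a,b) estimate
+ * instead of multiplying the per-column ndistinct values.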
+ * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/mvdistinct.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "lib/stringinfo.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +static double ndistinct_for_combination(double totalrows, StatsBuildData *data, + int k, int *combination); +static double estimate_ndistinct(double totalrows, int numrows, int d, int f1); +static int n_choose_k(int n, int k); +static int num_combinations(int n); + +/* size of the struct header fields (magic, type, nitems) */ +#define SizeOfHeader (3 * sizeof(uint32)) + +/* size of a serialized ndistinct item (coefficient, natts, atts) */ +#define SizeOfItem(natts) \ + (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber)) + +/* minimal size of a ndistinct item (with two attributes) */ +#define MinSizeOfItem SizeOfItem(2) + +/* minimal size of mvndistinct, when all items are minimal */ +#define MinSizeOfItems(nitems) \ + (SizeOfHeader + (nitems) * MinSizeOfItem) + +/* Combination generator API */ + +/* internal state for generator of k-combinations of n elements */ +typedef struct CombinationGenerator +{ + int k; /* size of the combination */ + int n; /* total number of elements */ + int current; /* index of the next combination to return */ + int ncombinations; /* number of combinations (size of array) */ + int *combinations; /* array of pre-built combinations */ +} CombinationGenerator; + +static CombinationGenerator *generator_init(int n, int k); +static void generator_free(CombinationGenerator *state); +static int *generator_next(CombinationGenerator *state); +static void generate_combinations(CombinationGenerator *state); + + +/* + * statext_ndistinct_build + * Compute ndistinct coefficient for the combination of attributes. + * + * This computes the ndistinct estimate using the same estimator used + * in analyze.c and then computes the coefficient. + * + * To handle expressions easily, we treat them as system attributes with + * negative attnums, and offset everything by number of expressions to + * allow using Bitmapsets. 
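+ *
+ * The result has one item per combination of two or more attributes, i.e.
+ * num_combinations(n) = 2^n - n - 1 items (4 for n = 3, 11 for n = 4),
+ * built below in order of increasing combination size k.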
+ */ +MVNDistinct * +statext_ndistinct_build(double totalrows, StatsBuildData *data) +{ + MVNDistinct *result; + int k; + int itemcnt; + int numattrs = data->nattnums; + int numcombs = num_combinations(numattrs); + + result = palloc(offsetof(MVNDistinct, items) + + numcombs * sizeof(MVNDistinctItem)); + result->magic = STATS_NDISTINCT_MAGIC; + result->type = STATS_NDISTINCT_TYPE_BASIC; + result->nitems = numcombs; + + itemcnt = 0; + for (k = 2; k <= numattrs; k++) + { + int *combination; + CombinationGenerator *generator; + + /* generate combinations of K out of N elements */ + generator = generator_init(numattrs, k); + + while ((combination = generator_next(generator))) + { + MVNDistinctItem *item = &result->items[itemcnt]; + int j; + + item->attributes = palloc(sizeof(AttrNumber) * k); + item->nattributes = k; + + /* translate the indexes to attnums */ + for (j = 0; j < k; j++) + { + item->attributes[j] = data->attnums[combination[j]]; + + Assert(AttributeNumberIsValid(item->attributes[j])); + } + + item->ndistinct = + ndistinct_for_combination(totalrows, data, k, combination); + + itemcnt++; + Assert(itemcnt <= result->nitems); + } + + generator_free(generator); + } + + /* must consume exactly the whole output array */ + Assert(itemcnt == result->nitems); + + return result; +} + +/* + * statext_ndistinct_load + * Load the ndistinct value for the indicated pg_statistic_ext tuple + */ +MVNDistinct * +statext_ndistinct_load(Oid mvoid, bool inh) +{ + MVNDistinct *result; + bool isnull; + Datum ndist; + HeapTuple htup; + + htup = SearchSysCache2(STATEXTDATASTXOID, + ObjectIdGetDatum(mvoid), BoolGetDatum(inh)); + if (!HeapTupleIsValid(htup)) + elog(ERROR, "cache lookup failed for statistics object %u", mvoid); + + ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup, + Anum_pg_statistic_ext_data_stxdndistinct, &isnull); + if (isnull) + elog(ERROR, + "requested statistics kind \"%c\" is not yet built for statistics object %u", + STATS_EXT_NDISTINCT, mvoid); + + result = statext_ndistinct_deserialize(DatumGetByteaPP(ndist)); + + ReleaseSysCache(htup); + + return result; +} + +/* + * statext_ndistinct_serialize + * serialize ndistinct to the on-disk bytea format + */ +bytea * +statext_ndistinct_serialize(MVNDistinct *ndistinct) +{ + int i; + bytea *output; + char *tmp; + Size len; + + Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC); + Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC); + + /* + * Base size is size of scalar fields in the struct, plus one base struct + * for each item, including number of items for each. 
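+ *
+ * A worked example (editor's illustration, assuming 4-byte int and 2-byte
+ * AttrNumber): SizeOfHeader is 12 bytes and SizeOfItem(natts) is
+ * 8 + 4 + 2 * natts, so a statistics object on three columns (three
+ * 2-attribute items plus one 3-attribute item) serializes into
+ * VARHDRSZ + 12 + 3 * 16 + 18 = VARHDRSZ + 78 bytes.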
+ */ + len = VARHDRSZ + SizeOfHeader; + + /* and also include space for the actual attribute numbers */ + for (i = 0; i < ndistinct->nitems; i++) + { + int nmembers; + + nmembers = ndistinct->items[i].nattributes; + Assert(nmembers >= 2); + + len += SizeOfItem(nmembers); + } + + output = (bytea *) palloc(len); + SET_VARSIZE(output, len); + + tmp = VARDATA(output); + + /* Store the base struct values (magic, type, nitems) */ + memcpy(tmp, &ndistinct->magic, sizeof(uint32)); + tmp += sizeof(uint32); + memcpy(tmp, &ndistinct->type, sizeof(uint32)); + tmp += sizeof(uint32); + memcpy(tmp, &ndistinct->nitems, sizeof(uint32)); + tmp += sizeof(uint32); + + /* + * store number of attributes and attribute numbers for each entry + */ + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + int nmembers = item.nattributes; + + memcpy(tmp, &item.ndistinct, sizeof(double)); + tmp += sizeof(double); + memcpy(tmp, &nmembers, sizeof(int)); + tmp += sizeof(int); + + memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers); + tmp += nmembers * sizeof(AttrNumber); + + /* protect against overflows */ + Assert(tmp <= ((char *) output + len)); + } + + /* check we used exactly the expected space */ + Assert(tmp == ((char *) output + len)); + + return output; +} + +/* + * statext_ndistinct_deserialize + * Read an on-disk bytea format MVNDistinct to in-memory format + */ +MVNDistinct * +statext_ndistinct_deserialize(bytea *data) +{ + int i; + Size minimum_size; + MVNDistinct ndist; + MVNDistinct *ndistinct; + char *tmp; + + if (data == NULL) + return NULL; + + /* we expect at least the basic fields of MVNDistinct struct */ + if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader) + elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)", + VARSIZE_ANY_EXHDR(data), SizeOfHeader); + + /* initialize pointer to the data part (skip the varlena header) */ + tmp = VARDATA_ANY(data); + + /* read the header fields and perform basic sanity checks */ + memcpy(&ndist.magic, tmp, sizeof(uint32)); + tmp += sizeof(uint32); + memcpy(&ndist.type, tmp, sizeof(uint32)); + tmp += sizeof(uint32); + memcpy(&ndist.nitems, tmp, sizeof(uint32)); + tmp += sizeof(uint32); + + if (ndist.magic != STATS_NDISTINCT_MAGIC) + elog(ERROR, "invalid ndistinct magic %08x (expected %08x)", + ndist.magic, STATS_NDISTINCT_MAGIC); + if (ndist.type != STATS_NDISTINCT_TYPE_BASIC) + elog(ERROR, "invalid ndistinct type %d (expected %d)", + ndist.type, STATS_NDISTINCT_TYPE_BASIC); + if (ndist.nitems == 0) + elog(ERROR, "invalid zero-length item array in MVNDistinct"); + + /* what minimum bytea size do we expect for those parameters */ + minimum_size = MinSizeOfItems(ndist.nitems); + if (VARSIZE_ANY_EXHDR(data) < minimum_size) + elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)", + VARSIZE_ANY_EXHDR(data), minimum_size); + + /* + * Allocate space for the ndistinct items (no space for each item's + * attnos: those live in bitmapsets allocated separately) + */ + ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) + + (ndist.nitems * sizeof(MVNDistinctItem))); + ndistinct->magic = ndist.magic; + ndistinct->type = ndist.type; + ndistinct->nitems = ndist.nitems; + + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem *item = &ndistinct->items[i]; + + /* ndistinct value */ + memcpy(&item->ndistinct, tmp, sizeof(double)); + tmp += sizeof(double); + + /* number of attributes */ + memcpy(&item->nattributes, tmp, sizeof(int)); + tmp += sizeof(int); + Assert((item->nattributes >= 2) && 
(item->nattributes <= STATS_MAX_DIMENSIONS)); + + item->attributes + = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber)); + + memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes); + tmp += sizeof(AttrNumber) * item->nattributes; + + /* still within the bytea */ + Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); + } + + /* we should have consumed the whole bytea exactly */ + Assert(tmp == ((char *) data + VARSIZE_ANY(data))); + + return ndistinct; +} + +/* + * pg_ndistinct_in + * input routine for type pg_ndistinct + * + * pg_ndistinct is real enough to be a table column, but it has no + * operations of its own, and disallows input (just like pg_node_tree). + */ +Datum +pg_ndistinct_in(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct + * output routine for type pg_ndistinct + * + * Produces a human-readable representation of the value. + */ +Datum +pg_ndistinct_out(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_PP(0); + MVNDistinct *ndist = statext_ndistinct_deserialize(data); + int i; + StringInfoData str; + + initStringInfo(&str); + appendStringInfoChar(&str, '{'); + + for (i = 0; i < ndist->nitems; i++) + { + int j; + MVNDistinctItem item = ndist->items[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + for (j = 0; j < item.nattributes; j++) + { + AttrNumber attnum = item.attributes[j]; + + appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum); + } + appendStringInfo(&str, "\": %d", (int) item.ndistinct); + } + + appendStringInfoChar(&str, '}'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * pg_ndistinct_recv + * binary input routine for type pg_ndistinct + */ +Datum +pg_ndistinct_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct_send + * binary output routine for type pg_ndistinct + * + * n-distinct is serialized into a bytea value, so let's send that. + */ +Datum +pg_ndistinct_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * ndistinct_for_combination + * Estimates number of distinct values in a combination of columns. + * + * This uses the same ndistinct estimator as compute_scalar_stats() in + * ANALYZE, i.e., + * n*d / (n - f1 + f1*n/N) + * + * except that instead of values in a single column we are dealing with + * combination of multiple columns. + */ +static double +ndistinct_for_combination(double totalrows, StatsBuildData *data, + int k, int *combination) +{ + int i, + j; + int f1, + cnt, + d; + bool *isnull; + Datum *values; + SortItem *items; + MultiSortSupport mss; + int numrows = data->numrows; + + mss = multi_sort_init(k); + + /* + * In order to determine the number of distinct elements, create separate + * values[]/isnull[] arrays with all the data we have, then sort them + * using the specified column combination as dimensions. We could try to + * sort in place, but it'd probably be more complex and bug-prone. 
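+ *
+ * A worked example of the estimator (editor's illustration): for a sample
+ * of numrows = 30000 rows out of totalrows = 1000000, with d = 1000
+ * distinct combinations of which f1 = 400 occur exactly once, we get
+ *
+ *	  30000 * 1000 / (30000 - 400 + 400 * 30000 / 1000000)
+ *		  = 30000000 / 29612 ~= 1013
+ *
+ * which estimate_ndistinct() then clamps to the range [d, totalrows].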
+ */ + items = (SortItem *) palloc(numrows * sizeof(SortItem)); + values = (Datum *) palloc0(sizeof(Datum) * numrows * k); + isnull = (bool *) palloc0(sizeof(bool) * numrows * k); + + for (i = 0; i < numrows; i++) + { + items[i].values = &values[i * k]; + items[i].isnull = &isnull[i * k]; + } + + /* + * For each dimension, set up sort-support and fill in the values from the + * sample data. + * + * We use the column data types' default sort operators and collations; + * perhaps at some point it'd be worth using column-specific collations? + */ + for (i = 0; i < k; i++) + { + Oid typid; + TypeCacheEntry *type; + Oid collid = InvalidOid; + VacAttrStats *colstat = data->stats[combination[i]]; + + typid = colstat->attrtypid; + collid = colstat->attrcollid; + + type = lookup_type_cache(typid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for ordering operator for type %u", + typid); + + /* prepare the sort function for this dimension */ + multi_sort_add_dimension(mss, i, type->lt_opr, collid); + + /* accumulate all the data for this dimension into the arrays */ + for (j = 0; j < numrows; j++) + { + items[j].values[i] = data->values[combination[i]][j]; + items[j].isnull[i] = data->nulls[combination[i]][j]; + } + } + + /* We can sort the array now ... */ + qsort_interruptible(items, numrows, sizeof(SortItem), + multi_sort_compare, mss); + + /* ... and count the number of distinct combinations */ + + f1 = 0; + cnt = 1; + d = 1; + for (i = 1; i < numrows; i++) + { + if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) + { + if (cnt == 1) + f1 += 1; + + d++; + cnt = 0; + } + + cnt += 1; + } + + if (cnt == 1) + f1 += 1; + + return estimate_ndistinct(totalrows, numrows, d, f1); +} + +/* The Duj1 estimator (already used in analyze.c). */ +static double +estimate_ndistinct(double totalrows, int numrows, int d, int f1) +{ + double numer, + denom, + ndistinct; + + numer = (double) numrows * (double) d; + + denom = (double) (numrows - f1) + + (double) f1 * (double) numrows / totalrows; + + ndistinct = numer / denom; + + /* Clamp to sane range in case of roundoff error */ + if (ndistinct < (double) d) + ndistinct = (double) d; + + if (ndistinct > totalrows) + ndistinct = totalrows; + + return floor(ndistinct + 0.5); +} + +/* + * n_choose_k + * computes binomial coefficients using an algorithm that is both + * efficient and prevents overflows + */ +static int +n_choose_k(int n, int k) +{ + int d, + r; + + Assert((k > 0) && (n >= k)); + + /* use symmetry of the binomial coefficients */ + k = Min(k, n - k); + + r = 1; + for (d = 1; d <= k; ++d) + { + r *= n--; + r /= d; + } + + return r; +} + +/* + * num_combinations + * number of combinations, excluding single-value combinations + */ +static int +num_combinations(int n) +{ + return (1 << n) - (n + 1); +} + +/* + * generator_init + * initialize the generator of combinations + * + * The generator produces combinations of K elements in the interval (0..N). + * We prebuild all the combinations in this method, which is simpler than + * generating them on the fly. 
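+ *
+ * For example, generator_init(4, 2) prebuilds the n_choose_k(4, 2) = 6
+ * combinations, in lexicographic order:
+ *
+ *	  (0,1), (0,2), (0,3), (1,2), (1,3), (2,3)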
+ */
+static CombinationGenerator *
+generator_init(int n, int k)
+{
+	CombinationGenerator *state;
+
+	Assert((n >= k) && (k > 0));
+
+	/* allocate the generator state as a single chunk of memory */
+	state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
+
+	state->ncombinations = n_choose_k(n, k);
+
+	/* pre-allocate space for all combinations */
+	state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
+
+	state->current = 0;
+	state->k = k;
+	state->n = n;
+
+	/* now actually pre-generate all the combinations of K elements */
+	generate_combinations(state);
+
+	/* make sure we got the expected number of combinations */
+	Assert(state->current == state->ncombinations);
+
+	/* reset the number, so we start with the first one */
+	state->current = 0;
+
+	return state;
+}
+
+/*
+ * generator_next
+ *		returns the next combination from the prebuilt list
+ *
+ * Returns a combination of K array indexes (0 .. N-1), as specified to
+ * generator_init(), or NULL when there are no more combinations.
+ */
+static int *
+generator_next(CombinationGenerator *state)
+{
+	if (state->current == state->ncombinations)
+		return NULL;
+
+	return &state->combinations[state->k * state->current++];
+}
+
+/*
+ * generator_free
+ *		free the internal state of the generator
+ *
+ * Releases the generator internal state (pre-built combinations).
+ */
+static void
+generator_free(CombinationGenerator *state)
+{
+	pfree(state->combinations);
+	pfree(state);
+}
+
+/*
+ * generate_combinations_recurse
+ *		given a prefix, generate all possible combinations
+ *
+ * Given a prefix (first few elements of the combination), generate following
+ * elements recursively. We generate the combinations in lexicographic order,
+ * which eliminates permutations of the same combination.
+ */
+static void
+generate_combinations_recurse(CombinationGenerator *state,
+							  int index, int start, int *current)
+{
+	/* If we haven't filled all the elements, simply recurse. */
+	if (index < state->k)
+	{
+		int			i;
+
+		/*
+		 * The values have to be in ascending order, so make sure we start
+		 * with the value passed by parameter.
+		 */
+
+		for (i = start; i < state->n; i++)
+		{
+			current[index] = i;
+			generate_combinations_recurse(state, (index + 1), (i + 1), current);
+		}
+
+		return;
+	}
+	else
+	{
+		/* we got a valid combination, add it to the array */
+		memcpy(&state->combinations[(state->k * state->current)],
+			   current, state->k * sizeof(int));
+		state->current++;
+	}
+}
+
+/*
+ * generate_combinations
+ *		generate all k-combinations of N elements
+ */
+static void
+generate_combinations(CombinationGenerator *state)
+{
+	int		   *current = (int *) palloc0(sizeof(int) * state->k);
+
+	generate_combinations_recurse(state, 0, 0, current);
+
+	pfree(current);
+}