1 files changed, 1860 insertions, 0 deletions
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
new file mode 100644
index 0000000..a9187dd
--- /dev/null
+++ b/src/backend/statistics/dependencies.c
@@ -0,0 +1,1860 @@
+/*-------------------------------------------------------------------------
+ *
+ * dependencies.c
+ *	  POSTGRES functional dependencies
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/statistics/dependencies.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_statistic_ext_data.h"
+#include "lib/stringinfo.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/nodes.h"
+#include "nodes/pathnodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
+#include "parser/parsetree.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+#include "utils/bytea.h"
+#include "utils/fmgroids.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/selfuncs.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+/* size of the struct header fields (magic, type, ndeps) */
+#define SizeOfHeader		(3 * sizeof(uint32))
+
+/* size of a serialized dependency (degree, natts, atts) */
+#define SizeOfItem(natts) \
+	(sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
+
+/* minimal size of a dependency (with two attributes) */
+#define MinSizeOfItem	SizeOfItem(2)
+
+/* minimal size of dependencies, when all deps are minimal */
+#define MinSizeOfItems(ndeps) \
+	(SizeOfHeader + (ndeps) * MinSizeOfItem)
+
+/*
+ * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
+ * k-permutations of n elements, except that the order does not matter for the
+ * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
+ */
+typedef struct DependencyGeneratorData
+{
+	int			k;				/* size of the dependency */
+	int			n;				/* number of possible attributes */
+	int			current;		/* next dependency to return (index) */
+	AttrNumber	ndependencies;	/* number of dependencies generated */
+	AttrNumber *dependencies;	/* array of pre-generated dependencies	*/
+} DependencyGeneratorData;
+
+typedef DependencyGeneratorData *DependencyGenerator;
+
+static void generate_dependencies_recurse(DependencyGenerator state,
+										  int index, AttrNumber start, AttrNumber *current);
+static void generate_dependencies(DependencyGenerator state);
+static DependencyGenerator DependencyGenerator_init(int n, int k);
+static void DependencyGenerator_free(DependencyGenerator state);
+static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
+static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
+static bool dependency_is_fully_matched(MVDependency *dependency,
+										Bitmapset *attnums);
+static bool dependency_is_compatible_clause(Node *clause, Index relid,
+											AttrNumber *attnum);
+static bool dependency_is_compatible_expression(Node *clause, Index relid,
+												List *statlist, Node **expr);
+static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
+											   int ndependencies, Bitmapset *attnums);
+static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
+												 int varRelid, JoinType jointype,
+												 SpecialJoinInfo *sjinfo,
+												 MVDependency **dependencies,
+												 int ndependencies,
+												 AttrNumber *list_attnums,
+												 Bitmapset **estimatedclauses);
+
+static void
+generate_dependencies_recurse(DependencyGenerator state, int index,
+							  AttrNumber start, AttrNumber *current)
+{
+	/*
+	 * The generator handles the first (k-1) elements differently from the
+	 * last element.
+	 */
+	if (index < (state->k - 1))
+	{
+		AttrNumber	i;
+
+		/*
+		 * The first (k-1) values have to be in ascending order, which we
+		 * generate recursively.
+		 */
+
+		for (i = start; i < state->n; i++)
+		{
+			current[index] = i;
+			generate_dependencies_recurse(state, (index + 1), (i + 1), current);
+		}
+	}
+	else
+	{
+		int			i;
+
+		/*
+		 * the last element is the implied value, which does not respect the
+		 * ascending order. We just need to check that the value is not in the
+		 * first (k-1) elements.
+		 */
+
+		for (i = 0; i < state->n; i++)
+		{
+			int			j;
+			bool		match = false;
+
+			current[index] = i;
+
+			for (j = 0; j < index; j++)
+			{
+				if (current[j] == i)
+				{
+					match = true;
+					break;
+				}
+			}
+
+			/*
+			 * If the value is not found in the first part of the dependency,
+			 * we're done.
+			 */
+			if (!match)
+			{
+				state->dependencies = (AttrNumber *) repalloc(state->dependencies,
+															  state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
+				memcpy(&state->dependencies[(state->k * state->ndependencies)],
+					   current, state->k * sizeof(AttrNumber));
+				state->ndependencies++;
+			}
+		}
+	}
+}
+
+/* generate all dependencies (k-permutations of n elements) */
+static void
+generate_dependencies(DependencyGenerator state)
+{
+	AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
+
+	generate_dependencies_recurse(state, 0, 0, current);
+
+	pfree(current);
+}
+
+/*
+ * initialize the DependencyGenerator of variations, and prebuild the variations
+ *
+ * This pre-builds all the variations. We could also generate them in
+ * DependencyGenerator_next(), but this seems simpler.
+ */
+static DependencyGenerator
+DependencyGenerator_init(int n, int k)
+{
+	DependencyGenerator state;
+
+	Assert((n >= k) && (k > 0));
+
+	/* allocate the DependencyGenerator state */
+	state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
+	state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
+
+	state->ndependencies = 0;
+	state->current = 0;
+	state->k = k;
+	state->n = n;
+
+	/* now actually pre-generate all the variations */
+	generate_dependencies(state);
+
+	return state;
+}
+
+/* free the DependencyGenerator state */
+static void
+DependencyGenerator_free(DependencyGenerator state)
+{
+	pfree(state->dependencies);
+	pfree(state);
+
+}
+
+/* generate next combination */
+static AttrNumber *
+DependencyGenerator_next(DependencyGenerator state)
+{
+	if (state->current == state->ndependencies)
+		return NULL;
+
+	return &state->dependencies[state->k * state->current++];
+}
+
+
+/*
+ * validates functional dependency on the data
+ *
+ * An actual work horse of detecting functional dependencies. Given a variation
+ * of k attributes, it checks that the first (k-1) are sufficient to determine
+ * the last one.
+ */
+static double
+dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
+{
+	int			i,
+				nitems;
+	MultiSortSupport mss;
+	SortItem   *items;
+	AttrNumber *attnums_dep;
+
+	/* counters valid within a group */
+	int			group_size = 0;
+	int			n_violations = 0;
+
+	/* total number of rows supporting (consistent with) the dependency */
+	int			n_supporting_rows = 0;
+
+	/* Make sure we have at least two input attributes. */
+	Assert(k >= 2);
+
+	/* sort info for all attributes columns */
+	mss = multi_sort_init(k);
+
+	/*
+	 * Translate the array of indexes to regular attnums for the dependency
+	 * (we will need this to identify the columns in StatsBuildData).
+	 */
+	attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
+	for (i = 0; i < k; i++)
+		attnums_dep[i] = data->attnums[dependency[i]];
+
+	/*
+	 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
+	 *
+	 * (a) sort the data lexicographically
+	 *
+	 * (b) split the data into groups by first (k-1) columns
+	 *
+	 * (c) for each group count different values in the last column
+	 *
+	 * We use the column data types' default sort operators and collations;
+	 * perhaps at some point it'd be worth using column-specific collations?
+	 */
+
+	/* prepare the sort function for the dimensions */
+	for (i = 0; i < k; i++)
+	{
+		VacAttrStats *colstat = data->stats[dependency[i]];
+		TypeCacheEntry *type;
+
+		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+		if (type->lt_opr == InvalidOid) /* shouldn't happen */
+			elog(ERROR, "cache lookup failed for ordering operator for type %u",
+				 colstat->attrtypid);
+
+		/* prepare the sort function for this dimension */
+		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
+	}
+
+	/*
+	 * build an array of SortItem(s) sorted using the multi-sort support
+	 *
+	 * XXX This relies on all stats entries pointing to the same tuple
+	 * descriptor.  For now that assumption holds, but it might change in the
+	 * future for example if we support statistics on multiple tables.
+	 */
+	items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
+
+	/*
+	 * Walk through the sorted array, split it into rows according to the
+	 * first (k-1) columns. If there's a single value in the last column, we
+	 * count the group as 'supporting' the functional dependency. Otherwise we
+	 * count it as contradicting.
+	 */
+
+	/* start with the first row forming a group */
+	group_size = 1;
+
+	/* loop 1 beyond the end of the array so that we count the final group */
+	for (i = 1; i <= nitems; i++)
+	{
+		/*
+		 * Check if the group ended, which may be either because we processed
+		 * all the items (i==nitems), or because the i-th item is not equal to
+		 * the preceding one.
+		 */
+		if (i == nitems ||
+			multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
+		{
+			/*
+			 * If no violations were found in the group then track the rows of
+			 * the group as supporting the functional dependency.
+			 */
+			if (n_violations == 0)
+				n_supporting_rows += group_size;
+
+			/* Reset counters for the new group */
+			n_violations = 0;
+			group_size = 1;
+			continue;
+		}
+		/* first columns match, but the last one does not (so contradicting) */
+		else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
+			n_violations++;
+
+		group_size++;
+	}
+
+	/* Compute the 'degree of validity' as (supporting/total). */
+	return (n_supporting_rows * 1.0 / data->numrows);
+}
+
+/*
+ * detects functional dependencies between groups of columns
+ *
+ * Generates all possible subsets of columns (variations) and computes
+ * the degree of validity for each one. For example when creating statistics
+ * on three columns (a,b,c) there are 9 possible dependencies
+ *
+ *	   two columns			  three columns
+ *	   -----------			  -------------
+ *	   (a) -> b				  (a,b) -> c
+ *	   (a) -> c				  (a,c) -> b
+ *	   (b) -> a				  (b,c) -> a
+ *	   (b) -> c
+ *	   (c) -> a
+ *	   (c) -> b
+ */
+MVDependencies *
+statext_dependencies_build(StatsBuildData *data)
+{
+	int			i,
+				k;
+
+	/* result */
+	MVDependencies *dependencies = NULL;
+	MemoryContext	cxt;
+
+	Assert(data->nattnums >= 2);
+
+	/* tracks memory allocated by dependency_degree calls */
+	cxt = AllocSetContextCreate(CurrentMemoryContext,
+								"dependency_degree cxt",
+								ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * We'll try build functional dependencies starting from the smallest ones
+	 * covering just 2 columns, to the largest ones, covering all columns
+	 * included in the statistics object.  We start from the smallest ones
+	 * because we want to be able to skip already implied ones.
+	 */
+	for (k = 2; k <= data->nattnums; k++)
+	{
+		AttrNumber *dependency; /* array with k elements */
+
+		/* prepare a DependencyGenerator of variation */
+		DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
+
+		/* generate all possible variations of k values (out of n) */
+		while ((dependency = DependencyGenerator_next(DependencyGenerator)))
+		{
+			double		degree;
+			MVDependency *d;
+			MemoryContext oldcxt;
+
+			/* release memory used by dependency degree calculation */
+			oldcxt = MemoryContextSwitchTo(cxt);
+
+			/* compute how valid the dependency seems */
+			degree = dependency_degree(data, k, dependency);
+
+			MemoryContextSwitchTo(oldcxt);
+			MemoryContextReset(cxt);
+
+			/*
+			 * if the dependency seems entirely invalid, don't store it
+			 */
+			if (degree == 0.0)
+				continue;
+
+			d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
+										 + k * sizeof(AttrNumber));
+
+			/* copy the dependency (and keep the indexes into stxkeys) */
+			d->degree = degree;
+			d->nattributes = k;
+			for (i = 0; i < k; i++)
+				d->attributes[i] = data->attnums[dependency[i]];
+
+			/* initialize the list of dependencies */
+			if (dependencies == NULL)
+			{
+				dependencies
+					= (MVDependencies *) palloc0(sizeof(MVDependencies));
+
+				dependencies->magic = STATS_DEPS_MAGIC;
+				dependencies->type = STATS_DEPS_TYPE_BASIC;
+				dependencies->ndeps = 0;
+			}
+
+			dependencies->ndeps++;
+			dependencies = (MVDependencies *) repalloc(dependencies,
+													   offsetof(MVDependencies, deps)
+													   + dependencies->ndeps * sizeof(MVDependency *));
+
+			dependencies->deps[dependencies->ndeps - 1] = d;
+		}
+
+		/*
+		 * we're done with variations of k elements, so free the
+		 * DependencyGenerator
+		 */
+		DependencyGenerator_free(DependencyGenerator);
+	}
+
+	MemoryContextDelete(cxt);
+
+	return dependencies;
+}
+
+
+/*
+ * Serialize list of dependencies into a bytea value.
+ */
+bytea *
+statext_dependencies_serialize(MVDependencies *dependencies)
+{
+	int			i;
+	bytea	   *output;
+	char	   *tmp;
+	Size		len;
+
+	/* we need to store ndeps, with a number of attributes for each one */
+	len = VARHDRSZ + SizeOfHeader;
+
+	/* and also include space for the actual attribute numbers and degrees */
+	for (i = 0; i < dependencies->ndeps; i++)
+		len += SizeOfItem(dependencies->deps[i]->nattributes);
+
+	output = (bytea *) palloc0(len);
+	SET_VARSIZE(output, len);
+
+	tmp = VARDATA(output);
+
+	/* Store the base struct values (magic, type, ndeps) */
+	memcpy(tmp, &dependencies->magic, sizeof(uint32));
+	tmp += sizeof(uint32);
+	memcpy(tmp, &dependencies->type, sizeof(uint32));
+	tmp += sizeof(uint32);
+	memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
+	tmp += sizeof(uint32);
+
+	/* store number of attributes and attribute numbers for each dependency */
+	for (i = 0; i < dependencies->ndeps; i++)
+	{
+		MVDependency *d = dependencies->deps[i];
+
+		memcpy(tmp, &d->degree, sizeof(double));
+		tmp += sizeof(double);
+
+		memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
+		tmp += sizeof(AttrNumber);
+
+		memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
+		tmp += sizeof(AttrNumber) * d->nattributes;
+
+		/* protect against overflow */
+		Assert(tmp <= ((char *) output + len));
+	}
+
+	/* make sure we've produced exactly the right amount of data */
+	Assert(tmp == ((char *) output + len));
+
+	return output;
+}
+
+/*
+ * Reads serialized dependencies into MVDependencies structure.
+ */
+MVDependencies *
+statext_dependencies_deserialize(bytea *data)
+{
+	int			i;
+	Size		min_expected_size;
+	MVDependencies *dependencies;
+	char	   *tmp;
+
+	if (data == NULL)
+		return NULL;
+
+	if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
+		elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
+			 VARSIZE_ANY_EXHDR(data), SizeOfHeader);
+
+	/* read the MVDependencies header */
+	dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
+
+	/* initialize pointer to the data part (skip the varlena header) */
+	tmp = VARDATA_ANY(data);
+
+	/* read the header fields and perform basic sanity checks */
+	memcpy(&dependencies->magic, tmp, sizeof(uint32));
+	tmp += sizeof(uint32);
+	memcpy(&dependencies->type, tmp, sizeof(uint32));
+	tmp += sizeof(uint32);
+	memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
+	tmp += sizeof(uint32);
+
+	if (dependencies->magic != STATS_DEPS_MAGIC)
+		elog(ERROR, "invalid dependency magic %d (expected %d)",
+			 dependencies->magic, STATS_DEPS_MAGIC);
+
+	if (dependencies->type != STATS_DEPS_TYPE_BASIC)
+		elog(ERROR, "invalid dependency type %d (expected %d)",
+			 dependencies->type, STATS_DEPS_TYPE_BASIC);
+
+	if (dependencies->ndeps == 0)
+		elog(ERROR, "invalid zero-length item array in MVDependencies");
+
+	/* what minimum bytea size do we expect for those parameters */
+	min_expected_size = SizeOfItem(dependencies->ndeps);
+
+	if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
+		elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
+			 VARSIZE_ANY_EXHDR(data), min_expected_size);
+
+	/* allocate space for the MCV items */
+	dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
+							+ (dependencies->ndeps * sizeof(MVDependency *)));
+
+	for (i = 0; i < dependencies->ndeps; i++)
+	{
+		double		degree;
+		AttrNumber	k;
+		MVDependency *d;
+
+		/* degree of validity */
+		memcpy(&degree, tmp, sizeof(double));
+		tmp += sizeof(double);
+
+		/* number of attributes */
+		memcpy(&k, tmp, sizeof(AttrNumber));
+		tmp += sizeof(AttrNumber);
+
+		/* is the number of attributes valid? */
+		Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
+
+		/* now that we know the number of attributes, allocate the dependency */
+		d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
+									 + (k * sizeof(AttrNumber)));
+
+		d->degree = degree;
+		d->nattributes = k;
+
+		/* copy attribute numbers */
+		memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
+		tmp += sizeof(AttrNumber) * d->nattributes;
+
+		dependencies->deps[i] = d;
+
+		/* still within the bytea */
+		Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
+	}
+
+	/* we should have consumed the whole bytea exactly */
+	Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
+
+	return dependencies;
+}
+
+/*
+ * dependency_is_fully_matched
+ *		checks that a functional dependency is fully matched given clauses on
+ *		attributes (assuming the clauses are suitable equality clauses)
+ */
+static bool
+dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
+{
+	int			j;
+
+	/*
+	 * Check that the dependency actually is fully covered by clauses. We have
+	 * to translate all attribute numbers, as those are referenced
+	 */
+	for (j = 0; j < dependency->nattributes; j++)
+	{
+		int			attnum = dependency->attributes[j];
+
+		if (!bms_is_member(attnum, attnums))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * statext_dependencies_load
+ *		Load the functional dependencies for the indicated pg_statistic_ext tuple
+ */
+MVDependencies *
+statext_dependencies_load(Oid mvoid)
+{
+	MVDependencies *result;
+	bool		isnull;
+	Datum		deps;
+	HeapTuple	htup;
+
+	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+	if (!HeapTupleIsValid(htup))
+		elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
+
+	deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
+						   Anum_pg_statistic_ext_data_stxddependencies, &isnull);
+	if (isnull)
+		elog(ERROR,
+			 "requested statistics kind \"%c\" is not yet built for statistics object %u",
+			 STATS_EXT_DEPENDENCIES, mvoid);
+
+	result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
+
+	ReleaseSysCache(htup);
+
+	return result;
+}
+
+/*
+ * pg_dependencies_in		- input routine for type pg_dependencies.
+ *
+ * pg_dependencies is real enough to be a table column, but it has no operations
+ * of its own, and disallows input too
+ */
+Datum
+pg_dependencies_in(PG_FUNCTION_ARGS)
+{
+	/*
+	 * pg_node_list stores the data in binary form and parsing text input is
+	 * not needed, so disallow this.
+	 */
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_dependencies")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+/*
+ * pg_dependencies		- output routine for type pg_dependencies.
+ */
+Datum
+pg_dependencies_out(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_PP(0);
+	MVDependencies *dependencies = statext_dependencies_deserialize(data);
+	int			i,
+				j;
+	StringInfoData str;
+
+	initStringInfo(&str);
+	appendStringInfoChar(&str, '{');
+
+	for (i = 0; i < dependencies->ndeps; i++)
+	{
+		MVDependency *dependency = dependencies->deps[i];
+
+		if (i > 0)
+			appendStringInfoString(&str, ", ");
+
+		appendStringInfoChar(&str, '"');
+		for (j = 0; j < dependency->nattributes; j++)
+		{
+			if (j == dependency->nattributes - 1)
+				appendStringInfoString(&str, " => ");
+			else if (j > 0)
+				appendStringInfoString(&str, ", ");
+
+			appendStringInfo(&str, "%d", dependency->attributes[j]);
+		}
+		appendStringInfo(&str, "\": %f", dependency->degree);
+	}
+
+	appendStringInfoChar(&str, '}');
+
+	PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * pg_dependencies_recv		- binary input routine for type pg_dependencies.
+ */
+Datum
+pg_dependencies_recv(PG_FUNCTION_ARGS)
+{
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_dependencies")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+/*
+ * pg_dependencies_send		- binary output routine for type pg_dependencies.
+ *
+ * Functional dependencies are serialized in a bytea value (although the type
+ * is named differently), so let's just send that.
+ */
+Datum
+pg_dependencies_send(PG_FUNCTION_ARGS)
+{
+	return byteasend(fcinfo);
+}
+
+/*
+ * dependency_is_compatible_clause
+ *		Determines if the clause is compatible with functional dependencies
+ *
+ * Only clauses that have the form of equality to a pseudoconstant, or can be
+ * interpreted that way, are currently accepted.  Furthermore the variable
+ * part of the clause must be a simple Var belonging to the specified
+ * relation, whose attribute number we return in *attnum on success.
+ */
+static bool
+dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
+{
+	Var		   *var;
+	Node	   *clause_expr;
+
+	if (IsA(clause, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+		/* Pseudoconstants are not interesting (they couldn't contain a Var) */
+		if (rinfo->pseudoconstant)
+			return false;
+
+		/* Clauses referencing multiple, or no, varnos are incompatible */
+		if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+			return false;
+
+		clause = (Node *) rinfo->clause;
+	}
+
+	if (is_opclause(clause))
+	{
+		/* If it's an opclause, check for Var = Const or Const = Var. */
+		OpExpr	   *expr = (OpExpr *) clause;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/* Make sure non-selected argument is a pseudoconstant. */
+		if (is_pseudo_constant_clause(lsecond(expr->args)))
+			clause_expr = linitial(expr->args);
+		else if (is_pseudo_constant_clause(linitial(expr->args)))
+			clause_expr = lsecond(expr->args);
+		else
+			return false;
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 *
+		 * XXX this is pretty dubious; probably it'd be better to check btree
+		 * or hash opclass membership, so as not to be fooled by custom
+		 * selectivity functions, and to be more consistent with decisions
+		 * elsewhere in the planner.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (IsA(clause, ScalarArrayOpExpr))
+	{
+		/* If it's an scalar array operator, check for Var IN Const. */
+		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
+
+		/*
+		 * Reject ALL() variant, we only care about ANY/IN.
+		 *
+		 * XXX Maybe we should check if all the values are the same, and allow
+		 * ALL in that case? Doesn't seem very practical, though.
+		 */
+		if (!expr->useOr)
+			return false;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/*
+		 * We know it's always (Var IN Const), so we assume the var is the
+		 * first argument, and pseudoconstant is the second one.
+		 */
+		if (!is_pseudo_constant_clause(lsecond(expr->args)))
+			return false;
+
+		clause_expr = linitial(expr->args);
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies. The operator is identified
+		 * simply by looking at which function it uses to estimate
+		 * selectivity. That's a bit strange, but it's what other similar
+		 * places do.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (is_orclause(clause))
+	{
+		BoolExpr   *bool_expr = (BoolExpr *) clause;
+		ListCell   *lc;
+
+		/* start with no attribute number */
+		*attnum = InvalidAttrNumber;
+
+		foreach(lc, bool_expr->args)
+		{
+			AttrNumber	clause_attnum;
+
+			/*
+			 * Had we found incompatible clause in the arguments, treat the
+			 * whole clause as incompatible.
+			 */
+			if (!dependency_is_compatible_clause((Node *) lfirst(lc),
+												 relid, &clause_attnum))
+				return false;
+
+			if (*attnum == InvalidAttrNumber)
+				*attnum = clause_attnum;
+
+			/* ensure all the variables are the same (same attnum) */
+			if (*attnum != clause_attnum)
+				return false;
+		}
+
+		/* the Var is already checked by the recursive call */
+		return true;
+	}
+	else if (is_notclause(clause))
+	{
+		/*
+		 * "NOT x" can be interpreted as "x = false", so get the argument and
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) get_notclausearg(clause);
+	}
+	else
+	{
+		/*
+		 * A boolean expression "x" can be interpreted as "x = true", so
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) clause;
+	}
+
+	/*
+	 * We may ignore any RelabelType node above the operand.  (There won't be
+	 * more than one, since eval_const_expressions has been applied already.)
+	 */
+	if (IsA(clause_expr, RelabelType))
+		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
+
+	/* We only support plain Vars for now */
+	if (!IsA(clause_expr, Var))
+		return false;
+
+	/* OK, we know we have a Var */
+	var = (Var *) clause_expr;
+
+	/* Ensure Var is from the correct relation */
+	if (var->varno != relid)
+		return false;
+
+	/* We also better ensure the Var is from the current level */
+	if (var->varlevelsup != 0)
+		return false;
+
+	/* Also ignore system attributes (we don't allow stats on those) */
+	if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+		return false;
+
+	*attnum = var->varattno;
+	return true;
+}
+
+/*
+ * find_strongest_dependency
+ *		find the strongest dependency on the attributes
+ *
+ * When applying functional dependencies, we start with the strongest
+ * dependencies. That is, we select the dependency that:
+ *
+ * (a) has all attributes covered by equality clauses
+ *
+ * (b) has the most attributes
+ *
+ * (c) has the highest degree of validity
+ *
+ * This guarantees that we eliminate the most redundant conditions first
+ * (see the comment in dependencies_clauselist_selectivity).
+ */
+static MVDependency *
+find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
+						  Bitmapset *attnums)
+{
+	int			i,
+				j;
+	MVDependency *strongest = NULL;
+
+	/* number of attnums in clauses */
+	int			nattnums = bms_num_members(attnums);
+
+	/*
+	 * Iterate over the MVDependency items and find the strongest one from the
+	 * fully-matched dependencies. We do the cheap checks first, before
+	 * matching it against the attnums.
+	 */
+	for (i = 0; i < ndependencies; i++)
+	{
+		for (j = 0; j < dependencies[i]->ndeps; j++)
+		{
+			MVDependency *dependency = dependencies[i]->deps[j];
+
+			/*
+			 * Skip dependencies referencing more attributes than available
+			 * clauses, as those can't be fully matched.
+			 */
+			if (dependency->nattributes > nattnums)
+				continue;
+
+			if (strongest)
+			{
+				/* skip dependencies on fewer attributes than the strongest. */
+				if (dependency->nattributes < strongest->nattributes)
+					continue;
+
+				/* also skip weaker dependencies when attribute count matches */
+				if (strongest->nattributes == dependency->nattributes &&
+					strongest->degree > dependency->degree)
+					continue;
+			}
+
+			/*
+			 * this dependency is stronger, but we must still check that it's
+			 * fully matched to these attnums. We perform this check last as
+			 * it's slightly more expensive than the previous checks.
+			 */
+			if (dependency_is_fully_matched(dependency, attnums))
+				strongest = dependency; /* save new best match */
+		}
+	}
+
+	return strongest;
+}
+
+/*
+ * clauselist_apply_dependencies
+ *		Apply the specified functional dependencies to a list of clauses and
+ *		return the estimated selectivity of the clauses that are compatible
+ *		with any of the given dependencies.
+ *
+ * This will estimate all not-already-estimated clauses that are compatible
+ * with functional dependencies, and which have an attribute mentioned by any
+ * of the given dependencies (either as an implying or implied attribute).
+ *
+ * Given (lists of) clauses on attributes (a,b) and a functional dependency
+ * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
+ * using the formula
+ *
+ *		P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
+ *
+ * where 'f' is the degree of dependency.  This reflects the fact that we
+ * expect a fraction f of all rows to be consistent with the dependency
+ * (a=>b), and so have a selectivity of P(a), while the remaining rows are
+ * treated as independent.
+ *
+ * In practice, we use a slightly modified version of this formula, which uses
+ * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
+ * should obviously not exceed either column's individual selectivity.  I.e.,
+ * we actually combine selectivities using the formula
+ *
+ *		P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
+ *
+ * This can make quite a difference if the specific values matching the
+ * clauses are not consistent with the functional dependency.
+ */
+static Selectivity
+clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
+							  int varRelid, JoinType jointype,
+							  SpecialJoinInfo *sjinfo,
+							  MVDependency **dependencies, int ndependencies,
+							  AttrNumber *list_attnums,
+							  Bitmapset **estimatedclauses)
+{
+	Bitmapset  *attnums;
+	int			i;
+	int			j;
+	int			nattrs;
+	Selectivity *attr_sel;
+	int			attidx;
+	int			listidx;
+	ListCell   *l;
+	Selectivity s1;
+
+	/*
+	 * Extract the attnums of all implying and implied attributes from all the
+	 * given dependencies.  Each of these attributes is expected to have at
+	 * least 1 not-already-estimated compatible clause that we will estimate
+	 * here.
+	 */
+	attnums = NULL;
+	for (i = 0; i < ndependencies; i++)
+	{
+		for (j = 0; j < dependencies[i]->nattributes; j++)
+		{
+			AttrNumber	attnum = dependencies[i]->attributes[j];
+
+			attnums = bms_add_member(attnums, attnum);
+		}
+	}
+
+	/*
+	 * Compute per-column selectivity estimates for each of these attributes,
+	 * and mark all the corresponding clauses as estimated.
+	 */
+	nattrs = bms_num_members(attnums);
+	attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
+
+	attidx = 0;
+	i = -1;
+	while ((i = bms_next_member(attnums, i)) >= 0)
+	{
+		List	   *attr_clauses = NIL;
+		Selectivity simple_sel;
+
+		listidx = -1;
+		foreach(l, clauses)
+		{
+			Node	   *clause = (Node *) lfirst(l);
+
+			listidx++;
+			if (list_attnums[listidx] == i)
+			{
+				attr_clauses = lappend(attr_clauses, clause);
+				*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
+			}
+		}
+
+		simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
+												jointype, sjinfo, false);
+		attr_sel[attidx++] = simple_sel;
+	}
+
+	/*
+	 * Now combine these selectivities using the dependency information.  For
+	 * chains of dependencies such as a -> b -> c, the b -> c dependency will
+	 * come before the a -> b dependency in the array, so we traverse the
+	 * array backwards to ensure such chains are computed in the right order.
+	 *
+	 * As explained above, pairs of selectivities are combined using the
+	 * formula
+	 *
+	 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
+	 *
+	 * to ensure that the combined selectivity is never greater than either
+	 * individual selectivity.
+	 *
+	 * Where multiple dependencies apply (e.g., a -> b -> c), we use
+	 * conditional probabilities to compute the overall result as follows:
+	 *
+	 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
+	 *
+	 * so we replace the selectivities of all implied attributes with
+	 * conditional probabilities, that are conditional on all their implying
+	 * attributes.  The selectivities of all other non-implied attributes are
+	 * left as they are.
+	 */
+	for (i = ndependencies - 1; i >= 0; i--)
+	{
+		MVDependency *dependency = dependencies[i];
+		AttrNumber	attnum;
+		Selectivity s2;
+		double		f;
+
+		/* Selectivity of all the implying attributes */
+		s1 = 1.0;
+		for (j = 0; j < dependency->nattributes - 1; j++)
+		{
+			attnum = dependency->attributes[j];
+			attidx = bms_member_index(attnums, attnum);
+			s1 *= attr_sel[attidx];
+		}
+
+		/* Original selectivity of the implied attribute */
+		attnum = dependency->attributes[j];
+		attidx = bms_member_index(attnums, attnum);
+		s2 = attr_sel[attidx];
+
+		/*
+		 * Replace s2 with the conditional probability s2 given s1, computed
+		 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
+		 *
+		 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
+		 *
+		 * where P(a) = s1, the selectivity of the implying attributes, and
+		 * P(b) = s2, the selectivity of the implied attribute.
+		 */
+		f = dependency->degree;
+
+		if (s1 <= s2)
+			attr_sel[attidx] = f + (1 - f) * s2;
+		else
+			attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
+	}
+
+	/*
+	 * The overall selectivity of all the clauses on all these attributes is
+	 * then the product of all the original (non-implied) probabilities and
+	 * the new conditional (implied) probabilities.
+	 */
+	s1 = 1.0;
+	for (i = 0; i < nattrs; i++)
+		s1 *= attr_sel[i];
+
+	CLAMP_PROBABILITY(s1);
+
+	pfree(attr_sel);
+	bms_free(attnums);
+
+	return s1;
+}
+
+/*
+ * dependency_is_compatible_expression
+ *		Determines if the expression is compatible with functional dependencies
+ *
+ * Similar to dependency_is_compatible_clause, but doesn't enforce that the
+ * expression is a simple Var. OTOH we check that there's at least one
+ * statistics object matching the expression.
+ */
+static bool
+dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
+{
+	List	   *vars;
+	ListCell   *lc,
+			   *lc2;
+	Node	   *clause_expr;
+
+	if (IsA(clause, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+		/* Pseudoconstants are not interesting (they couldn't contain a Var) */
+		if (rinfo->pseudoconstant)
+			return false;
+
+		/* Clauses referencing multiple, or no, varnos are incompatible */
+		if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+			return false;
+
+		clause = (Node *) rinfo->clause;
+	}
+
+	if (is_opclause(clause))
+	{
+		/* If it's an opclause, check for Var = Const or Const = Var. */
+		OpExpr	   *expr = (OpExpr *) clause;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/* Make sure non-selected argument is a pseudoconstant. */
+		if (is_pseudo_constant_clause(lsecond(expr->args)))
+			clause_expr = linitial(expr->args);
+		else if (is_pseudo_constant_clause(linitial(expr->args)))
+			clause_expr = lsecond(expr->args);
+		else
+			return false;
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 *
+		 * XXX this is pretty dubious; probably it'd be better to check btree
+		 * or hash opclass membership, so as not to be fooled by custom
+		 * selectivity functions, and to be more consistent with decisions
+		 * elsewhere in the planner.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (IsA(clause, ScalarArrayOpExpr))
+	{
+		/* If it's an scalar array operator, check for Var IN Const. */
+		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
+
+		/*
+		 * Reject ALL() variant, we only care about ANY/IN.
+		 *
+		 * FIXME Maybe we should check if all the values are the same, and
+		 * allow ALL in that case? Doesn't seem very practical, though.
+		 */
+		if (!expr->useOr)
+			return false;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/*
+		 * We know it's always (Var IN Const), so we assume the var is the
+		 * first argument, and pseudoconstant is the second one.
+		 */
+		if (!is_pseudo_constant_clause(lsecond(expr->args)))
+			return false;
+
+		clause_expr = linitial(expr->args);
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies. The operator is identified
+		 * simply by looking at which function it uses to estimate
+		 * selectivity. That's a bit strange, but it's what other similar
+		 * places do.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (is_orclause(clause))
+	{
+		BoolExpr   *bool_expr = (BoolExpr *) clause;
+		ListCell   *lc;
+
+		/* start with no expression (we'll use the first match) */
+		*expr = NULL;
+
+		foreach(lc, bool_expr->args)
+		{
+			Node	   *or_expr = NULL;
+
+			/*
+			 * Had we found incompatible expression in the arguments, treat
+			 * the whole expression as incompatible.
+			 */
+			if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
+													 statlist, &or_expr))
+				return false;
+
+			if (*expr == NULL)
+				*expr = or_expr;
+
+			/* ensure all the expressions are the same */
+			if (!equal(or_expr, *expr))
+				return false;
+		}
+
+		/* the expression is already checked by the recursive call */
+		return true;
+	}
+	else if (is_notclause(clause))
+	{
+		/*
+		 * "NOT x" can be interpreted as "x = false", so get the argument and
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) get_notclausearg(clause);
+	}
+	else
+	{
+		/*
+		 * A boolean expression "x" can be interpreted as "x = true", so
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) clause;
+	}
+
+	/*
+	 * We may ignore any RelabelType node above the operand.  (There won't be
+	 * more than one, since eval_const_expressions has been applied already.)
+	 */
+	if (IsA(clause_expr, RelabelType))
+		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
+
+	vars = pull_var_clause(clause_expr, 0);
+
+	foreach(lc, vars)
+	{
+		Var		   *var = (Var *) lfirst(lc);
+
+		/* Ensure Var is from the correct relation */
+		if (var->varno != relid)
+			return false;
+
+		/* We also better ensure the Var is from the current level */
+		if (var->varlevelsup != 0)
+			return false;
+
+		/* Also ignore system attributes (we don't allow stats on those) */
+		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+			return false;
+	}
+
+	/*
+	 * Check if we actually have a matching statistics for the expression.
+	 *
+	 * XXX Maybe this is an overkill. We'll eliminate the expressions later.
+	 */
+	foreach(lc, statlist)
+	{
+		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+
+		/* ignore stats without dependencies */
+		if (info->kind != STATS_EXT_DEPENDENCIES)
+			continue;
+
+		foreach(lc2, info->exprs)
+		{
+			Node	   *stat_expr = (Node *) lfirst(lc2);
+
+			if (equal(clause_expr, stat_expr))
+			{
+				*expr = stat_expr;
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+/*
+ * dependencies_clauselist_selectivity
+ *		Return the estimated selectivity of (a subset of) the given clauses
+ *		using functional dependency statistics, or 1.0 if no useful functional
+ *		dependency statistic exists.
+ *
+ * 'estimatedclauses' is an input/output argument that gets a bit set
+ * corresponding to the (zero-based) list index of each clause that is included
+ * in the estimated selectivity.
+ *
+ * Given equality clauses on attributes (a,b) we find the strongest dependency
+ * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
+ * dependency, we then combine the per-clause selectivities using the formula
+ *
+ *	   P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
+ *
+ * where 'f' is the degree of the dependency.  (Actually we use a slightly
+ * modified version of this formula -- see clauselist_apply_dependencies()).
+ *
+ * With clauses on more than two attributes, the dependencies are applied
+ * recursively, starting with the widest/strongest dependencies. For example
+ * P(a,b,c) is first split like this:
+ *
+ *	   P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
+ *
+ * assuming (a,b=>c) is the strongest dependency.
+ */
+Selectivity
+dependencies_clauselist_selectivity(PlannerInfo *root,
+									List *clauses,
+									int varRelid,
+									JoinType jointype,
+									SpecialJoinInfo *sjinfo,
+									RelOptInfo *rel,
+									Bitmapset **estimatedclauses)
+{
+	Selectivity s1 = 1.0;
+	ListCell   *l;
+	Bitmapset  *clauses_attnums = NULL;
+	AttrNumber *list_attnums;
+	int			listidx;
+	MVDependencies **func_dependencies;
+	int			nfunc_dependencies;
+	int			total_ndeps;
+	MVDependency **dependencies;
+	int			ndependencies;
+	int			i;
+	AttrNumber	attnum_offset;
+	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
+
+	/* unique expressions */
+	Node	  **unique_exprs;
+	int			unique_exprs_cnt;
+
+	/*
+	 * When dealing with regular inheritance trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data there's no data in the
+	 * non-leaf relations, so we build stats only for the inheritance tree.
+	 * So for partitioned tables we do consider extended stats.
+	 */
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
+		return 1.0;
+
+	/* check if there's any stats that might be useful for us. */
+	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
+		return 1.0;
+
+	list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
+										 list_length(clauses));
+
+	/*
+	 * We allocate space as if every clause was a unique expression, although
+	 * that's probably overkill. Some will be simple column references that
+	 * we'll translate to attnums, and there might be duplicates. But it's
+	 * easier and cheaper to just do one allocation than repalloc later.
+	 */
+	unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
+	unique_exprs_cnt = 0;
+
+	/*
+	 * Pre-process the clauses list to extract the attnums seen in each item.
+	 * We need to determine if there's any clauses which will be useful for
+	 * dependency selectivity estimations. Along the way we'll record all of
+	 * the attnums for each clause in a list which we'll reference later so we
+	 * don't need to repeat the same work again. We'll also keep track of all
+	 * attnums seen.
+	 *
+	 * We also skip clauses that we already estimated using different types of
+	 * statistics (we treat them as incompatible).
+	 *
+	 * To handle expressions, we assign them negative attnums, as if it was a
+	 * system attribute (this is fine, as we only allow extended stats on user
+	 * attributes). And then we offset everything by the number of
+	 * expressions, so that we can store the values in a bitmapset.
+	 */
+	listidx = 0;
+	foreach(l, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(l);
+		AttrNumber	attnum;
+		Node	   *expr = NULL;
+
+		/* ignore clause by default */
+		list_attnums[listidx] = InvalidAttrNumber;
+
+		if (!bms_is_member(listidx, *estimatedclauses))
+		{
+			/*
+			 * If it's a simple column reference, just extract the attnum. If
+			 * it's an expression, assign a negative attnum as if it was a
+			 * system attribute.
+			 */
+			if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
+			{
+				list_attnums[listidx] = attnum;
+			}
+			else if (dependency_is_compatible_expression(clause, rel->relid,
+														 rel->statlist,
+														 &expr))
+			{
+				/* special attnum assigned to this expression */
+				attnum = InvalidAttrNumber;
+
+				Assert(expr != NULL);
+
+				/* If the expression is duplicate, use the same attnum. */
+				for (i = 0; i < unique_exprs_cnt; i++)
+				{
+					if (equal(unique_exprs[i], expr))
+					{
+						/* negative attribute number to expression */
+						attnum = -(i + 1);
+						break;
+					}
+				}
+
+				/* not found in the list, so add it */
+				if (attnum == InvalidAttrNumber)
+				{
+					unique_exprs[unique_exprs_cnt++] = expr;
+
+					/* after incrementing the value, to get -1, -2, ... */
+					attnum = (-unique_exprs_cnt);
+				}
+
+				/* remember which attnum was assigned to this clause */
+				list_attnums[listidx] = attnum;
+			}
+		}
+
+		listidx++;
+	}
+
+	Assert(listidx == list_length(clauses));
+
+	/*
+	 * How much we need to offset the attnums? If there are no expressions,
+	 * then no offset is needed. Otherwise we need to offset enough for the
+	 * lowest value (-unique_exprs_cnt) to become 1.
+	 */
+	if (unique_exprs_cnt > 0)
+		attnum_offset = (unique_exprs_cnt + 1);
+	else
+		attnum_offset = 0;
+
+	/*
+	 * Now that we know how many expressions there are, we can offset the
+	 * values just enough to build the bitmapset.
+	 */
+	for (i = 0; i < list_length(clauses); i++)
+	{
+		AttrNumber	attnum;
+
+		/* ignore incompatible or already estimated clauses */
+		if (list_attnums[i] == InvalidAttrNumber)
+			continue;
+
+		/* make sure the attnum is in the expected range */
+		Assert(list_attnums[i] >= (-unique_exprs_cnt));
+		Assert(list_attnums[i] <= MaxHeapAttributeNumber);
+
+		/* make sure the attnum is positive (valid AttrNumber) */
+		attnum = list_attnums[i] + attnum_offset;
+
+		/*
+		 * Either it's a regular attribute, or it's an expression, in which
+		 * case we must not have seen it before (expressions are unique).
+		 *
+		 * XXX Check whether it's a regular attribute has to be done using the
+		 * original attnum, while the second check has to use the value with
+		 * an offset.
+		 */
+		Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
+			   !bms_is_member(attnum, clauses_attnums));
+
+		/*
+		 * Remember the offset attnum, both for attributes and expressions.
+		 * We'll pass list_attnums to clauselist_apply_dependencies, which
+		 * uses it to identify clauses in a bitmap. We could also pass the
+		 * offset, but this is more convenient.
+		 */
+		list_attnums[i] = attnum;
+
+		clauses_attnums = bms_add_member(clauses_attnums, attnum);
+	}
+
+	/*
+	 * If there's not at least two distinct attnums and expressions, then
+	 * reject the whole list of clauses. We must return 1.0 so the calling
+	 * function's selectivity is unaffected.
+	 */
+	if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
+	{
+		bms_free(clauses_attnums);
+		pfree(list_attnums);
+		return 1.0;
+	}
+
+	/*
+	 * Load all functional dependencies matching at least two parameters. We
+	 * can simply consider all dependencies at once, without having to search
+	 * for the best statistics object.
+	 *
+	 * To not waste cycles and memory, we deserialize dependencies only for
+	 * statistics that match at least two attributes. The array is allocated
+	 * with the assumption that all objects match - we could grow the array to
+	 * make it just the right size, but it's likely wasteful anyway thanks to
+	 * moving the freed chunks to freelists etc.
+	 */
+	func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
+												   list_length(rel->statlist));
+	nfunc_dependencies = 0;
+	total_ndeps = 0;
+
+	foreach(l, rel->statlist)
+	{
+		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
+		int			nmatched;
+		int			nexprs;
+		int			k;
+		MVDependencies *deps;
+
+		/* skip statistics that are not of the correct type */
+		if (stat->kind != STATS_EXT_DEPENDENCIES)
+			continue;
+
+		/*
+		 * Count matching attributes - we have to undo the attnum offsets. The
+		 * input attribute numbers are not offset (expressions are not
+		 * included in stat->keys, so it's not necessary). But we need to
+		 * offset it before checking against clauses_attnums.
+		 */
+		nmatched = 0;
+		k = -1;
+		while ((k = bms_next_member(stat->keys, k)) >= 0)
+		{
+			AttrNumber	attnum = (AttrNumber) k;
+
+			/* skip expressions */
+			if (!AttrNumberIsForUserDefinedAttr(attnum))
+				continue;
+
+			/* apply the same offset as above */
+			attnum += attnum_offset;
+
+			if (bms_is_member(attnum, clauses_attnums))
+				nmatched++;
+		}
+
+		/* count matching expressions */
+		nexprs = 0;
+		for (i = 0; i < unique_exprs_cnt; i++)
+		{
+			ListCell   *lc;
+
+			foreach(lc, stat->exprs)
+			{
+				Node	   *stat_expr = (Node *) lfirst(lc);
+
+				/* try to match it */
+				if (equal(stat_expr, unique_exprs[i]))
+					nexprs++;
+			}
+		}
+
+		/*
+		 * Skip objects matching fewer than two attributes/expressions from
+		 * clauses.
+		 */
+		if (nmatched + nexprs < 2)
+			continue;
+
+		deps = statext_dependencies_load(stat->statOid);
+
+		/*
+		 * The expressions may be represented by different attnums in the
+		 * stats, we need to remap them to be consistent with the clauses.
+		 * That will make the later steps (e.g. picking the strongest item and
+		 * so on) much simpler and cheaper, because it won't need to care
+		 * about the offset at all.
+		 *
+		 * When we're at it, we can ignore dependencies that are not fully
+		 * matched by clauses (i.e. referencing attributes or expressions that
+		 * are not in the clauses).
+		 *
+		 * We have to do this for all statistics, as long as there are any
+		 * expressions - we need to shift the attnums in all dependencies.
+		 *
+		 * XXX Maybe we should do this always, because it also eliminates some
+		 * of the dependencies early. It might be cheaper than having to walk
+		 * the longer list in find_strongest_dependency later, especially as
+		 * we need to do that repeatedly?
+		 *
+		 * XXX We have to do this even when there are no expressions in
+		 * clauses, otherwise find_strongest_dependency may fail for stats
+		 * with expressions (due to lookup of negative value in bitmap). So we
+		 * need to at least filter out those dependencies. Maybe we could do
+		 * it in a cheaper way (if there are no expr clauses, we can just
+		 * discard all negative attnums without any lookups).
+		 */
+		if (unique_exprs_cnt > 0 || stat->exprs != NIL)
+		{
+			int			ndeps = 0;
+
+			for (i = 0; i < deps->ndeps; i++)
+			{
+				bool		skip = false;
+				MVDependency *dep = deps->deps[i];
+				int			j;
+
+				for (j = 0; j < dep->nattributes; j++)
+				{
+					int			idx;
+					Node	   *expr;
+					int			k;
+					AttrNumber	unique_attnum = InvalidAttrNumber;
+					AttrNumber	attnum;
+
+					/* undo the per-statistics offset */
+					attnum = dep->attributes[j];
+
+					/*
+					 * For regular attributes we can simply check if it
+					 * matches any clause. If there's no matching clause, we
+					 * can just ignore it. We need to offset the attnum
+					 * though.
+					 */
+					if (AttrNumberIsForUserDefinedAttr(attnum))
+					{
+						dep->attributes[j] = attnum + attnum_offset;
+
+						if (!bms_is_member(dep->attributes[j], clauses_attnums))
+						{
+							skip = true;
+							break;
+						}
+
+						continue;
+					}
+
+					/*
+					 * the attnum should be a valid system attnum (-1, -2,
+					 * ...)
+					 */
+					Assert(AttributeNumberIsValid(attnum));
+
+					/*
+					 * For expressions, we need to do two translations. First
+					 * we have to translate the negative attnum to index in
+					 * the list of expressions (in the statistics object).
+					 * Then we need to see if there's a matching clause. The
+					 * index of the unique expression determines the attnum
+					 * (and we offset it).
+					 */
+					idx = -(1 + attnum);
+
+					/* Is the expression index is valid? */
+					Assert((idx >= 0) && (idx < list_length(stat->exprs)));
+
+					expr = (Node *) list_nth(stat->exprs, idx);
+
+					/* try to find the expression in the unique list */
+					for (k = 0; k < unique_exprs_cnt; k++)
+					{
+						/*
+						 * found a matching unique expression, use the attnum
+						 * (derived from index of the unique expression)
+						 */
+						if (equal(unique_exprs[k], expr))
+						{
+							unique_attnum = -(k + 1) + attnum_offset;
+							break;
+						}
+					}
+
+					/*
+					 * Found no matching expression, so we can simply skip
+					 * this dependency, because there's no chance it will be
+					 * fully covered.
+					 */
+					if (unique_attnum == InvalidAttrNumber)
+					{
+						skip = true;
+						break;
+					}
+
+					/* otherwise remap it to the new attnum */
+					dep->attributes[j] = unique_attnum;
+				}
+
+				/* if found a matching dependency, keep it */
+				if (!skip)
+				{
+					/* maybe we've skipped something earlier, so move it */
+					if (ndeps != i)
+						deps->deps[ndeps] = deps->deps[i];
+
+					ndeps++;
+				}
+			}
+
+			deps->ndeps = ndeps;
+		}
+
+		/*
+		 * It's possible we've removed all dependencies, in which case we
+		 * don't bother adding it to the list.
+		 */
+		if (deps->ndeps > 0)
+		{
+			func_dependencies[nfunc_dependencies] = deps;
+			total_ndeps += deps->ndeps;
+			nfunc_dependencies++;
+		}
+	}
+
+	/* if no matching stats could be found then we've nothing to do */
+	if (nfunc_dependencies == 0)
+	{
+		pfree(func_dependencies);
+		bms_free(clauses_attnums);
+		pfree(list_attnums);
+		pfree(unique_exprs);
+		return 1.0;
+	}
+
+	/*
+	 * Work out which dependencies we can apply, starting with the
+	 * widest/strongest ones, and proceeding to smaller/weaker ones.
+	 */
+	dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
+											total_ndeps);
+	ndependencies = 0;
+
+	while (true)
+	{
+		MVDependency *dependency;
+		AttrNumber	attnum;
+
+		/* the widest/strongest dependency, fully matched by clauses */
+		dependency = find_strongest_dependency(func_dependencies,
+											   nfunc_dependencies,
+											   clauses_attnums);
+		if (!dependency)
+			break;
+
+		dependencies[ndependencies++] = dependency;
+
+		/* Ignore dependencies using this implied attribute in later loops */
+		attnum = dependency->attributes[dependency->nattributes - 1];
+		clauses_attnums = bms_del_member(clauses_attnums, attnum);
+	}
+
+	/*
+	 * If we found applicable dependencies, use them to estimate all
+	 * compatible clauses on attributes that they refer to.
+	 */
+	if (ndependencies != 0)
+		s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
+										   sjinfo, dependencies, ndependencies,
+										   list_attnums, estimatedclauses);
+
+	/* free deserialized functional dependencies (and then the array) */
+	for (i = 0; i < nfunc_dependencies; i++)
+		pfree(func_dependencies[i]);
+
+	pfree(dependencies);
+	pfree(func_dependencies);
+	bms_free(clauses_attnums);
+	pfree(list_attnums);
+	pfree(unique_exprs);
+
+	return s1;
+}