Adding upstream version 14.5.upstream/14.5 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit: 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree: 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/utils/adt/like_support.c
parent: Initial commit. (diff)
download: postgresql-14-upstream.tar.xz
postgresql-14-upstream.zip
1 files changed, 1770 insertions, 0 deletions
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
new file mode 100644
index 0000000..241e6f0
--- /dev/null
+++ b/src/backend/utils/adt/like_support.c
@@ -0,0 +1,1770 @@
+/*-------------------------------------------------------------------------
+ *
+ * like_support.c
+ *	  Planner support functions for LIKE, regex, and related operators.
+ *
+ * These routines handle special optimization of operators that can be
+ * used with index scans even though they are not known to the executor's
+ * indexscan machinery.  The key idea is that these operators allow us
+ * to derive approximate indexscan qual clauses, such that any tuples
+ * that pass the operator clause itself must also satisfy the simpler
+ * indexscan condition(s).  Then we can use the indexscan machinery
+ * to avoid scanning as much of the table as we'd otherwise have to,
+ * while applying the original operator as a qpqual condition to ensure
+ * we deliver only the tuples we want.  (In essence, we're using a regular
+ * index as if it were a lossy index.)
+ *
+ * An example of what we're doing is
+ *			textfield LIKE 'abc%def'
+ * from which we can generate the indexscanable conditions
+ *			textfield >= 'abc' AND textfield < 'abd'
+ * which allow efficient scanning of an index on textfield.
+ * (In reality, character set and collation issues make the transformation
+ * from LIKE to indexscan limits rather harder than one might think ...
+ * but that's the basic idea.)
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/like_support.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "access/stratnum.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
+#include "utils/selfuncs.h"
+#include "utils/varlena.h"
+
+
+typedef enum
+{
+	Pattern_Type_Like,
+	Pattern_Type_Like_IC,
+	Pattern_Type_Regex,
+	Pattern_Type_Regex_IC,
+	Pattern_Type_Prefix
+} Pattern_Type;
+
+typedef enum
+{
+	Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
+} Pattern_Prefix_Status;
+
+static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
+static List *match_pattern_prefix(Node *leftop,
+								  Node *rightop,
+								  Pattern_Type ptype,
+								  Oid expr_coll,
+								  Oid opfamily,
+								  Oid indexcollation);
+static double patternsel_common(PlannerInfo *root,
+								Oid oprid,
+								Oid opfuncid,
+								List *args,
+								int varRelid,
+								Oid collation,
+								Pattern_Type ptype,
+								bool negate);
+static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
+												  Pattern_Type ptype,
+												  Oid collation,
+												  Const **prefix,
+												  Selectivity *rest_selec);
+static Selectivity prefix_selectivity(PlannerInfo *root,
+									  VariableStatData *vardata,
+									  Oid eqopr, Oid ltopr, Oid geopr,
+									  Oid collation,
+									  Const *prefixcon);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+									bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+									 bool case_insensitive,
+									 int fixed_prefix_len);
+static int	pattern_char_isalpha(char c, bool is_multibyte,
+								 pg_locale_t locale, bool locale_is_c);
+static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
+								  Oid collation);
+static Datum string_to_datum(const char *str, Oid datatype);
+static Const *string_to_const(const char *str, Oid datatype);
+static Const *string_to_bytea_const(const char *str, size_t str_len);
+
+
+/*
+ * Planner support functions for LIKE, regex, and related operators
+ */
+Datum
+textlike_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+	PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
+}
+
+Datum
+texticlike_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+	PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
+}
+
+Datum
+textregexeq_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+	PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
+}
+
+Datum
+texticregexeq_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+	PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
+}
+
+/* Common code for the above */
+static Node *
+like_regex_support(Node *rawreq, Pattern_Type ptype)
+{
+	Node	   *ret = NULL;
+
+	if (IsA(rawreq, SupportRequestSelectivity))
+	{
+		/*
+		 * Make a selectivity estimate for a function call, just as we'd do if
+		 * the call was via the corresponding operator.
+		 */
+		SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+		Selectivity s1;
+
+		if (req->is_join)
+		{
+			/*
+			 * For the moment we just punt.  If patternjoinsel is ever
+			 * improved to do better, this should be made to call it.
+			 */
+			s1 = DEFAULT_MATCH_SEL;
+		}
+		else
+		{
+			/* Share code with operator restriction selectivity functions */
+			s1 = patternsel_common(req->root,
+								   InvalidOid,
+								   req->funcid,
+								   req->args,
+								   req->varRelid,
+								   req->inputcollid,
+								   ptype,
+								   false);
+		}
+		req->selectivity = s1;
+		ret = (Node *) req;
+	}
+	else if (IsA(rawreq, SupportRequestIndexCondition))
+	{
+		/* Try to convert operator/function call to index conditions */
+		SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
+
+		/*
+		 * Currently we have no "reverse" match operators with the pattern on
+		 * the left, so we only need consider cases with the indexkey on the
+		 * left.
+		 */
+		if (req->indexarg != 0)
+			return NULL;
+
+		if (is_opclause(req->node))
+		{
+			OpExpr	   *clause = (OpExpr *) req->node;
+
+			Assert(list_length(clause->args) == 2);
+			ret = (Node *)
+				match_pattern_prefix((Node *) linitial(clause->args),
+									 (Node *) lsecond(clause->args),
+									 ptype,
+									 clause->inputcollid,
+									 req->opfamily,
+									 req->indexcollation);
+		}
+		else if (is_funcclause(req->node))	/* be paranoid */
+		{
+			FuncExpr   *clause = (FuncExpr *) req->node;
+
+			Assert(list_length(clause->args) == 2);
+			ret = (Node *)
+				match_pattern_prefix((Node *) linitial(clause->args),
+									 (Node *) lsecond(clause->args),
+									 ptype,
+									 clause->inputcollid,
+									 req->opfamily,
+									 req->indexcollation);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * match_pattern_prefix
+ *	  Try to generate an indexqual for a LIKE or regex operator.
+ */
+static List *
+match_pattern_prefix(Node *leftop,
+					 Node *rightop,
+					 Pattern_Type ptype,
+					 Oid expr_coll,
+					 Oid opfamily,
+					 Oid indexcollation)
+{
+	List	   *result;
+	Const	   *patt;
+	Const	   *prefix;
+	Pattern_Prefix_Status pstatus;
+	Oid			ldatatype;
+	Oid			rdatatype;
+	Oid			eqopr;
+	Oid			ltopr;
+	Oid			geopr;
+	bool		collation_aware;
+	Expr	   *expr;
+	FmgrInfo	ltproc;
+	Const	   *greaterstr;
+
+	/*
+	 * Can't do anything with a non-constant or NULL pattern argument.
+	 *
+	 * Note that since we restrict ourselves to cases with a hard constant on
+	 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
+	 * about verifying that.
+	 */
+	if (!IsA(rightop, Const) ||
+		((Const *) rightop)->constisnull)
+		return NIL;
+	patt = (Const *) rightop;
+
+	/*
+	 * Not supported if the expression collation is nondeterministic.  The
+	 * optimized equality or prefix tests use bytewise comparisons, which is
+	 * not consistent with nondeterministic collations.  The actual
+	 * pattern-matching implementation functions will later error out that
+	 * pattern-matching is not supported with nondeterministic collations. (We
+	 * could also error out here, but by doing it later we get more precise
+	 * error messages.)  (It should be possible to support at least
+	 * Pattern_Prefix_Exact, but no point as long as the actual
+	 * pattern-matching implementations don't support it.)
+	 *
+	 * expr_coll is not set for a non-collation-aware data type such as bytea.
+	 */
+	if (expr_coll && !get_collation_isdeterministic(expr_coll))
+		return NIL;
+
+	/*
+	 * Try to extract a fixed prefix from the pattern.
+	 */
+	pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
+								   &prefix, NULL);
+
+	/* fail if no fixed prefix */
+	if (pstatus == Pattern_Prefix_None)
+		return NIL;
+
+	/*
+	 * Identify the operators we want to use, based on the type of the
+	 * left-hand argument.  Usually these are just the type's regular
+	 * comparison operators, but if we are considering one of the semi-legacy
+	 * "pattern" opclasses, use the "pattern" operators instead.  Those are
+	 * not collation-sensitive but always use C collation, as we want.  The
+	 * selected operators also determine the needed type of the prefix
+	 * constant.
+	 */
+	ldatatype = exprType(leftop);
+	switch (ldatatype)
+	{
+		case TEXTOID:
+			if (opfamily == TEXT_PATTERN_BTREE_FAM_OID ||
+				opfamily == TEXT_SPGIST_FAM_OID)
+			{
+				eqopr = TextEqualOperator;
+				ltopr = TextPatternLessOperator;
+				geopr = TextPatternGreaterEqualOperator;
+				collation_aware = false;
+			}
+			else
+			{
+				eqopr = TextEqualOperator;
+				ltopr = TextLessOperator;
+				geopr = TextGreaterEqualOperator;
+				collation_aware = true;
+			}
+			rdatatype = TEXTOID;
+			break;
+		case NAMEOID:
+
+			/*
+			 * Note that here, we need the RHS type to be text, so that the
+			 * comparison value isn't improperly truncated to NAMEDATALEN.
+			 */
+			eqopr = NameEqualTextOperator;
+			ltopr = NameLessTextOperator;
+			geopr = NameGreaterEqualTextOperator;
+			collation_aware = true;
+			rdatatype = TEXTOID;
+			break;
+		case BPCHAROID:
+			if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
+			{
+				eqopr = BpcharEqualOperator;
+				ltopr = BpcharPatternLessOperator;
+				geopr = BpcharPatternGreaterEqualOperator;
+				collation_aware = false;
+			}
+			else
+			{
+				eqopr = BpcharEqualOperator;
+				ltopr = BpcharLessOperator;
+				geopr = BpcharGreaterEqualOperator;
+				collation_aware = true;
+			}
+			rdatatype = BPCHAROID;
+			break;
+		case BYTEAOID:
+			eqopr = ByteaEqualOperator;
+			ltopr = ByteaLessOperator;
+			geopr = ByteaGreaterEqualOperator;
+			collation_aware = false;
+			rdatatype = BYTEAOID;
+			break;
+		default:
+			/* Can't get here unless we're attached to the wrong operator */
+			return NIL;
+	}
+
+	/*
+	 * If necessary, verify that the index's collation behavior is compatible.
+	 * For an exact-match case, we don't have to be picky.  Otherwise, insist
+	 * that the index collation be "C".  Note that here we are looking at the
+	 * index's collation, not the expression's collation -- this test is *not*
+	 * dependent on the LIKE/regex operator's collation.
+	 */
+	if (collation_aware)
+	{
+		if (!(pstatus == Pattern_Prefix_Exact ||
+			  lc_collate_is_c(indexcollation)))
+			return NIL;
+	}
+
+	/*
+	 * If necessary, coerce the prefix constant to the right type.  The given
+	 * prefix constant is either text or bytea type, therefore the only case
+	 * where we need to do anything is when converting text to bpchar.  Those
+	 * two types are binary-compatible, so relabeling the Const node is
+	 * sufficient.
+	 */
+	if (prefix->consttype != rdatatype)
+	{
+		Assert(prefix->consttype == TEXTOID &&
+			   rdatatype == BPCHAROID);
+		prefix->consttype = rdatatype;
+	}
+
+	/*
+	 * If we found an exact-match pattern, generate an "=" indexqual.
+	 *
+	 * Here and below, check to see whether the desired operator is actually
+	 * supported by the index opclass, and fail quietly if not.  This allows
+	 * us to not be concerned with specific opclasses (except for the legacy
+	 * "pattern" cases); any index that correctly implements the operators
+	 * will work.
+	 */
+	if (pstatus == Pattern_Prefix_Exact)
+	{
+		if (!op_in_opfamily(eqopr, opfamily))
+			return NIL;
+		expr = make_opclause(eqopr, BOOLOID, false,
+							 (Expr *) leftop, (Expr *) prefix,
+							 InvalidOid, indexcollation);
+		result = list_make1(expr);
+		return result;
+	}
+
+	/*
+	 * Otherwise, we have a nonempty required prefix of the values.
+	 *
+	 * We can always say "x >= prefix".
+	 */
+	if (!op_in_opfamily(geopr, opfamily))
+		return NIL;
+	expr = make_opclause(geopr, BOOLOID, false,
+						 (Expr *) leftop, (Expr *) prefix,
+						 InvalidOid, indexcollation);
+	result = list_make1(expr);
+
+	/*-------
+	 * If we can create a string larger than the prefix, we can say
+	 * "x < greaterstr".  NB: we rely on make_greater_string() to generate
+	 * a guaranteed-greater string, not just a probably-greater string.
+	 * In general this is only guaranteed in C locale, so we'd better be
+	 * using a C-locale index collation.
+	 *-------
+	 */
+	if (!op_in_opfamily(ltopr, opfamily))
+		return result;
+	fmgr_info(get_opcode(ltopr), &ltproc);
+	greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
+	if (greaterstr)
+	{
+		expr = make_opclause(ltopr, BOOLOID, false,
+							 (Expr *) leftop, (Expr *) greaterstr,
+							 InvalidOid, indexcollation);
+		result = lappend(result, expr);
+	}
+
+	return result;
+}
+
+
+/*
+ * patternsel_common - generic code for pattern-match restriction selectivity.
+ *
+ * To support using this from either the operator or function paths, caller
+ * may pass either operator OID or underlying function OID; we look up the
+ * latter from the former if needed.  (We could just have patternsel() call
+ * get_opcode(), but the work would be wasted if we don't have a need to
+ * compare a fixed prefix to the pg_statistic data.)
+ *
+ * Note that oprid and/or opfuncid should be for the positive-match operator
+ * even when negate is true.
+ */
+static double
+patternsel_common(PlannerInfo *root,
+				  Oid oprid,
+				  Oid opfuncid,
+				  List *args,
+				  int varRelid,
+				  Oid collation,
+				  Pattern_Type ptype,
+				  bool negate)
+{
+	VariableStatData vardata;
+	Node	   *other;
+	bool		varonleft;
+	Datum		constval;
+	Oid			consttype;
+	Oid			vartype;
+	Oid			rdatatype;
+	Oid			eqopr;
+	Oid			ltopr;
+	Oid			geopr;
+	Pattern_Prefix_Status pstatus;
+	Const	   *patt;
+	Const	   *prefix = NULL;
+	Selectivity rest_selec = 0;
+	double		nullfrac = 0.0;
+	double		result;
+
+	/*
+	 * Initialize result to the appropriate default estimate depending on
+	 * whether it's a match or not-match operator.
+	 */
+	if (negate)
+		result = 1.0 - DEFAULT_MATCH_SEL;
+	else
+		result = DEFAULT_MATCH_SEL;
+
+	/*
+	 * If expression is not variable op constant, then punt and return the
+	 * default estimate.
+	 */
+	if (!get_restriction_variable(root, args, varRelid,
+								  &vardata, &other, &varonleft))
+		return result;
+	if (!varonleft || !IsA(other, Const))
+	{
+		ReleaseVariableStats(vardata);
+		return result;
+	}
+
+	/*
+	 * If the constant is NULL, assume operator is strict and return zero, ie,
+	 * operator will never return TRUE.  (It's zero even for a negator op.)
+	 */
+	if (((Const *) other)->constisnull)
+	{
+		ReleaseVariableStats(vardata);
+		return 0.0;
+	}
+	constval = ((Const *) other)->constvalue;
+	consttype = ((Const *) other)->consttype;
+
+	/*
+	 * The right-hand const is type text or bytea for all supported operators.
+	 * We do not expect to see binary-compatible types here, since
+	 * const-folding should have relabeled the const to exactly match the
+	 * operator's declared type.
+	 */
+	if (consttype != TEXTOID && consttype != BYTEAOID)
+	{
+		ReleaseVariableStats(vardata);
+		return result;
+	}
+
+	/*
+	 * Similarly, the exposed type of the left-hand side should be one of
+	 * those we know.  (Do not look at vardata.atttype, which might be
+	 * something binary-compatible but different.)	We can use it to identify
+	 * the comparison operators and the required type of the comparison
+	 * constant, much as in match_pattern_prefix().
+	 */
+	vartype = vardata.vartype;
+
+	switch (vartype)
+	{
+		case TEXTOID:
+			eqopr = TextEqualOperator;
+			ltopr = TextLessOperator;
+			geopr = TextGreaterEqualOperator;
+			rdatatype = TEXTOID;
+			break;
+		case NAMEOID:
+
+			/*
+			 * Note that here, we need the RHS type to be text, so that the
+			 * comparison value isn't improperly truncated to NAMEDATALEN.
+			 */
+			eqopr = NameEqualTextOperator;
+			ltopr = NameLessTextOperator;
+			geopr = NameGreaterEqualTextOperator;
+			rdatatype = TEXTOID;
+			break;
+		case BPCHAROID:
+			eqopr = BpcharEqualOperator;
+			ltopr = BpcharLessOperator;
+			geopr = BpcharGreaterEqualOperator;
+			rdatatype = BPCHAROID;
+			break;
+		case BYTEAOID:
+			eqopr = ByteaEqualOperator;
+			ltopr = ByteaLessOperator;
+			geopr = ByteaGreaterEqualOperator;
+			rdatatype = BYTEAOID;
+			break;
+		default:
+			/* Can't get here unless we're attached to the wrong operator */
+			ReleaseVariableStats(vardata);
+			return result;
+	}
+
+	/*
+	 * Grab the nullfrac for use below.
+	 */
+	if (HeapTupleIsValid(vardata.statsTuple))
+	{
+		Form_pg_statistic stats;
+
+		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+		nullfrac = stats->stanullfrac;
+	}
+
+	/*
+	 * Pull out any fixed prefix implied by the pattern, and estimate the
+	 * fractional selectivity of the remainder of the pattern.  Unlike many
+	 * other selectivity estimators, we use the pattern operator's actual
+	 * collation for this step.  This is not because we expect the collation
+	 * to make a big difference in the selectivity estimate (it seldom would),
+	 * but because we want to be sure we cache compiled regexps under the
+	 * right cache key, so that they can be re-used at runtime.
+	 */
+	patt = (Const *) other;
+	pstatus = pattern_fixed_prefix(patt, ptype, collation,
+								   &prefix, &rest_selec);
+
+	/*
+	 * If necessary, coerce the prefix constant to the right type.  The only
+	 * case where we need to do anything is when converting text to bpchar.
+	 * Those two types are binary-compatible, so relabeling the Const node is
+	 * sufficient.
+	 */
+	if (prefix && prefix->consttype != rdatatype)
+	{
+		Assert(prefix->consttype == TEXTOID &&
+			   rdatatype == BPCHAROID);
+		prefix->consttype = rdatatype;
+	}
+
+	if (pstatus == Pattern_Prefix_Exact)
+	{
+		/*
+		 * Pattern specifies an exact match, so estimate as for '='
+		 */
+		result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
+							  false, true, false);
+	}
+	else
+	{
+		/*
+		 * Not exact-match pattern.  If we have a sufficiently large
+		 * histogram, estimate selectivity for the histogram part of the
+		 * population by counting matches in the histogram.  If not, estimate
+		 * selectivity of the fixed prefix and remainder of pattern
+		 * separately, then combine the two to get an estimate of the
+		 * selectivity for the part of the column population represented by
+		 * the histogram.  (For small histograms, we combine these
+		 * approaches.)
+		 *
+		 * We then add up data for any most-common-values values; these are
+		 * not in the histogram population, and we can get exact answers for
+		 * them by applying the pattern operator, so there's no reason to
+		 * approximate.  (If the MCVs cover a significant part of the total
+		 * population, this gives us a big leg up in accuracy.)
+		 */
+		Selectivity selec;
+		int			hist_size;
+		FmgrInfo	opproc;
+		double		mcv_selec,
+					sumcommon;
+
+		/* Try to use the histogram entries to get selectivity */
+		if (!OidIsValid(opfuncid))
+			opfuncid = get_opcode(oprid);
+		fmgr_info(opfuncid, &opproc);
+
+		selec = histogram_selectivity(&vardata, &opproc, collation,
+									  constval, true,
+									  10, 1, &hist_size);
+
+		/* If not at least 100 entries, use the heuristic method */
+		if (hist_size < 100)
+		{
+			Selectivity heursel;
+			Selectivity prefixsel;
+
+			if (pstatus == Pattern_Prefix_Partial)
+				prefixsel = prefix_selectivity(root, &vardata,
+											   eqopr, ltopr, geopr,
+											   collation,
+											   prefix);
+			else
+				prefixsel = 1.0;
+			heursel = prefixsel * rest_selec;
+
+			if (selec < 0)		/* fewer than 10 histogram entries? */
+				selec = heursel;
+			else
+			{
+				/*
+				 * For histogram sizes from 10 to 100, we combine the
+				 * histogram and heuristic selectivities, putting increasingly
+				 * more trust in the histogram for larger sizes.
+				 */
+				double		hist_weight = hist_size / 100.0;
+
+				selec = selec * hist_weight + heursel * (1.0 - hist_weight);
+			}
+		}
+
+		/* In any case, don't believe extremely small or large estimates. */
+		if (selec < 0.0001)
+			selec = 0.0001;
+		else if (selec > 0.9999)
+			selec = 0.9999;
+
+		/*
+		 * If we have most-common-values info, add up the fractions of the MCV
+		 * entries that satisfy MCV OP PATTERN.  These fractions contribute
+		 * directly to the result selectivity.  Also add up the total fraction
+		 * represented by MCV entries.
+		 */
+		mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
+									constval, true,
+									&sumcommon);
+
+		/*
+		 * Now merge the results from the MCV and histogram calculations,
+		 * realizing that the histogram covers only the non-null values that
+		 * are not listed in MCV.
+		 */
+		selec *= 1.0 - nullfrac - sumcommon;
+		selec += mcv_selec;
+		result = selec;
+	}
+
+	/* now adjust if we wanted not-match rather than match */
+	if (negate)
+		result = 1.0 - result - nullfrac;
+
+	/* result should be in range, but make sure... */
+	CLAMP_PROBABILITY(result);
+
+	if (prefix)
+	{
+		pfree(DatumGetPointer(prefix->constvalue));
+		pfree(prefix);
+	}
+
+	ReleaseVariableStats(vardata);
+
+	return result;
+}
+
+/*
+ * Fix impedance mismatch between SQL-callable functions and patternsel_common
+ */
+static double
+patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	int			varRelid = PG_GETARG_INT32(3);
+	Oid			collation = PG_GET_COLLATION();
+
+	/*
+	 * If this is for a NOT LIKE or similar operator, get the corresponding
+	 * positive-match operator and work with that.
+	 */
+	if (negate)
+	{
+		operator = get_negator(operator);
+		if (!OidIsValid(operator))
+			elog(ERROR, "patternsel called for operator without a negator");
+	}
+
+	return patternsel_common(root,
+							 operator,
+							 InvalidOid,
+							 args,
+							 varRelid,
+							 collation,
+							 ptype,
+							 negate);
+}
+
+/*
+ *		regexeqsel		- Selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ *		icregexeqsel	- Selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ *		likesel			- Selectivity of LIKE pattern match.
+ */
+Datum
+likesel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ *		prefixsel			- selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
+ *		iclikesel			- Selectivity of ILIKE pattern match.
+ */
+Datum
+iclikesel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ *		regexnesel		- Selectivity of regular-expression pattern non-match.
+ */
+Datum
+regexnesel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ *		icregexnesel	- Selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnesel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ *		nlikesel		- Selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikesel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ *		icnlikesel		- Selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikesel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+/*
+ * patternjoinsel		- Generic code for pattern-match join selectivity.
+ */
+static double
+patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+	/* For the moment we just punt. */
+	return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
+}
+
+/*
+ *		regexeqjoinsel	- Join selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqjoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ *		icregexeqjoinsel	- Join selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqjoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ *		likejoinsel			- Join selectivity of LIKE pattern match.
+ */
+Datum
+likejoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ *		prefixjoinsel			- Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *		iclikejoinsel			- Join selectivity of ILIKE pattern match.
+ */
+Datum
+iclikejoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ *		regexnejoinsel	- Join selectivity of regex non-match.
+ */
+Datum
+regexnejoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ *		icregexnejoinsel	- Join selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnejoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ *		nlikejoinsel		- Join selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikejoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ *		icnlikejoinsel		- Join selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikejoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Pattern analysis functions
+ *
+ * These routines support analysis of LIKE and regular-expression patterns
+ * by the planner/optimizer.  It's important that they agree with the
+ * regular-expression code in backend/regex/ and the LIKE code in
+ * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
+ * must be conservative: if we report a string longer than the true fixed
+ * prefix, the query may produce actually wrong answers, rather than just
+ * getting a bad selectivity estimate!
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Extract the fixed prefix, if any, for a pattern.
+ *
+ * *prefix is set to a palloc'd prefix string (in the form of a Const node),
+ *	or to NULL if no fixed prefix exists for the pattern.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ *	selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
+ *
+ * The return value distinguishes no fixed prefix, a partial prefix,
+ * or an exact-match-only pattern.
+ */
+
+static Pattern_Prefix_Status
+like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+				  Const **prefix_const, Selectivity *rest_selec)
+{
+	char	   *match;
+	char	   *patt;
+	int			pattlen;
+	Oid			typeid = patt_const->consttype;
+	int			pos,
+				match_pos;
+	bool		is_multibyte = (pg_database_encoding_max_length() > 1);
+	pg_locale_t locale = 0;
+	bool		locale_is_c = false;
+
+	/* the right-hand const is type text or bytea */
+	Assert(typeid == BYTEAOID || typeid == TEXTOID);
+
+	if (case_insensitive)
+	{
+		if (typeid == BYTEAOID)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("case insensitive matching not supported on type bytea")));
+
+		/* If case-insensitive, we need locale info */
+		if (lc_ctype_is_c(collation))
+			locale_is_c = true;
+		else if (collation != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collation))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for ILIKE"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			locale = pg_newlocale_from_collation(collation);
+		}
+	}
+
+	if (typeid != BYTEAOID)
+	{
+		patt = TextDatumGetCString(patt_const->constvalue);
+		pattlen = strlen(patt);
+	}
+	else
+	{
+		bytea	   *bstr = DatumGetByteaPP(patt_const->constvalue);
+
+		pattlen = VARSIZE_ANY_EXHDR(bstr);
+		patt = (char *) palloc(pattlen);
+		memcpy(patt, VARDATA_ANY(bstr), pattlen);
+		Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
+	}
+
+	match = palloc(pattlen + 1);
+	match_pos = 0;
+	for (pos = 0; pos < pattlen; pos++)
+	{
+		/* % and _ are wildcard characters in LIKE */
+		if (patt[pos] == '%' ||
+			patt[pos] == '_')
+			break;
+
+		/* Backslash escapes the next character */
+		if (patt[pos] == '\\')
+		{
+			pos++;
+			if (pos >= pattlen)
+				break;
+		}
+
+		/* Stop if case-varying character (it's sort of a wildcard) */
+		if (case_insensitive &&
+			pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+			break;
+
+		match[match_pos++] = patt[pos];
+	}
+
+	match[match_pos] = '\0';
+
+	if (typeid != BYTEAOID)
+		*prefix_const = string_to_const(match, typeid);
+	else
+		*prefix_const = string_to_bytea_const(match, match_pos);
+
+	if (rest_selec != NULL)
+		*rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+									   case_insensitive);
+
+	pfree(patt);
+	pfree(match);
+
+	/* in LIKE, an empty pattern is an exact match! */
+	if (pos == pattlen)
+		return Pattern_Prefix_Exact;	/* reached end of pattern, so exact */
+
+	if (match_pos > 0)
+		return Pattern_Prefix_Partial;
+
+	return Pattern_Prefix_None;
+}
+
+static Pattern_Prefix_Status
+regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+				   Const **prefix_const, Selectivity *rest_selec)
+{
+	Oid			typeid = patt_const->consttype;
+	char	   *prefix;
+	bool		exact;
+
+	/*
+	 * Should be unnecessary, there are no bytea regex operators defined. As
+	 * such, it should be noted that the rest of this function has *not* been
+	 * made safe for binary (possibly NULL containing) strings.
+	 */
+	if (typeid == BYTEAOID)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("regular-expression matching not supported on type bytea")));
+
+	/* Use the regexp machinery to extract the prefix, if any */
+	prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
+								 case_insensitive, collation,
+								 &exact);
+
+	if (prefix == NULL)
+	{
+		*prefix_const = NULL;
+
+		if (rest_selec != NULL)
+		{
+			char	   *patt = TextDatumGetCString(patt_const->constvalue);
+
+			*rest_selec = regex_selectivity(patt, strlen(patt),
+											case_insensitive,
+											0);
+			pfree(patt);
+		}
+
+		return Pattern_Prefix_None;
+	}
+
+	*prefix_const = string_to_const(prefix, typeid);
+
+	if (rest_selec != NULL)
+	{
+		if (exact)
+		{
+			/* Exact match, so there's no additional selectivity */
+			*rest_selec = 1.0;
+		}
+		else
+		{
+			char	   *patt = TextDatumGetCString(patt_const->constvalue);
+
+			*rest_selec = regex_selectivity(patt, strlen(patt),
+											case_insensitive,
+											strlen(prefix));
+			pfree(patt);
+		}
+	}
+
+	pfree(prefix);
+
+	if (exact)
+		return Pattern_Prefix_Exact;	/* pattern specifies exact match */
+	else
+		return Pattern_Prefix_Partial;
+}
+
+static Pattern_Prefix_Status
+pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
+					 Const **prefix, Selectivity *rest_selec)
+{
+	Pattern_Prefix_Status result;
+
+	switch (ptype)
+	{
+		case Pattern_Type_Like:
+			result = like_fixed_prefix(patt, false, collation,
+									   prefix, rest_selec);
+			break;
+		case Pattern_Type_Like_IC:
+			result = like_fixed_prefix(patt, true, collation,
+									   prefix, rest_selec);
+			break;
+		case Pattern_Type_Regex:
+			result = regex_fixed_prefix(patt, false, collation,
+										prefix, rest_selec);
+			break;
+		case Pattern_Type_Regex_IC:
+			result = regex_fixed_prefix(patt, true, collation,
+										prefix, rest_selec);
+			break;
+		case Pattern_Type_Prefix:
+			/* Prefix type work is trivial.  */
+			result = Pattern_Prefix_Partial;
+			*rest_selec = 1.0;	/* all */
+			*prefix = makeConst(patt->consttype,
+								patt->consttypmod,
+								patt->constcollid,
+								patt->constlen,
+								datumCopy(patt->constvalue,
+										  patt->constbyval,
+										  patt->constlen),
+								patt->constisnull,
+								patt->constbyval);
+			break;
+		default:
+			elog(ERROR, "unrecognized ptype: %d", (int) ptype);
+			result = Pattern_Prefix_None;	/* keep compiler quiet */
+			break;
+	}
+	return result;
+}
+
+/*
+ * Estimate the selectivity of a fixed prefix for a pattern match.
+ *
+ * A fixed prefix "foo" is estimated as the selectivity of the expression
+ * "variable >= 'foo' AND variable < 'fop'".
+ *
+ * The selectivity estimate is with respect to the portion of the column
+ * population represented by the histogram --- the caller must fold this
+ * together with info about MCVs and NULLs.
+ *
+ * We use the given comparison operators and collation to do the estimation.
+ * The given variable and Const must be of the associated datatype(s).
+ *
+ * XXX Note: we make use of the upper bound to estimate operator selectivity
+ * even if the locale is such that we cannot rely on the upper-bound string.
+ * The selectivity only needs to be approximately right anyway, so it seems
+ * more useful to use the upper-bound code than not.
+ */
+static Selectivity
+prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
+				   Oid eqopr, Oid ltopr, Oid geopr,
+				   Oid collation,
+				   Const *prefixcon)
+{
+	Selectivity prefixsel;
+	FmgrInfo	opproc;
+	Const	   *greaterstrcon;
+	Selectivity eq_sel;
+
+	/* Estimate the selectivity of "x >= prefix" */
+	fmgr_info(get_opcode(geopr), &opproc);
+
+	prefixsel = ineq_histogram_selectivity(root, vardata,
+										   geopr, &opproc, true, true,
+										   collation,
+										   prefixcon->constvalue,
+										   prefixcon->consttype);
+
+	if (prefixsel < 0.0)
+	{
+		/* No histogram is present ... return a suitable default estimate */
+		return DEFAULT_MATCH_SEL;
+	}
+
+	/*
+	 * If we can create a string larger than the prefix, say "x < greaterstr".
+	 */
+	fmgr_info(get_opcode(ltopr), &opproc);
+	greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
+	if (greaterstrcon)
+	{
+		Selectivity topsel;
+
+		topsel = ineq_histogram_selectivity(root, vardata,
+											ltopr, &opproc, false, false,
+											collation,
+											greaterstrcon->constvalue,
+											greaterstrcon->consttype);
+
+		/* ineq_histogram_selectivity worked before, it shouldn't fail now */
+		Assert(topsel >= 0.0);
+
+		/*
+		 * Merge the two selectivities in the same way as for a range query
+		 * (see clauselist_selectivity()).  Note that we don't need to worry
+		 * about double-exclusion of nulls, since ineq_histogram_selectivity
+		 * doesn't count those anyway.
+		 */
+		prefixsel = topsel + prefixsel - 1.0;
+	}
+
+	/*
+	 * If the prefix is long then the two bounding values might be too close
+	 * together for the histogram to distinguish them usefully, resulting in a
+	 * zero estimate (plus or minus roundoff error). To avoid returning a
+	 * ridiculously small estimate, compute the estimated selectivity for
+	 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
+	 * estimate should be at least that.)
+	 *
+	 * We apply this even if we couldn't make a greater string.  That case
+	 * suggests that the prefix is near the maximum possible, and thus
+	 * probably off the end of the histogram, and thus we probably got a very
+	 * small estimate from the >= condition; so we still need to clamp.
+	 */
+	eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
+						  false, true, false);
+
+	prefixsel = Max(prefixsel, eq_sel);
+
+	return prefixsel;
+}
+
+
+/*
+ * Estimate the selectivity of a pattern of the specified type.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
+ *
+ * For now, we use a very simplistic approach: fixed characters reduce the
+ * selectivity a good deal, character ranges reduce it a little,
+ * wildcards (such as % for LIKE or .* for regex) increase it.
+ */
+
+#define FIXED_CHAR_SEL	0.20	/* about 1/5 */
+#define CHAR_RANGE_SEL	0.25
+#define ANY_CHAR_SEL	0.9		/* not 1, since it won't match end-of-string */
+#define FULL_WILDCARD_SEL 5.0
+#define PARTIAL_WILDCARD_SEL 2.0
+
+static Selectivity
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
+{
+	Selectivity sel = 1.0;
+	int			pos;
+
+	/* Skip any leading wildcard; it's already factored into initial sel */
+	for (pos = 0; pos < pattlen; pos++)
+	{
+		if (patt[pos] != '%' && patt[pos] != '_')
+			break;
+	}
+
+	for (; pos < pattlen; pos++)
+	{
+		/* % and _ are wildcard characters in LIKE */
+		if (patt[pos] == '%')
+			sel *= FULL_WILDCARD_SEL;
+		else if (patt[pos] == '_')
+			sel *= ANY_CHAR_SEL;
+		else if (patt[pos] == '\\')
+		{
+			/* Backslash quotes the next character */
+			pos++;
+			if (pos >= pattlen)
+				break;
+			sel *= FIXED_CHAR_SEL;
+		}
+		else
+			sel *= FIXED_CHAR_SEL;
+	}
+	/* Could get sel > 1 if multiple wildcards */
+	if (sel > 1.0)
+		sel = 1.0;
+	return sel;
+}
+
+static Selectivity
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
+{
+	Selectivity sel = 1.0;
+	int			paren_depth = 0;
+	int			paren_pos = 0;	/* dummy init to keep compiler quiet */
+	int			pos;
+
+	for (pos = 0; pos < pattlen; pos++)
+	{
+		if (patt[pos] == '(')
+		{
+			if (paren_depth == 0)
+				paren_pos = pos;	/* remember start of parenthesized item */
+			paren_depth++;
+		}
+		else if (patt[pos] == ')' && paren_depth > 0)
+		{
+			paren_depth--;
+			if (paren_depth == 0)
+				sel *= regex_selectivity_sub(patt + (paren_pos + 1),
+											 pos - (paren_pos + 1),
+											 case_insensitive);
+		}
+		else if (patt[pos] == '|' && paren_depth == 0)
+		{
+			/*
+			 * If unquoted | is present at paren level 0 in pattern, we have
+			 * multiple alternatives; sum their probabilities.
+			 */
+			sel += regex_selectivity_sub(patt + (pos + 1),
+										 pattlen - (pos + 1),
+										 case_insensitive);
+			break;				/* rest of pattern is now processed */
+		}
+		else if (patt[pos] == '[')
+		{
+			bool		negclass = false;
+
+			if (patt[++pos] == '^')
+			{
+				negclass = true;
+				pos++;
+			}
+			if (patt[pos] == ']')	/* ']' at start of class is not special */
+				pos++;
+			while (pos < pattlen && patt[pos] != ']')
+				pos++;
+			if (paren_depth == 0)
+				sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
+		}
+		else if (patt[pos] == '.')
+		{
+			if (paren_depth == 0)
+				sel *= ANY_CHAR_SEL;
+		}
+		else if (patt[pos] == '*' ||
+				 patt[pos] == '?' ||
+				 patt[pos] == '+')
+		{
+			/* Ought to be smarter about quantifiers... */
+			if (paren_depth == 0)
+				sel *= PARTIAL_WILDCARD_SEL;
+		}
+		else if (patt[pos] == '{')
+		{
+			while (pos < pattlen && patt[pos] != '}')
+				pos++;
+			if (paren_depth == 0)
+				sel *= PARTIAL_WILDCARD_SEL;
+		}
+		else if (patt[pos] == '\\')
+		{
+			/* backslash quotes the next character */
+			pos++;
+			if (pos >= pattlen)
+				break;
+			if (paren_depth == 0)
+				sel *= FIXED_CHAR_SEL;
+		}
+		else
+		{
+			if (paren_depth == 0)
+				sel *= FIXED_CHAR_SEL;
+		}
+	}
+	/* Could get sel > 1 if multiple wildcards */
+	if (sel > 1.0)
+		sel = 1.0;
+	return sel;
+}
+
+static Selectivity
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
+				  int fixed_prefix_len)
+{
+	Selectivity sel;
+
+	/* If patt doesn't end with $, consider it to have a trailing wildcard */
+	if (pattlen > 0 && patt[pattlen - 1] == '$' &&
+		(pattlen == 1 || patt[pattlen - 2] != '\\'))
+	{
+		/* has trailing $ */
+		sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
+	}
+	else
+	{
+		/* no trailing $ */
+		sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
+		sel *= FULL_WILDCARD_SEL;
+	}
+
+	/*
+	 * If there's a fixed prefix, discount its selectivity.  We have to be
+	 * careful here since a very long prefix could result in pow's result
+	 * underflowing to zero (in which case "sel" probably has as well).
+	 */
+	if (fixed_prefix_len > 0)
+	{
+		double		prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
+
+		if (prefixsel > 0.0)
+			sel /= prefixsel;
+	}
+
+	/* Make sure result stays in range */
+	CLAMP_PROBABILITY(sel);
+	return sel;
+}
+
+/*
+ * Check whether char is a letter (and, hence, subject to case-folding)
+ *
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does
+ * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
+ * Instead, just assume any non-ASCII char is potentially case-varying, and
+ * hard-wire knowledge of which ASCII chars are letters.
+ */
+static int
+pattern_char_isalpha(char c, bool is_multibyte,
+					 pg_locale_t locale, bool locale_is_c)
+{
+	if (locale_is_c)
+		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+	else if (is_multibyte && IS_HIGHBIT_SET(c))
+		return true;
+	else if (locale && locale->provider == COLLPROVIDER_ICU)
+		return IS_HIGHBIT_SET(c) ||
+			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+#ifdef HAVE_LOCALE_T
+	else if (locale && locale->provider == COLLPROVIDER_LIBC)
+		return isalpha_l((unsigned char) c, locale->info.lt);
+#endif
+	else
+		return isalpha((unsigned char) c);
+}
+
+
+/*
+ * For bytea, the increment function need only increment the current byte
+ * (there are no multibyte characters to worry about).
+ */
+static bool
+byte_increment(unsigned char *ptr, int len)
+{
+	if (*ptr >= 255)
+		return false;
+	(*ptr)++;
+	return true;
+}
+
+/*
+ * Try to generate a string greater than the given string or any
+ * string it is a prefix of.  If successful, return a palloc'd string
+ * in the form of a Const node; else return NULL.
+ *
+ * The caller must provide the appropriate "less than" comparison function
+ * for testing the strings, along with the collation to use.
+ *
+ * The key requirement here is that given a prefix string, say "foo",
+ * we must be able to generate another string "fop" that is greater than
+ * all strings "foobar" starting with "foo".  We can test that we have
+ * generated a string greater than the prefix string, but in non-C collations
+ * that is not a bulletproof guarantee that an extension of the string might
+ * not sort after it; an example is that "foo " is less than "foo!", but it
+ * is not clear that a "dictionary" sort ordering will consider "foo!" less
+ * than "foo bar".  CAUTION: Therefore, this function should be used only for
+ * estimation purposes when working in a non-C collation.
+ *
+ * To try to catch most cases where an extended string might otherwise sort
+ * before the result value, we determine which of the strings "Z", "z", "y",
+ * and "9" is seen as largest by the collation, and append that to the given
+ * prefix before trying to find a string that compares as larger.
+ *
+ * To search for a greater string, we repeatedly "increment" the rightmost
+ * character, using an encoding-specific character incrementer function.
+ * When it's no longer possible to increment the last character, we truncate
+ * off that character and start incrementing the next-to-rightmost.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
+ *
+ * This could be rather slow in the worst case, but in most cases we
+ * won't have to try more than one or two strings before succeeding.
+ *
+ * Note that it's important for the character incrementer not to be too anal
+ * about producing every possible character code, since in some cases the only
+ * way to get a larger string is to increment a previous character position.
+ * So we don't want to spend too much time trying every possible character
+ * code at the last position.  A good rule of thumb is to be sure that we
+ * don't try more than 256*K values for a K-byte character (and definitely
+ * not 256^K, which is what an exhaustive search would approach).
+ */
+static Const *
+make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
+{
+	Oid			datatype = str_const->consttype;
+	char	   *workstr;
+	int			len;
+	Datum		cmpstr;
+	char	   *cmptxt = NULL;
+	mbcharacter_incrementer charinc;
+
+	/*
+	 * Get a modifiable copy of the prefix string in C-string format, and set
+	 * up the string we will compare to as a Datum.  In C locale this can just
+	 * be the given prefix string, otherwise we need to add a suffix.  Type
+	 * BYTEA sorts bytewise so it never needs a suffix either.
+	 */
+	if (datatype == BYTEAOID)
+	{
+		bytea	   *bstr = DatumGetByteaPP(str_const->constvalue);
+
+		len = VARSIZE_ANY_EXHDR(bstr);
+		workstr = (char *) palloc(len);
+		memcpy(workstr, VARDATA_ANY(bstr), len);
+		Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
+		cmpstr = str_const->constvalue;
+	}
+	else
+	{
+		if (datatype == NAMEOID)
+			workstr = DatumGetCString(DirectFunctionCall1(nameout,
+														  str_const->constvalue));
+		else
+			workstr = TextDatumGetCString(str_const->constvalue);
+		len = strlen(workstr);
+		if (lc_collate_is_c(collation) || len == 0)
+			cmpstr = str_const->constvalue;
+		else
+		{
+			/* If first time through, determine the suffix to use */
+			static char suffixchar = 0;
+			static Oid	suffixcollation = 0;
+
+			if (!suffixchar || suffixcollation != collation)
+			{
+				char	   *best;
+
+				best = "Z";
+				if (varstr_cmp(best, 1, "z", 1, collation) < 0)
+					best = "z";
+				if (varstr_cmp(best, 1, "y", 1, collation) < 0)
+					best = "y";
+				if (varstr_cmp(best, 1, "9", 1, collation) < 0)
+					best = "9";
+				suffixchar = *best;
+				suffixcollation = collation;
+			}
+
+			/* And build the string to compare to */
+			if (datatype == NAMEOID)
+			{
+				cmptxt = palloc(len + 2);
+				memcpy(cmptxt, workstr, len);
+				cmptxt[len] = suffixchar;
+				cmptxt[len + 1] = '\0';
+				cmpstr = PointerGetDatum(cmptxt);
+			}
+			else
+			{
+				cmptxt = palloc(VARHDRSZ + len + 1);
+				SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
+				memcpy(VARDATA(cmptxt), workstr, len);
+				*(VARDATA(cmptxt) + len) = suffixchar;
+				cmpstr = PointerGetDatum(cmptxt);
+			}
+		}
+	}
+
+	/* Select appropriate character-incrementer function */
+	if (datatype == BYTEAOID)
+		charinc = byte_increment;
+	else
+		charinc = pg_database_encoding_character_incrementer();
+
+	/* And search ... */
+	while (len > 0)
+	{
+		int			charlen;
+		unsigned char *lastchar;
+
+		/* Identify the last character --- for bytea, just the last byte */
+		if (datatype == BYTEAOID)
+			charlen = 1;
+		else
+			charlen = len - pg_mbcliplen(workstr, len, len - 1);
+		lastchar = (unsigned char *) (workstr + len - charlen);
+
+		/*
+		 * Try to generate a larger string by incrementing the last character
+		 * (for BYTEA, we treat each byte as a character).
+		 *
+		 * Note: the incrementer function is expected to return true if it's
+		 * generated a valid-per-the-encoding new character, otherwise false.
+		 * The contents of the character on false return are unspecified.
+		 */
+		while (charinc(lastchar, charlen))
+		{
+			Const	   *workstr_const;
+
+			if (datatype == BYTEAOID)
+				workstr_const = string_to_bytea_const(workstr, len);
+			else
+				workstr_const = string_to_const(workstr, datatype);
+
+			if (DatumGetBool(FunctionCall2Coll(ltproc,
+											   collation,
+											   cmpstr,
+											   workstr_const->constvalue)))
+			{
+				/* Successfully made a string larger than cmpstr */
+				if (cmptxt)
+					pfree(cmptxt);
+				pfree(workstr);
+				return workstr_const;
+			}
+
+			/* No good, release unusable value and try again */
+			pfree(DatumGetPointer(workstr_const->constvalue));
+			pfree(workstr_const);
+		}
+
+		/*
+		 * No luck here, so truncate off the last character and try to
+		 * increment the next one.
+		 */
+		len -= charlen;
+		workstr[len] = '\0';
+	}
+
+	/* Failed... */
+	if (cmptxt)
+		pfree(cmptxt);
+	pfree(workstr);
+
+	return NULL;
+}
+
+/*
+ * Generate a Datum of the appropriate type from a C string.
+ * Note that all of the supported types are pass-by-ref, so the
+ * returned value should be pfree'd if no longer needed.
+ */
+static Datum
+string_to_datum(const char *str, Oid datatype)
+{
+	Assert(str != NULL);
+
+	/*
+	 * We cheat a little by assuming that CStringGetTextDatum() will do for
+	 * bpchar and varchar constants too...
+	 */
+	if (datatype == NAMEOID)
+		return DirectFunctionCall1(namein, CStringGetDatum(str));
+	else if (datatype == BYTEAOID)
+		return DirectFunctionCall1(byteain, CStringGetDatum(str));
+	else
+		return CStringGetTextDatum(str);
+}
+
+/*
+ * Generate a Const node of the appropriate type from a C string.
+ */
+static Const *
+string_to_const(const char *str, Oid datatype)
+{
+	Datum		conval = string_to_datum(str, datatype);
+	Oid			collation;
+	int			constlen;
+
+	/*
+	 * We only need to support a few datatypes here, so hard-wire properties
+	 * instead of incurring the expense of catalog lookups.
+	 */
+	switch (datatype)
+	{
+		case TEXTOID:
+		case VARCHAROID:
+		case BPCHAROID:
+			collation = DEFAULT_COLLATION_OID;
+			constlen = -1;
+			break;
+
+		case NAMEOID:
+			collation = C_COLLATION_OID;
+			constlen = NAMEDATALEN;
+			break;
+
+		case BYTEAOID:
+			collation = InvalidOid;
+			constlen = -1;
+			break;
+
+		default:
+			elog(ERROR, "unexpected datatype in string_to_const: %u",
+				 datatype);
+			return NULL;
+	}
+
+	return makeConst(datatype, -1, collation, constlen,
+					 conval, false, false);
+}
+
+/*
+ * Generate a Const node of bytea type from a binary C string and a length.
+ */
+static Const *
+string_to_bytea_const(const char *str, size_t str_len)
+{
+	bytea	   *bstr = palloc(VARHDRSZ + str_len);
+	Datum		conval;
+
+	memcpy(VARDATA(bstr), str, str_len);
+	SET_VARSIZE(bstr, VARHDRSZ + str_len);
+	conval = PointerGetDatum(bstr);
+
+	return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:15:05 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:15:05 +0000
commit	46651ce6fe013220ed397add242004d764fc0153 (patch)
tree	6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/utils/adt/like_support.c
parent	Initial commit. (diff)
download	postgresql-14-upstream.tar.xz postgresql-14-upstream.zip