summaryrefslogtreecommitdiffstats
path: root/src/backend/optimizer/geqo/geqo_eval.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/geqo/geqo_eval.c')
-rw-r--r--src/backend/optimizer/geqo/geqo_eval.c338
1 files changed, 338 insertions, 0 deletions
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
new file mode 100644
index 0000000..004481d
--- /dev/null
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -0,0 +1,338 @@
+/*------------------------------------------------------------------------
+ *
+ * geqo_eval.c
+ * Routines to evaluate query trees
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/optimizer/geqo/geqo_eval.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* contributed by:
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+ * Martin Utesch * Institute of Automatic Control *
+ = = University of Mining and Technology =
+ * utesch@aut.tu-freiberg.de * Freiberg, Germany *
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+ */
+
+#include "postgres.h"
+
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+
+#include "optimizer/geqo.h"
+#include "optimizer/joininfo.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "utils/memutils.h"
+
+
+/* A "clump" of already-joined relations within gimme_tree */
+typedef struct
+{
+ RelOptInfo *joinrel; /* joinrel for the set of relations */
+ int size; /* number of input relations in clump */
+} Clump;
+
+static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
+ int num_gene, bool force);
+static bool desirable_join(PlannerInfo *root,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel);
+
+
+/*
+ * geqo_eval
+ *
+ * Returns cost of a query tree as an individual of the population.
+ *
+ * If no legal join order can be extracted from the proposed tour,
+ * returns DBL_MAX.
+ */
+Cost
+geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
+{
+ MemoryContext mycontext;
+ MemoryContext oldcxt;
+ RelOptInfo *joinrel;
+ Cost fitness;
+ int savelength;
+ struct HTAB *savehash;
+
+ /*
+ * Create a private memory context that will hold all temp storage
+ * allocated inside gimme_tree().
+ *
+ * Since geqo_eval() will be called many times, we can't afford to let all
+ * that memory go unreclaimed until end of statement. Note we make the
+ * temp context a child of the planner's normal context, so that it will
+ * be freed even if we abort via ereport(ERROR).
+ */
+ mycontext = AllocSetContextCreate(CurrentMemoryContext,
+ "GEQO",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcxt = MemoryContextSwitchTo(mycontext);
+
+ /*
+ * gimme_tree will add entries to root->join_rel_list, which may or may
+ * not already contain some entries. The newly added entries will be
+ * recycled by the MemoryContextDelete below, so we must ensure that the
+ * list is restored to its former state before exiting. We can do this by
+ * truncating the list to its original length. NOTE this assumes that any
+ * added entries are appended at the end!
+ *
+ * We also must take care not to mess up the outer join_rel_hash, if there
+ * is one. We can do this by just temporarily setting the link to NULL.
+ * (If we are dealing with enough join rels, which we very likely are, a
+ * new hash table will get built and used locally.)
+ *
+ * join_rel_level[] shouldn't be in use, so just Assert it isn't.
+ */
+ savelength = list_length(root->join_rel_list);
+ savehash = root->join_rel_hash;
+ Assert(root->join_rel_level == NULL);
+
+ root->join_rel_hash = NULL;
+
+ /* construct the best path for the given combination of relations */
+ joinrel = gimme_tree(root, tour, num_gene);
+
+ /*
+ * compute fitness, if we found a valid join
+ *
+ * XXX geqo does not currently support optimization for partial result
+ * retrieval, nor do we take any cognizance of possible use of
+ * parameterized paths --- how to fix?
+ */
+ if (joinrel)
+ {
+ Path *best_path = joinrel->cheapest_total_path;
+
+ fitness = best_path->total_cost;
+ }
+ else
+ fitness = DBL_MAX;
+
+ /*
+ * Restore join_rel_list to its former state, and put back original
+ * hashtable if any.
+ */
+ root->join_rel_list = list_truncate(root->join_rel_list,
+ savelength);
+ root->join_rel_hash = savehash;
+
+ /* release all the memory acquired within gimme_tree */
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(mycontext);
+
+ return fitness;
+}
+
+/*
+ * gimme_tree
+ * Form planner estimates for a join tree constructed in the specified
+ * order.
+ *
+ * 'tour' is the proposed join order, of length 'num_gene'
+ *
+ * Returns a new join relation whose cheapest path is the best plan for
+ * this join order. NB: will return NULL if join order is invalid and
+ * we can't modify it into a valid order.
+ *
+ * The original implementation of this routine always joined in the specified
+ * order, and so could only build left-sided plans (and right-sided and
+ * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
+ * It could never produce a "bushy" plan. This had a couple of big problems,
+ * of which the worst was that there are situations involving join order
+ * restrictions where the only valid plans are bushy.
+ *
+ * The present implementation takes the given tour as a guideline, but
+ * postpones joins that are illegal or seem unsuitable according to some
+ * heuristic rules. This allows correct bushy plans to be generated at need,
+ * and as a nice side-effect it seems to materially improve the quality of the
+ * generated plans. Note however that since it's just a heuristic, it can
+ * still fail in some cases. (In particular, we might clump together
+ * relations that actually mustn't be joined yet due to LATERAL restrictions;
+ * since there's no provision for un-clumping, this must lead to failure.)
+ */
+RelOptInfo *
+gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
+{
+ GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
+ List *clumps;
+ int rel_count;
+
+ /*
+ * Sometimes, a relation can't yet be joined to others due to heuristics
+ * or actual semantic restrictions. We maintain a list of "clumps" of
+ * successfully joined relations, with larger clumps at the front. Each
+ * new relation from the tour is added to the first clump it can be joined
+ * to; if there is none then it becomes a new clump of its own. When we
+ * enlarge an existing clump we check to see if it can now be merged with
+ * any other clumps. After the tour is all scanned, we forget about the
+ * heuristics and try to forcibly join any remaining clumps. If we are
+ * unable to merge all the clumps into one, fail.
+ */
+ clumps = NIL;
+
+ for (rel_count = 0; rel_count < num_gene; rel_count++)
+ {
+ int cur_rel_index;
+ RelOptInfo *cur_rel;
+ Clump *cur_clump;
+
+ /* Get the next input relation */
+ cur_rel_index = (int) tour[rel_count];
+ cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
+ cur_rel_index - 1);
+
+ /* Make it into a single-rel clump */
+ cur_clump = (Clump *) palloc(sizeof(Clump));
+ cur_clump->joinrel = cur_rel;
+ cur_clump->size = 1;
+
+ /* Merge it into the clumps list, using only desirable joins */
+ clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
+ }
+
+ if (list_length(clumps) > 1)
+ {
+ /* Force-join the remaining clumps in some legal order */
+ List *fclumps;
+ ListCell *lc;
+
+ fclumps = NIL;
+ foreach(lc, clumps)
+ {
+ Clump *clump = (Clump *) lfirst(lc);
+
+ fclumps = merge_clump(root, fclumps, clump, num_gene, true);
+ }
+ clumps = fclumps;
+ }
+
+ /* Did we succeed in forming a single join relation? */
+ if (list_length(clumps) != 1)
+ return NULL;
+
+ return ((Clump *) linitial(clumps))->joinrel;
+}
+
+/*
+ * Merge a "clump" into the list of existing clumps for gimme_tree.
+ *
+ * We try to merge the clump into some existing clump, and repeat if
+ * successful. When no more merging is possible, insert the clump
+ * into the list, preserving the list ordering rule (namely, that
+ * clumps of larger size appear earlier).
+ *
+ * If force is true, merge anywhere a join is legal, even if it causes
+ * a cartesian join to be performed. When force is false, do only
+ * "desirable" joins.
+ */
+static List *
+merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
+ bool force)
+{
+ ListCell *lc;
+ int pos;
+
+ /* Look for a clump that new_clump can join to */
+ foreach(lc, clumps)
+ {
+ Clump *old_clump = (Clump *) lfirst(lc);
+
+ if (force ||
+ desirable_join(root, old_clump->joinrel, new_clump->joinrel))
+ {
+ RelOptInfo *joinrel;
+
+ /*
+ * Construct a RelOptInfo representing the join of these two input
+ * relations. Note that we expect the joinrel not to exist in
+ * root->join_rel_list yet, and so the paths constructed for it
+ * will only include the ones we want.
+ */
+ joinrel = make_join_rel(root,
+ old_clump->joinrel,
+ new_clump->joinrel);
+
+ /* Keep searching if join order is not valid */
+ if (joinrel)
+ {
+ /* Create paths for partitionwise joins. */
+ generate_partitionwise_join_paths(root, joinrel);
+
+ /*
+ * Except for the topmost scan/join rel, consider gathering
+ * partial paths. We'll do the same for the topmost scan/join
+ * rel once we know the final targetlist (see
+ * grouping_planner).
+ */
+ if (!bms_equal(joinrel->relids, root->all_baserels))
+ generate_useful_gather_paths(root, joinrel, false);
+
+ /* Find and save the cheapest paths for this joinrel */
+ set_cheapest(joinrel);
+
+ /* Absorb new clump into old */
+ old_clump->joinrel = joinrel;
+ old_clump->size += new_clump->size;
+ pfree(new_clump);
+
+ /* Remove old_clump from list */
+ clumps = foreach_delete_current(clumps, lc);
+
+ /*
+ * Recursively try to merge the enlarged old_clump with
+ * others. When no further merge is possible, we'll reinsert
+ * it into the list.
+ */
+ return merge_clump(root, clumps, old_clump, num_gene, force);
+ }
+ }
+ }
+
+ /*
+ * No merging is possible, so add new_clump as an independent clump, in
+ * proper order according to size. We can be fast for the common case
+ * where it has size 1 --- it should always go at the end.
+ */
+ if (clumps == NIL || new_clump->size == 1)
+ return lappend(clumps, new_clump);
+
+ /* Else search for the place to insert it */
+ for (pos = 0; pos < list_length(clumps); pos++)
+ {
+ Clump *old_clump = (Clump *) list_nth(clumps, pos);
+
+ if (new_clump->size > old_clump->size)
+ break; /* new_clump belongs before old_clump */
+ }
+ clumps = list_insert_nth(clumps, pos, new_clump);
+
+ return clumps;
+}
+
+/*
+ * Heuristics for gimme_tree: do we want to join these two relations?
+ */
+static bool
+desirable_join(PlannerInfo *root,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel)
+{
+ /*
+ * Join if there is an applicable join clause, or if there is a join order
+ * restriction forcing these rels to be joined.
+ */
+ if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
+ have_join_order_restriction(root, outer_rel, inner_rel))
+ return true;
+
+ /* Otherwise postpone the join till later. */
+ return false;
+}