diff options
Diffstat (limited to 'src/backend/optimizer/geqo/geqo_eval.c')
-rw-r--r-- | src/backend/optimizer/geqo/geqo_eval.c | 338 |
1 files changed, 338 insertions, 0 deletions
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c new file mode 100644 index 0000000..2ecba83 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -0,0 +1,338 @@ +/*------------------------------------------------------------------------ + * + * geqo_eval.c + * Routines to evaluate query trees + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_eval.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#include "postgres.h" + +#include <float.h> +#include <limits.h> +#include <math.h> + +#include "optimizer/geqo.h" +#include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "utils/memutils.h" + + +/* A "clump" of already-joined relations within gimme_tree */ +typedef struct +{ + RelOptInfo *joinrel; /* joinrel for the set of relations */ + int size; /* number of input relations in clump */ +} Clump; + +static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, + int num_gene, bool force); +static bool desirable_join(PlannerInfo *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel); + + +/* + * geqo_eval + * + * Returns cost of a query tree as an individual of the population. + * + * If no legal join order can be extracted from the proposed tour, + * returns DBL_MAX. + */ +Cost +geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) +{ + MemoryContext mycontext; + MemoryContext oldcxt; + RelOptInfo *joinrel; + Cost fitness; + int savelength; + struct HTAB *savehash; + + /* + * Create a private memory context that will hold all temp storage + * allocated inside gimme_tree(). + * + * Since geqo_eval() will be called many times, we can't afford to let all + * that memory go unreclaimed until end of statement. Note we make the + * temp context a child of the planner's normal context, so that it will + * be freed even if we abort via ereport(ERROR). + */ + mycontext = AllocSetContextCreate(CurrentMemoryContext, + "GEQO", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(mycontext); + + /* + * gimme_tree will add entries to root->join_rel_list, which may or may + * not already contain some entries. The newly added entries will be + * recycled by the MemoryContextDelete below, so we must ensure that the + * list is restored to its former state before exiting. We can do this by + * truncating the list to its original length. NOTE this assumes that any + * added entries are appended at the end! + * + * We also must take care not to mess up the outer join_rel_hash, if there + * is one. We can do this by just temporarily setting the link to NULL. + * (If we are dealing with enough join rels, which we very likely are, a + * new hash table will get built and used locally.) + * + * join_rel_level[] shouldn't be in use, so just Assert it isn't. + */ + savelength = list_length(root->join_rel_list); + savehash = root->join_rel_hash; + Assert(root->join_rel_level == NULL); + + root->join_rel_hash = NULL; + + /* construct the best path for the given combination of relations */ + joinrel = gimme_tree(root, tour, num_gene); + + /* + * compute fitness, if we found a valid join + * + * XXX geqo does not currently support optimization for partial result + * retrieval, nor do we take any cognizance of possible use of + * parameterized paths --- how to fix? + */ + if (joinrel) + { + Path *best_path = joinrel->cheapest_total_path; + + fitness = best_path->total_cost; + } + else + fitness = DBL_MAX; + + /* + * Restore join_rel_list to its former state, and put back original + * hashtable if any. + */ + root->join_rel_list = list_truncate(root->join_rel_list, + savelength); + root->join_rel_hash = savehash; + + /* release all the memory acquired within gimme_tree */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(mycontext); + + return fitness; +} + +/* + * gimme_tree + * Form planner estimates for a join tree constructed in the specified + * order. + * + * 'tour' is the proposed join order, of length 'num_gene' + * + * Returns a new join relation whose cheapest path is the best plan for + * this join order. NB: will return NULL if join order is invalid and + * we can't modify it into a valid order. + * + * The original implementation of this routine always joined in the specified + * order, and so could only build left-sided plans (and right-sided and + * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). + * It could never produce a "bushy" plan. This had a couple of big problems, + * of which the worst was that there are situations involving join order + * restrictions where the only valid plans are bushy. + * + * The present implementation takes the given tour as a guideline, but + * postpones joins that are illegal or seem unsuitable according to some + * heuristic rules. This allows correct bushy plans to be generated at need, + * and as a nice side-effect it seems to materially improve the quality of the + * generated plans. Note however that since it's just a heuristic, it can + * still fail in some cases. (In particular, we might clump together + * relations that actually mustn't be joined yet due to LATERAL restrictions; + * since there's no provision for un-clumping, this must lead to failure.) + */ +RelOptInfo * +gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) +{ + GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; + List *clumps; + int rel_count; + + /* + * Sometimes, a relation can't yet be joined to others due to heuristics + * or actual semantic restrictions. We maintain a list of "clumps" of + * successfully joined relations, with larger clumps at the front. Each + * new relation from the tour is added to the first clump it can be joined + * to; if there is none then it becomes a new clump of its own. When we + * enlarge an existing clump we check to see if it can now be merged with + * any other clumps. After the tour is all scanned, we forget about the + * heuristics and try to forcibly join any remaining clumps. If we are + * unable to merge all the clumps into one, fail. + */ + clumps = NIL; + + for (rel_count = 0; rel_count < num_gene; rel_count++) + { + int cur_rel_index; + RelOptInfo *cur_rel; + Clump *cur_clump; + + /* Get the next input relation */ + cur_rel_index = (int) tour[rel_count]; + cur_rel = (RelOptInfo *) list_nth(private->initial_rels, + cur_rel_index - 1); + + /* Make it into a single-rel clump */ + cur_clump = (Clump *) palloc(sizeof(Clump)); + cur_clump->joinrel = cur_rel; + cur_clump->size = 1; + + /* Merge it into the clumps list, using only desirable joins */ + clumps = merge_clump(root, clumps, cur_clump, num_gene, false); + } + + if (list_length(clumps) > 1) + { + /* Force-join the remaining clumps in some legal order */ + List *fclumps; + ListCell *lc; + + fclumps = NIL; + foreach(lc, clumps) + { + Clump *clump = (Clump *) lfirst(lc); + + fclumps = merge_clump(root, fclumps, clump, num_gene, true); + } + clumps = fclumps; + } + + /* Did we succeed in forming a single join relation? */ + if (list_length(clumps) != 1) + return NULL; + + return ((Clump *) linitial(clumps))->joinrel; +} + +/* + * Merge a "clump" into the list of existing clumps for gimme_tree. + * + * We try to merge the clump into some existing clump, and repeat if + * successful. When no more merging is possible, insert the clump + * into the list, preserving the list ordering rule (namely, that + * clumps of larger size appear earlier). + * + * If force is true, merge anywhere a join is legal, even if it causes + * a cartesian join to be performed. When force is false, do only + * "desirable" joins. + */ +static List * +merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, + bool force) +{ + ListCell *lc; + int pos; + + /* Look for a clump that new_clump can join to */ + foreach(lc, clumps) + { + Clump *old_clump = (Clump *) lfirst(lc); + + if (force || + desirable_join(root, old_clump->joinrel, new_clump->joinrel)) + { + RelOptInfo *joinrel; + + /* + * Construct a RelOptInfo representing the join of these two input + * relations. Note that we expect the joinrel not to exist in + * root->join_rel_list yet, and so the paths constructed for it + * will only include the ones we want. + */ + joinrel = make_join_rel(root, + old_clump->joinrel, + new_clump->joinrel); + + /* Keep searching if join order is not valid */ + if (joinrel) + { + /* Create paths for partitionwise joins. */ + generate_partitionwise_join_paths(root, joinrel); + + /* + * Except for the topmost scan/join rel, consider gathering + * partial paths. We'll do the same for the topmost scan/join + * rel once we know the final targetlist (see + * grouping_planner). + */ + if (old_clump->size + new_clump->size < num_gene) + generate_useful_gather_paths(root, joinrel, false); + + /* Find and save the cheapest paths for this joinrel */ + set_cheapest(joinrel); + + /* Absorb new clump into old */ + old_clump->joinrel = joinrel; + old_clump->size += new_clump->size; + pfree(new_clump); + + /* Remove old_clump from list */ + clumps = foreach_delete_current(clumps, lc); + + /* + * Recursively try to merge the enlarged old_clump with + * others. When no further merge is possible, we'll reinsert + * it into the list. + */ + return merge_clump(root, clumps, old_clump, num_gene, force); + } + } + } + + /* + * No merging is possible, so add new_clump as an independent clump, in + * proper order according to size. We can be fast for the common case + * where it has size 1 --- it should always go at the end. + */ + if (clumps == NIL || new_clump->size == 1) + return lappend(clumps, new_clump); + + /* Else search for the place to insert it */ + for (pos = 0; pos < list_length(clumps); pos++) + { + Clump *old_clump = (Clump *) list_nth(clumps, pos); + + if (new_clump->size > old_clump->size) + break; /* new_clump belongs before old_clump */ + } + clumps = list_insert_nth(clumps, pos, new_clump); + + return clumps; +} + +/* + * Heuristics for gimme_tree: do we want to join these two relations? + */ +static bool +desirable_join(PlannerInfo *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel) +{ + /* + * Join if there is an applicable join clause, or if there is a join order + * restriction forcing these rels to be joined. + */ + if (have_relevant_joinclause(root, outer_rel, inner_rel) || + have_join_order_restriction(root, outer_rel, inner_rel)) + return true; + + /* Otherwise postpone the join till later. */ + return false; +} |