diff options
Diffstat (limited to 'src/backend/optimizer/geqo')
-rw-r--r-- | src/backend/optimizer/geqo/Makefile | 33 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_copy.c | 54 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_cx.c | 124 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_erx.c | 471 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_eval.c | 338 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_main.c | 353 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_misc.c | 132 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_mutation.c | 66 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_ox1.c | 95 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_ox2.c | 112 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_pmx.c | 224 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_pool.c | 265 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_px.c | 110 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_random.c | 40 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_recombination.c | 92 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_selection.c | 115 |
16 files changed, 2624 insertions, 0 deletions
diff --git a/src/backend/optimizer/geqo/Makefile b/src/backend/optimizer/geqo/Makefile new file mode 100644 index 0000000..1978c14 --- /dev/null +++ b/src/backend/optimizer/geqo/Makefile @@ -0,0 +1,33 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for the genetic query optimizer module +# +# Copyright (c) 1994, Regents of the University of California +# +# src/backend/optimizer/geqo/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/optimizer/geqo +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + geqo_copy.o \ + geqo_cx.o \ + geqo_erx.o \ + geqo_eval.o \ + geqo_main.o \ + geqo_misc.o \ + geqo_mutation.o \ + geqo_ox1.o \ + geqo_ox2.o \ + geqo_pmx.o \ + geqo_pool.o \ + geqo_px.o \ + geqo_random.o \ + geqo_recombination.o \ + geqo_selection.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/geqo/geqo_copy.c b/src/backend/optimizer/geqo/geqo_copy.c new file mode 100644 index 0000000..4f6226b --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_copy.c @@ -0,0 +1,54 @@ +/*------------------------------------------------------------------------ + * + * geqo_copy.c + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_copy.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* this is adopted from D. Whitley's Genitor algorithm */ + +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" +#include "optimizer/geqo_copy.h" + +/* geqo_copy + * + * copies one gene to another + * + */ +void +geqo_copy(PlannerInfo *root, Chromosome *chromo1, Chromosome *chromo2, + int string_length) +{ + int i; + + for (i = 0; i < string_length; i++) + chromo1->string[i] = chromo2->string[i]; + + chromo1->worth = chromo2->worth; +} diff --git a/src/backend/optimizer/geqo/geqo_cx.c b/src/backend/optimizer/geqo/geqo_cx.c new file mode 100644 index 0000000..3b8d2fe --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_cx.c @@ -0,0 +1,124 @@ +/*------------------------------------------------------------------------ +* +* geqo_cx.c +* +* cycle crossover [CX] routines; +* CX operator according to Oliver et al +* (Proc 2nd Int'l Conf on GA's) +* +* src/backend/optimizer/geqo/geqo_cx.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the cx algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + + +#include "postgres.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + +#if defined(CX) + +/* cx + * + * cycle crossover + */ +int +cx(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, + int num_gene, City * city_table) +{ + int i, + start_pos, + curr_pos; + int count = 0; + int num_diffs = 0; + + /* initialize city table */ + for (i = 1; i <= num_gene; i++) + { + city_table[i].used = 0; + city_table[tour2[i - 1]].tour2_position = i - 1; + city_table[tour1[i - 1]].tour1_position = i - 1; + } + + /* choose random cycle starting position */ + start_pos = geqo_randint(root, num_gene - 1, 0); + + /* child inherits first city */ + offspring[start_pos] = tour1[start_pos]; + + /* begin cycle with tour1 */ + curr_pos = start_pos; + city_table[(int) tour1[start_pos]].used = 1; + + count++; + + /* cx main part */ + + +/* STEP 1 */ + + while (tour2[curr_pos] != tour1[start_pos]) + { + city_table[(int) tour2[curr_pos]].used = 1; + curr_pos = city_table[(int) tour2[curr_pos]].tour1_position; + offspring[curr_pos] = tour1[curr_pos]; + count++; + } + + +/* STEP 2 */ + + /* failed to create a complete tour */ + if (count < num_gene) + { + for (i = 1; i <= num_gene; i++) + { + if (!city_table[i].used) + { + offspring[city_table[i].tour2_position] = + tour2[(int) city_table[i].tour2_position]; + count++; + } + } + } + + +/* STEP 3 */ + + /* still failed to create a complete tour */ + if (count < num_gene) + { + + /* count the number of differences between mom and offspring */ + for (i = 0; i < num_gene; i++) + if (tour1[i] != offspring[i]) + num_diffs++; + + } + + return num_diffs; +} + +#endif /* defined(CX) */ diff --git a/src/backend/optimizer/geqo/geqo_erx.c b/src/backend/optimizer/geqo/geqo_erx.c new file mode 100644 index 0000000..3b92f42 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_erx.c @@ -0,0 +1,471 @@ +/*------------------------------------------------------------------------ +* +* geqo_erx.c +* edge recombination crossover [ER] +* +* src/backend/optimizer/geqo/geqo_erx.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the edge recombination algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + + +#include "postgres.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + +#if defined(ERX) + +static int gimme_edge(PlannerInfo *root, Gene gene1, Gene gene2, Edge *edge_table); +static void remove_gene(PlannerInfo *root, Gene gene, Edge edge, Edge *edge_table); +static Gene gimme_gene(PlannerInfo *root, Edge edge, Edge *edge_table); + +static Gene edge_failure(PlannerInfo *root, Gene *gene, int index, Edge *edge_table, int num_gene); + + +/* alloc_edge_table + * + * allocate memory for edge table + * + */ + +Edge * +alloc_edge_table(PlannerInfo *root, int num_gene) +{ + Edge *edge_table; + + /* + * palloc one extra location so that nodes numbered 1..n can be indexed + * directly; 0 will not be used + */ + + edge_table = (Edge *) palloc((num_gene + 1) * sizeof(Edge)); + + return edge_table; +} + +/* free_edge_table + * + * deallocate memory of edge table + * + */ +void +free_edge_table(PlannerInfo *root, Edge *edge_table) +{ + pfree(edge_table); +} + +/* gimme_edge_table + * + * fills a data structure which represents the set of explicit + * edges between points in the (2) input genes + * + * assumes circular tours and bidirectional edges + * + * gimme_edge() will set "shared" edges to negative values + * + * returns average number edges/city in range 2.0 - 4.0 + * where 2.0=homogeneous; 4.0=diverse + * + */ +float +gimme_edge_table(PlannerInfo *root, Gene *tour1, Gene *tour2, + int num_gene, Edge *edge_table) +{ + int i, + index1, + index2; + int edge_total; /* total number of unique edges in two genes */ + + /* at first clear the edge table's old data */ + for (i = 1; i <= num_gene; i++) + { + edge_table[i].total_edges = 0; + edge_table[i].unused_edges = 0; + } + + /* fill edge table with new data */ + + edge_total = 0; + + for (index1 = 0; index1 < num_gene; index1++) + { + /* + * presume the tour is circular, i.e. 1->2, 2->3, 3->1 this operation + * maps n back to 1 + */ + + index2 = (index1 + 1) % num_gene; + + /* + * edges are bidirectional, i.e. 1->2 is same as 2->1 call gimme_edge + * twice per edge + */ + + edge_total += gimme_edge(root, tour1[index1], tour1[index2], edge_table); + gimme_edge(root, tour1[index2], tour1[index1], edge_table); + + edge_total += gimme_edge(root, tour2[index1], tour2[index2], edge_table); + gimme_edge(root, tour2[index2], tour2[index1], edge_table); + } + + /* return average number of edges per index */ + return ((float) (edge_total * 2) / (float) num_gene); +} + +/* gimme_edge + * + * registers edge from city1 to city2 in input edge table + * + * no assumptions about directionality are made; + * therefore it is up to the calling routine to + * call gimme_edge twice to make a bi-directional edge + * between city1 and city2; + * uni-directional edges are possible as well (just call gimme_edge + * once with the direction from city1 to city2) + * + * returns 1 if edge was not already registered and was just added; + * 0 if edge was already registered and edge_table is unchanged + */ +static int +gimme_edge(PlannerInfo *root, Gene gene1, Gene gene2, Edge *edge_table) +{ + int i; + int edges; + int city1 = (int) gene1; + int city2 = (int) gene2; + + + /* check whether edge city1->city2 already exists */ + edges = edge_table[city1].total_edges; + + for (i = 0; i < edges; i++) + { + if ((Gene) Abs(edge_table[city1].edge_list[i]) == city2) + { + + /* mark shared edges as negative */ + edge_table[city1].edge_list[i] = 0 - city2; + + return 0; + } + } + + /* add city1->city2; */ + edge_table[city1].edge_list[edges] = city2; + + /* increment the number of edges from city1 */ + edge_table[city1].total_edges++; + edge_table[city1].unused_edges++; + + return 1; +} + +/* gimme_tour + * + * creates a new tour using edges from the edge table. + * priority is given to "shared" edges (i.e. edges which + * all parent genes possess and are marked as negative + * in the edge table.) + * + */ +int +gimme_tour(PlannerInfo *root, Edge *edge_table, Gene *new_gene, int num_gene) +{ + int i; + int edge_failures = 0; + + /* choose int between 1 and num_gene */ + new_gene[0] = (Gene) geqo_randint(root, num_gene, 1); + + for (i = 1; i < num_gene; i++) + { + /* + * as each point is entered into the tour, remove it from the edge + * table + */ + + remove_gene(root, new_gene[i - 1], edge_table[(int) new_gene[i - 1]], edge_table); + + /* find destination for the newly entered point */ + + if (edge_table[new_gene[i - 1]].unused_edges > 0) + new_gene[i] = gimme_gene(root, edge_table[(int) new_gene[i - 1]], edge_table); + + else + { /* cope with fault */ + edge_failures++; + + new_gene[i] = edge_failure(root, new_gene, i - 1, edge_table, num_gene); + } + + /* mark this node as incorporated */ + edge_table[(int) new_gene[i - 1]].unused_edges = -1; + + } /* for (i=1; i<num_gene; i++) */ + + return edge_failures; + +} + +/* remove_gene + * + * removes input gene from edge_table. + * input edge is used + * to identify deletion locations within edge table. + * + */ +static void +remove_gene(PlannerInfo *root, Gene gene, Edge edge, Edge *edge_table) +{ + int i, + j; + int possess_edge; + int genes_remaining; + + /* + * do for every gene known to have an edge to input gene (i.e. in + * edge_list for input edge) + */ + + for (i = 0; i < edge.unused_edges; i++) + { + possess_edge = (int) Abs(edge.edge_list[i]); + genes_remaining = edge_table[possess_edge].unused_edges; + + /* find the input gene in all edge_lists and delete it */ + for (j = 0; j < genes_remaining; j++) + { + + if ((Gene) Abs(edge_table[possess_edge].edge_list[j]) == gene) + { + + edge_table[possess_edge].unused_edges--; + + edge_table[possess_edge].edge_list[j] = + edge_table[possess_edge].edge_list[genes_remaining - 1]; + + break; + } + } + } +} + +/* gimme_gene + * + * priority is given to "shared" edges + * (i.e. edges which both genes possess) + * + */ +static Gene +gimme_gene(PlannerInfo *root, Edge edge, Edge *edge_table) +{ + int i; + Gene friend; + int minimum_edges; + int minimum_count = -1; + int rand_decision; + + /* + * no point has edges to more than 4 other points thus, this contrived + * minimum will be replaced + */ + + minimum_edges = 5; + + /* consider candidate destination points in edge list */ + + for (i = 0; i < edge.unused_edges; i++) + { + friend = (Gene) edge.edge_list[i]; + + /* + * give priority to shared edges that are negative; so return 'em + */ + + /* + * negative values are caught here so we need not worry about + * converting to absolute values + */ + if (friend < 0) + return (Gene) Abs(friend); + + + /* + * give priority to candidates with fewest remaining unused edges; + * find out what the minimum number of unused edges is + * (minimum_edges); if there is more than one candidate with the + * minimum number of unused edges keep count of this number + * (minimum_count); + */ + + /* + * The test for minimum_count can probably be removed at some point + * but comments should probably indicate exactly why it is guaranteed + * that the test will always succeed the first time around. If it can + * fail then the code is in error + */ + + + if (edge_table[(int) friend].unused_edges < minimum_edges) + { + minimum_edges = edge_table[(int) friend].unused_edges; + minimum_count = 1; + } + else if (minimum_count == -1) + elog(ERROR, "minimum_count not set"); + else if (edge_table[(int) friend].unused_edges == minimum_edges) + minimum_count++; + + } /* for (i=0; i<edge.unused_edges; i++) */ + + + /* random decision of the possible candidates to use */ + rand_decision = geqo_randint(root, minimum_count - 1, 0); + + + for (i = 0; i < edge.unused_edges; i++) + { + friend = (Gene) edge.edge_list[i]; + + /* return the chosen candidate point */ + if (edge_table[(int) friend].unused_edges == minimum_edges) + { + minimum_count--; + + if (minimum_count == rand_decision) + return friend; + } + } + + /* ... should never be reached */ + elog(ERROR, "neither shared nor minimum number nor random edge found"); + return 0; /* to keep the compiler quiet */ +} + +/* edge_failure + * + * routine for handling edge failure + * + */ +static Gene +edge_failure(PlannerInfo *root, Gene *gene, int index, Edge *edge_table, int num_gene) +{ + int i; + Gene fail_gene = gene[index]; + int remaining_edges = 0; + int four_count = 0; + int rand_decision; + + + /* + * how many edges remain? how many gene with four total (initial) edges + * remain? + */ + + for (i = 1; i <= num_gene; i++) + { + if ((edge_table[i].unused_edges != -1) && (i != (int) fail_gene)) + { + remaining_edges++; + + if (edge_table[i].total_edges == 4) + four_count++; + } + } + + /* + * random decision of the gene with remaining edges and whose total_edges + * == 4 + */ + + if (four_count != 0) + { + + rand_decision = geqo_randint(root, four_count - 1, 0); + + for (i = 1; i <= num_gene; i++) + { + + if ((Gene) i != fail_gene && + edge_table[i].unused_edges != -1 && + edge_table[i].total_edges == 4) + { + + four_count--; + + if (rand_decision == four_count) + return (Gene) i; + } + } + + elog(LOG, "no edge found via random decision and total_edges == 4"); + } + else if (remaining_edges != 0) + { + /* random decision of the gene with remaining edges */ + rand_decision = geqo_randint(root, remaining_edges - 1, 0); + + for (i = 1; i <= num_gene; i++) + { + + if ((Gene) i != fail_gene && + edge_table[i].unused_edges != -1) + { + + remaining_edges--; + + if (rand_decision == remaining_edges) + return i; + } + } + + elog(LOG, "no edge found via random decision with remaining edges"); + } + + /* + * edge table seems to be empty; this happens sometimes on the last point + * due to the fact that the first point is removed from the table even + * though only one of its edges has been determined + */ + + else + { /* occurs only at the last point in the tour; + * simply look for the point which is not yet + * used */ + + for (i = 1; i <= num_gene; i++) + if (edge_table[i].unused_edges >= 0) + return (Gene) i; + + elog(LOG, "no edge found via looking for the last unused point"); + } + + + /* ... should never be reached */ + elog(ERROR, "no edge found"); + return 0; /* to keep the compiler quiet */ +} + +#endif /* defined(ERX) */ diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c new file mode 100644 index 0000000..2ecba83 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -0,0 +1,338 @@ +/*------------------------------------------------------------------------ + * + * geqo_eval.c + * Routines to evaluate query trees + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_eval.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#include "postgres.h" + +#include <float.h> +#include <limits.h> +#include <math.h> + +#include "optimizer/geqo.h" +#include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "utils/memutils.h" + + +/* A "clump" of already-joined relations within gimme_tree */ +typedef struct +{ + RelOptInfo *joinrel; /* joinrel for the set of relations */ + int size; /* number of input relations in clump */ +} Clump; + +static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, + int num_gene, bool force); +static bool desirable_join(PlannerInfo *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel); + + +/* + * geqo_eval + * + * Returns cost of a query tree as an individual of the population. + * + * If no legal join order can be extracted from the proposed tour, + * returns DBL_MAX. + */ +Cost +geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) +{ + MemoryContext mycontext; + MemoryContext oldcxt; + RelOptInfo *joinrel; + Cost fitness; + int savelength; + struct HTAB *savehash; + + /* + * Create a private memory context that will hold all temp storage + * allocated inside gimme_tree(). + * + * Since geqo_eval() will be called many times, we can't afford to let all + * that memory go unreclaimed until end of statement. Note we make the + * temp context a child of the planner's normal context, so that it will + * be freed even if we abort via ereport(ERROR). + */ + mycontext = AllocSetContextCreate(CurrentMemoryContext, + "GEQO", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(mycontext); + + /* + * gimme_tree will add entries to root->join_rel_list, which may or may + * not already contain some entries. The newly added entries will be + * recycled by the MemoryContextDelete below, so we must ensure that the + * list is restored to its former state before exiting. We can do this by + * truncating the list to its original length. NOTE this assumes that any + * added entries are appended at the end! + * + * We also must take care not to mess up the outer join_rel_hash, if there + * is one. We can do this by just temporarily setting the link to NULL. + * (If we are dealing with enough join rels, which we very likely are, a + * new hash table will get built and used locally.) + * + * join_rel_level[] shouldn't be in use, so just Assert it isn't. + */ + savelength = list_length(root->join_rel_list); + savehash = root->join_rel_hash; + Assert(root->join_rel_level == NULL); + + root->join_rel_hash = NULL; + + /* construct the best path for the given combination of relations */ + joinrel = gimme_tree(root, tour, num_gene); + + /* + * compute fitness, if we found a valid join + * + * XXX geqo does not currently support optimization for partial result + * retrieval, nor do we take any cognizance of possible use of + * parameterized paths --- how to fix? + */ + if (joinrel) + { + Path *best_path = joinrel->cheapest_total_path; + + fitness = best_path->total_cost; + } + else + fitness = DBL_MAX; + + /* + * Restore join_rel_list to its former state, and put back original + * hashtable if any. + */ + root->join_rel_list = list_truncate(root->join_rel_list, + savelength); + root->join_rel_hash = savehash; + + /* release all the memory acquired within gimme_tree */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(mycontext); + + return fitness; +} + +/* + * gimme_tree + * Form planner estimates for a join tree constructed in the specified + * order. + * + * 'tour' is the proposed join order, of length 'num_gene' + * + * Returns a new join relation whose cheapest path is the best plan for + * this join order. NB: will return NULL if join order is invalid and + * we can't modify it into a valid order. + * + * The original implementation of this routine always joined in the specified + * order, and so could only build left-sided plans (and right-sided and + * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). + * It could never produce a "bushy" plan. This had a couple of big problems, + * of which the worst was that there are situations involving join order + * restrictions where the only valid plans are bushy. + * + * The present implementation takes the given tour as a guideline, but + * postpones joins that are illegal or seem unsuitable according to some + * heuristic rules. This allows correct bushy plans to be generated at need, + * and as a nice side-effect it seems to materially improve the quality of the + * generated plans. Note however that since it's just a heuristic, it can + * still fail in some cases. (In particular, we might clump together + * relations that actually mustn't be joined yet due to LATERAL restrictions; + * since there's no provision for un-clumping, this must lead to failure.) + */ +RelOptInfo * +gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) +{ + GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; + List *clumps; + int rel_count; + + /* + * Sometimes, a relation can't yet be joined to others due to heuristics + * or actual semantic restrictions. We maintain a list of "clumps" of + * successfully joined relations, with larger clumps at the front. Each + * new relation from the tour is added to the first clump it can be joined + * to; if there is none then it becomes a new clump of its own. When we + * enlarge an existing clump we check to see if it can now be merged with + * any other clumps. After the tour is all scanned, we forget about the + * heuristics and try to forcibly join any remaining clumps. If we are + * unable to merge all the clumps into one, fail. + */ + clumps = NIL; + + for (rel_count = 0; rel_count < num_gene; rel_count++) + { + int cur_rel_index; + RelOptInfo *cur_rel; + Clump *cur_clump; + + /* Get the next input relation */ + cur_rel_index = (int) tour[rel_count]; + cur_rel = (RelOptInfo *) list_nth(private->initial_rels, + cur_rel_index - 1); + + /* Make it into a single-rel clump */ + cur_clump = (Clump *) palloc(sizeof(Clump)); + cur_clump->joinrel = cur_rel; + cur_clump->size = 1; + + /* Merge it into the clumps list, using only desirable joins */ + clumps = merge_clump(root, clumps, cur_clump, num_gene, false); + } + + if (list_length(clumps) > 1) + { + /* Force-join the remaining clumps in some legal order */ + List *fclumps; + ListCell *lc; + + fclumps = NIL; + foreach(lc, clumps) + { + Clump *clump = (Clump *) lfirst(lc); + + fclumps = merge_clump(root, fclumps, clump, num_gene, true); + } + clumps = fclumps; + } + + /* Did we succeed in forming a single join relation? */ + if (list_length(clumps) != 1) + return NULL; + + return ((Clump *) linitial(clumps))->joinrel; +} + +/* + * Merge a "clump" into the list of existing clumps for gimme_tree. + * + * We try to merge the clump into some existing clump, and repeat if + * successful. When no more merging is possible, insert the clump + * into the list, preserving the list ordering rule (namely, that + * clumps of larger size appear earlier). + * + * If force is true, merge anywhere a join is legal, even if it causes + * a cartesian join to be performed. When force is false, do only + * "desirable" joins. + */ +static List * +merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, + bool force) +{ + ListCell *lc; + int pos; + + /* Look for a clump that new_clump can join to */ + foreach(lc, clumps) + { + Clump *old_clump = (Clump *) lfirst(lc); + + if (force || + desirable_join(root, old_clump->joinrel, new_clump->joinrel)) + { + RelOptInfo *joinrel; + + /* + * Construct a RelOptInfo representing the join of these two input + * relations. Note that we expect the joinrel not to exist in + * root->join_rel_list yet, and so the paths constructed for it + * will only include the ones we want. + */ + joinrel = make_join_rel(root, + old_clump->joinrel, + new_clump->joinrel); + + /* Keep searching if join order is not valid */ + if (joinrel) + { + /* Create paths for partitionwise joins. */ + generate_partitionwise_join_paths(root, joinrel); + + /* + * Except for the topmost scan/join rel, consider gathering + * partial paths. We'll do the same for the topmost scan/join + * rel once we know the final targetlist (see + * grouping_planner). + */ + if (old_clump->size + new_clump->size < num_gene) + generate_useful_gather_paths(root, joinrel, false); + + /* Find and save the cheapest paths for this joinrel */ + set_cheapest(joinrel); + + /* Absorb new clump into old */ + old_clump->joinrel = joinrel; + old_clump->size += new_clump->size; + pfree(new_clump); + + /* Remove old_clump from list */ + clumps = foreach_delete_current(clumps, lc); + + /* + * Recursively try to merge the enlarged old_clump with + * others. When no further merge is possible, we'll reinsert + * it into the list. + */ + return merge_clump(root, clumps, old_clump, num_gene, force); + } + } + } + + /* + * No merging is possible, so add new_clump as an independent clump, in + * proper order according to size. We can be fast for the common case + * where it has size 1 --- it should always go at the end. + */ + if (clumps == NIL || new_clump->size == 1) + return lappend(clumps, new_clump); + + /* Else search for the place to insert it */ + for (pos = 0; pos < list_length(clumps); pos++) + { + Clump *old_clump = (Clump *) list_nth(clumps, pos); + + if (new_clump->size > old_clump->size) + break; /* new_clump belongs before old_clump */ + } + clumps = list_insert_nth(clumps, pos, new_clump); + + return clumps; +} + +/* + * Heuristics for gimme_tree: do we want to join these two relations? + */ +static bool +desirable_join(PlannerInfo *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel) +{ + /* + * Join if there is an applicable join clause, or if there is a join order + * restriction forcing these rels to be joined. + */ + if (have_relevant_joinclause(root, outer_rel, inner_rel) || + have_join_order_restriction(root, outer_rel, inner_rel)) + return true; + + /* Otherwise postpone the join till later. */ + return false; +} diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c new file mode 100644 index 0000000..36df346 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -0,0 +1,353 @@ +/*------------------------------------------------------------------------ + * + * geqo_main.c + * solution to the query optimization problem + * by means of a Genetic Algorithm (GA) + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_main.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#include "postgres.h" + +#include <math.h> + +#include "optimizer/geqo_misc.h" +#include "optimizer/geqo_mutation.h" +#include "optimizer/geqo_pool.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_selection.h" + + +/* + * Configuration options + */ +int Geqo_effort; +int Geqo_pool_size; +int Geqo_generations; +double Geqo_selection_bias; +double Geqo_seed; + + +static int gimme_pool_size(int nr_rel); +static int gimme_number_generations(int pool_size); + +/* complain if no recombination mechanism is #define'd */ +#if !defined(ERX) && \ + !defined(PMX) && \ + !defined(CX) && \ + !defined(PX) && \ + !defined(OX1) && \ + !defined(OX2) +#error "must choose one GEQO recombination mechanism in geqo.h" +#endif + + +/* + * geqo + * solution of the query optimization problem + * similar to a constrained Traveling Salesman Problem (TSP) + */ + +RelOptInfo * +geqo(PlannerInfo *root, int number_of_rels, List *initial_rels) +{ + GeqoPrivateData private; + int generation; + Chromosome *momma; + Chromosome *daddy; + Chromosome *kid; + Pool *pool; + int pool_size, + number_generations; + +#ifdef GEQO_DEBUG + int status_interval; +#endif + Gene *best_tour; + RelOptInfo *best_rel; + +#if defined(ERX) + Edge *edge_table; /* list of edges */ + int edge_failures = 0; +#endif +#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2) + City *city_table; /* list of cities */ +#endif +#if defined(CX) + int cycle_diffs = 0; + int mutations = 0; +#endif + +/* set up private information */ + root->join_search_private = (void *) &private; + private.initial_rels = initial_rels; + +/* initialize private number generator */ + geqo_set_seed(root, Geqo_seed); + +/* set GA parameters */ + pool_size = gimme_pool_size(number_of_rels); + number_generations = gimme_number_generations(pool_size); +#ifdef GEQO_DEBUG + status_interval = 10; +#endif + +/* allocate genetic pool memory */ + pool = alloc_pool(root, pool_size, number_of_rels); + +/* random initialization of the pool */ + random_init_pool(root, pool); + +/* sort the pool according to cheapest path as fitness */ + sort_pool(root, pool); /* we have to do it only one time, since all + * kids replace the worst individuals in + * future (-> geqo_pool.c:spread_chromo ) */ + +#ifdef GEQO_DEBUG + elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f", + pool_size, + pool->data[0].worth, + pool->data[pool_size - 1].worth); +#endif + +/* allocate chromosome momma and daddy memory */ + momma = alloc_chromo(root, pool->string_length); + daddy = alloc_chromo(root, pool->string_length); + +#if defined (ERX) +#ifdef GEQO_DEBUG + elog(DEBUG2, "using edge recombination crossover [ERX]"); +#endif +/* allocate edge table memory */ + edge_table = alloc_edge_table(root, pool->string_length); +#elif defined(PMX) +#ifdef GEQO_DEBUG + elog(DEBUG2, "using partially matched crossover [PMX]"); +#endif +/* allocate chromosome kid memory */ + kid = alloc_chromo(root, pool->string_length); +#elif defined(CX) +#ifdef GEQO_DEBUG + elog(DEBUG2, "using cycle crossover [CX]"); +#endif +/* allocate city table memory */ + kid = alloc_chromo(root, pool->string_length); + city_table = alloc_city_table(root, pool->string_length); +#elif defined(PX) +#ifdef GEQO_DEBUG + elog(DEBUG2, "using position crossover [PX]"); +#endif +/* allocate city table memory */ + kid = alloc_chromo(root, pool->string_length); + city_table = alloc_city_table(root, pool->string_length); +#elif defined(OX1) +#ifdef GEQO_DEBUG + elog(DEBUG2, "using order crossover [OX1]"); +#endif +/* allocate city table memory */ + kid = alloc_chromo(root, pool->string_length); + city_table = alloc_city_table(root, pool->string_length); +#elif defined(OX2) +#ifdef GEQO_DEBUG + elog(DEBUG2, "using order crossover [OX2]"); +#endif +/* allocate city table memory */ + kid = alloc_chromo(root, pool->string_length); + city_table = alloc_city_table(root, pool->string_length); +#endif + + +/* my pain main part: */ +/* iterative optimization */ + + for (generation = 0; generation < number_generations; generation++) + { + /* SELECTION: using linear bias function */ + geqo_selection(root, momma, daddy, pool, Geqo_selection_bias); + +#if defined (ERX) + /* EDGE RECOMBINATION CROSSOVER */ + gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table); + + kid = momma; + + /* are there any edge failures ? */ + edge_failures += gimme_tour(root, edge_table, kid->string, pool->string_length); +#elif defined(PMX) + /* PARTIALLY MATCHED CROSSOVER */ + pmx(root, momma->string, daddy->string, kid->string, pool->string_length); +#elif defined(CX) + /* CYCLE CROSSOVER */ + cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); + /* mutate the child */ + if (cycle_diffs == 0) + { + mutations++; + geqo_mutation(root, kid->string, pool->string_length); + } +#elif defined(PX) + /* POSITION CROSSOVER */ + px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); +#elif defined(OX1) + /* ORDER CROSSOVER */ + ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); +#elif defined(OX2) + /* ORDER CROSSOVER */ + ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); +#endif + + + /* EVALUATE FITNESS */ + kid->worth = geqo_eval(root, kid->string, pool->string_length); + + /* push the kid into the wilderness of life according to its worth */ + spread_chromo(root, kid, pool); + + +#ifdef GEQO_DEBUG + if (status_interval && !(generation % status_interval)) + print_gen(stdout, pool, generation); +#endif + + } + + +#if defined(ERX) +#if defined(GEQO_DEBUG) + if (edge_failures != 0) + elog(LOG, "[GEQO] failures: %d, average: %d", + edge_failures, (int) number_generations / edge_failures); + else + elog(LOG, "[GEQO] no edge failures detected"); +#else + /* suppress variable-set-but-not-used warnings from some compilers */ + (void) edge_failures; +#endif +#endif + +#if defined(CX) && defined(GEQO_DEBUG) + if (mutations != 0) + elog(LOG, "[GEQO] mutations: %d, generations: %d", + mutations, number_generations); + else + elog(LOG, "[GEQO] no mutations processed"); +#endif + +#ifdef GEQO_DEBUG + print_pool(stdout, pool, 0, pool_size - 1); +#endif + +#ifdef GEQO_DEBUG + elog(DEBUG1, "GEQO best is %.2f after %d generations", + pool->data[0].worth, number_generations); +#endif + + + /* + * got the cheapest query tree processed by geqo; first element of the + * population indicates the best query tree + */ + best_tour = (Gene *) pool->data[0].string; + + best_rel = gimme_tree(root, best_tour, pool->string_length); + + if (best_rel == NULL) + elog(ERROR, "geqo failed to make a valid plan"); + + /* DBG: show the query plan */ +#ifdef NOT_USED + print_plan(best_plan, root); +#endif + + /* ... free memory stuff */ + free_chromo(root, momma); + free_chromo(root, daddy); + +#if defined (ERX) + free_edge_table(root, edge_table); +#elif defined(PMX) + free_chromo(root, kid); +#elif defined(CX) + free_chromo(root, kid); + free_city_table(root, city_table); +#elif defined(PX) + free_chromo(root, kid); + free_city_table(root, city_table); +#elif defined(OX1) + free_chromo(root, kid); + free_city_table(root, city_table); +#elif defined(OX2) + free_chromo(root, kid); + free_city_table(root, city_table); +#endif + + free_pool(root, pool); + + /* ... clear root pointer to our private storage */ + root->join_search_private = NULL; + + return best_rel; +} + + +/* + * Return either configured pool size or a good default + * + * The default is based on query size (no. of relations) = 2^(QS+1), + * but constrained to a range based on the effort value. + */ +static int +gimme_pool_size(int nr_rel) +{ + double size; + int minsize; + int maxsize; + + /* Legal pool size *must* be at least 2, so ignore attempt to select 1 */ + if (Geqo_pool_size >= 2) + return Geqo_pool_size; + + size = pow(2.0, nr_rel + 1.0); + + maxsize = 50 * Geqo_effort; /* 50 to 500 individuals */ + if (size > maxsize) + return maxsize; + + minsize = 10 * Geqo_effort; /* 10 to 100 individuals */ + if (size < minsize) + return minsize; + + return (int) ceil(size); +} + + +/* + * Return either configured number of generations or a good default + * + * The default is the same as the pool size, which allows us to be + * sure that less-fit individuals get pushed out of the breeding + * population before the run finishes. + */ +static int +gimme_number_generations(int pool_size) +{ + if (Geqo_generations > 0) + return Geqo_generations; + + return pool_size; +} diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c new file mode 100644 index 0000000..02b5a70 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -0,0 +1,132 @@ +/*------------------------------------------------------------------------ + * + * geqo_misc.c + * misc. printout and debug stuff + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_misc.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +#include "postgres.h" + +#include "optimizer/geqo_misc.h" + + +#ifdef GEQO_DEBUG + + +/* + * avg_pool + */ +static double +avg_pool(Pool *pool) +{ + int i; + double cumulative = 0.0; + + if (pool->size <= 0) + elog(ERROR, "pool_size is zero"); + + /* + * Since the pool may contain multiple occurrences of DBL_MAX, divide by + * pool->size before summing, not after, to avoid overflow. This loses a + * little in speed and accuracy, but this routine is only used for debug + * printouts, so we don't care that much. + */ + for (i = 0; i < pool->size; i++) + cumulative += pool->data[i].worth / pool->size; + + return cumulative; +} + +/* print_pool + */ +void +print_pool(FILE *fp, Pool *pool, int start, int stop) +{ + int i, + j; + + /* be extra careful that start and stop are valid inputs */ + + if (start < 0) + start = 0; + if (stop > pool->size) + stop = pool->size; + + if (start + stop > pool->size) + { + start = 0; + stop = pool->size; + } + + for (i = start; i < stop; i++) + { + fprintf(fp, "%d)\t", i); + for (j = 0; j < pool->string_length; j++) + fprintf(fp, "%d ", pool->data[i].string[j]); + fprintf(fp, "%g\n", pool->data[i].worth); + } + + fflush(fp); +} + +/* print_gen + * + * printout for chromosome: best, worst, mean, average + */ +void +print_gen(FILE *fp, Pool *pool, int generation) +{ + int lowest; + + /* Get index to lowest ranking gene in population. */ + /* Use 2nd to last since last is buffer. */ + lowest = pool->size > 1 ? pool->size - 2 : 0; + + fprintf(fp, + "%5d | Best: %g Worst: %g Mean: %g Avg: %g\n", + generation, + pool->data[0].worth, + pool->data[lowest].worth, + pool->data[pool->size / 2].worth, + avg_pool(pool)); + + fflush(fp); +} + + +void +print_edge_table(FILE *fp, Edge *edge_table, int num_gene) +{ + int i, + j; + + fprintf(fp, "\nEDGE TABLE\n"); + + for (i = 1; i <= num_gene; i++) + { + fprintf(fp, "%d :", i); + for (j = 0; j < edge_table[i].unused_edges; j++) + fprintf(fp, " %d", edge_table[i].edge_list[j]); + fprintf(fp, "\n"); + } + + fprintf(fp, "\n"); + + fflush(fp); +} + +#endif /* GEQO_DEBUG */ diff --git a/src/backend/optimizer/geqo/geqo_mutation.c b/src/backend/optimizer/geqo/geqo_mutation.c new file mode 100644 index 0000000..2af0295 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_mutation.c @@ -0,0 +1,66 @@ +/*------------------------------------------------------------------------ +* +* geqo_mutation.c +* +* TSP mutation routines +* +* src/backend/optimizer/geqo/geqo_mutation.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* this is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" +#include "optimizer/geqo_mutation.h" +#include "optimizer/geqo_random.h" + +#if defined(CX) /* currently used only in CX mode */ + +void +geqo_mutation(PlannerInfo *root, Gene *tour, int num_gene) +{ + int swap1; + int swap2; + int num_swaps = geqo_randint(root, num_gene / 3, 0); + Gene temp; + + + while (num_swaps > 0) + { + swap1 = geqo_randint(root, num_gene - 1, 0); + swap2 = geqo_randint(root, num_gene - 1, 0); + + while (swap1 == swap2) + swap2 = geqo_randint(root, num_gene - 1, 0); + + temp = tour[swap1]; + tour[swap1] = tour[swap2]; + tour[swap2] = temp; + + + num_swaps -= 1; + } +} + +#endif /* defined(CX) */ diff --git a/src/backend/optimizer/geqo/geqo_ox1.c b/src/backend/optimizer/geqo/geqo_ox1.c new file mode 100644 index 0000000..10d2d0a --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_ox1.c @@ -0,0 +1,95 @@ +/*------------------------------------------------------------------------ +* +* geqo_ox1.c +* +* order crossover [OX] routines; +* OX1 operator according to Davis +* (Proc Int'l Joint Conf on AI) +* +* src/backend/optimizer/geqo/geqo_ox1.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the ox algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + +#if defined(OX1) + +/* ox1 + * + * position crossover + */ +void +ox1(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, + City * city_table) +{ + int left, + right, + k, + p, + temp; + + /* initialize city table */ + for (k = 1; k <= num_gene; k++) + city_table[k].used = 0; + + /* select portion to copy from tour1 */ + left = geqo_randint(root, num_gene - 1, 0); + right = geqo_randint(root, num_gene - 1, 0); + + if (left > right) + { + temp = left; + left = right; + right = temp; + } + + /* copy portion from tour1 to offspring */ + for (k = left; k <= right; k++) + { + offspring[k] = tour1[k]; + city_table[(int) tour1[k]].used = 1; + } + + k = (right + 1) % num_gene; /* index into offspring */ + p = k; /* index into tour2 */ + + /* copy stuff from tour2 to offspring */ + while (k != left) + { + if (!city_table[(int) tour2[p]].used) + { + offspring[k] = tour2[p]; + k = (k + 1) % num_gene; + city_table[(int) tour2[p]].used = 1; + } + p = (p + 1) % num_gene; /* increment tour2-index */ + } + +} + +#endif /* defined(OX1) */ diff --git a/src/backend/optimizer/geqo/geqo_ox2.c b/src/backend/optimizer/geqo/geqo_ox2.c new file mode 100644 index 0000000..72b9b0f --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_ox2.c @@ -0,0 +1,112 @@ +/*------------------------------------------------------------------------ +* +* geqo_ox2.c +* +* order crossover [OX] routines; +* OX2 operator according to Syswerda +* (The Genetic Algorithms Handbook, ed L Davis) +* +* src/backend/optimizer/geqo/geqo_ox2.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the ox algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + +#if defined(OX2) + +/* ox2 + * + * position crossover + */ +void +ox2(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City * city_table) +{ + int k, + j, + count, + pos, + select, + num_positions; + + /* initialize city table */ + for (k = 1; k <= num_gene; k++) + { + city_table[k].used = 0; + city_table[k - 1].select_list = -1; + } + + /* determine the number of positions to be inherited from tour1 */ + num_positions = geqo_randint(root, 2 * num_gene / 3, num_gene / 3); + + /* make a list of selected cities */ + for (k = 0; k < num_positions; k++) + { + pos = geqo_randint(root, num_gene - 1, 0); + city_table[pos].select_list = (int) tour1[pos]; + city_table[(int) tour1[pos]].used = 1; /* mark used */ + } + + + count = 0; + k = 0; + + /* consolidate the select list to adjacent positions */ + while (count < num_positions) + { + if (city_table[k].select_list == -1) + { + j = k + 1; + while ((city_table[j].select_list == -1) && (j < num_gene)) + j++; + + city_table[k].select_list = city_table[j].select_list; + city_table[j].select_list = -1; + count++; + } + else + count++; + k++; + } + + select = 0; + + for (k = 0; k < num_gene; k++) + { + if (city_table[(int) tour2[k]].used) + { + offspring[k] = (Gene) city_table[select].select_list; + select++; /* next city in the select list */ + } + else + /* city isn't used yet, so inherit from tour2 */ + offspring[k] = tour2[k]; + } + +} + +#endif /* defined(OX2) */ diff --git a/src/backend/optimizer/geqo/geqo_pmx.c b/src/backend/optimizer/geqo/geqo_pmx.c new file mode 100644 index 0000000..ddbc781 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_pmx.c @@ -0,0 +1,224 @@ +/*------------------------------------------------------------------------ +* +* geqo_pmx.c +* +* partially matched crossover [PMX] routines; +* PMX operator according to Goldberg & Lingle +* (Proc Int'l Conf on GA's) +* +* src/backend/optimizer/geqo/geqo_pmx.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the pmx algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + +#if defined(PMX) + +/* pmx + * + * partially matched crossover + */ +void +pmx(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene) +{ + int *failed = (int *) palloc((num_gene + 1) * sizeof(int)); + int *from = (int *) palloc((num_gene + 1) * sizeof(int)); + int *indx = (int *) palloc((num_gene + 1) * sizeof(int)); + int *check_list = (int *) palloc((num_gene + 1) * sizeof(int)); + + int left, + right, + temp, + i, + j, + k; + int mx_fail, + found, + mx_hold; + + +/* no mutation so start up the pmx replacement algorithm */ +/* initialize failed[], from[], check_list[] */ + for (k = 0; k < num_gene; k++) + { + failed[k] = -1; + from[k] = -1; + check_list[k + 1] = 0; + } + +/* locate crossover points */ + left = geqo_randint(root, num_gene - 1, 0); + right = geqo_randint(root, num_gene - 1, 0); + + if (left > right) + { + temp = left; + left = right; + right = temp; + } + + +/* copy tour2 into offspring */ + for (k = 0; k < num_gene; k++) + { + offspring[k] = tour2[k]; + from[k] = DAD; + check_list[tour2[k]]++; + } + +/* copy tour1 into offspring */ + for (k = left; k <= right; k++) + { + check_list[offspring[k]]--; + offspring[k] = tour1[k]; + from[k] = MOM; + check_list[tour1[k]]++; + } + + +/* pmx main part */ + + mx_fail = 0; + +/* STEP 1 */ + + for (k = left; k <= right; k++) + { /* for all elements in the tour1-2 */ + + if (tour1[k] == tour2[k]) + found = 1; /* find match in tour2 */ + + else + { + found = 0; /* substitute elements */ + + j = 0; + while (!(found) && (j < num_gene)) + { + if ((offspring[j] == tour1[k]) && (from[j] == DAD)) + { + + check_list[offspring[j]]--; + offspring[j] = tour2[k]; + found = 1; + check_list[tour2[k]]++; + } + + j++; + } + + } + + if (!(found)) + { /* failed to replace gene */ + failed[mx_fail] = (int) tour1[k]; + indx[mx_fail] = k; + mx_fail++; + } + + } /* ... for */ + + +/* STEP 2 */ + + /* see if any genes could not be replaced */ + if (mx_fail > 0) + { + mx_hold = mx_fail; + + for (k = 0; k < mx_hold; k++) + { + found = 0; + + j = 0; + while (!(found) && (j < num_gene)) + { + + if ((failed[k] == (int) offspring[j]) && (from[j] == DAD)) + { + check_list[offspring[j]]--; + offspring[j] = tour2[indx[k]]; + check_list[tour2[indx[k]]]++; + + found = 1; + failed[k] = -1; + mx_fail--; + } + + j++; + } + + } /* ... for */ + + } /* ... if */ + + +/* STEP 3 */ + + for (k = 1; k <= num_gene; k++) + { + + if (check_list[k] > 1) + { + i = 0; + + while (i < num_gene) + { + if ((offspring[i] == (Gene) k) && (from[i] == DAD)) + { + j = 1; + + while (j <= num_gene) + { + if (check_list[j] == 0) + { + offspring[i] = (Gene) j; + check_list[k]--; + check_list[j]++; + i = num_gene + 1; + j = i; + } + + j++; + } + + } /* ... if */ + + i++; + } /* end while */ + + } + } /* ... for */ + + pfree(failed); + pfree(from); + pfree(indx); + pfree(check_list); +} + +#endif /* defined(PMX) */ diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c new file mode 100644 index 0000000..1fc103b --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_pool.c @@ -0,0 +1,265 @@ +/*------------------------------------------------------------------------ + * + * geqo_pool.c + * Genetic Algorithm (GA) pool stuff + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_pool.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#include "postgres.h" + +#include <float.h> +#include <limits.h> +#include <math.h> + +#include "optimizer/geqo_copy.h" +#include "optimizer/geqo_pool.h" +#include "optimizer/geqo_recombination.h" + + +static int compare(const void *arg1, const void *arg2); + +/* + * alloc_pool + * allocates memory for GA pool + */ +Pool * +alloc_pool(PlannerInfo *root, int pool_size, int string_length) +{ + Pool *new_pool; + Chromosome *chromo; + int i; + + /* pool */ + new_pool = (Pool *) palloc(sizeof(Pool)); + new_pool->size = (int) pool_size; + new_pool->string_length = (int) string_length; + + /* all chromosome */ + new_pool->data = (Chromosome *) palloc(pool_size * sizeof(Chromosome)); + + /* all gene */ + chromo = (Chromosome *) new_pool->data; /* vector of all chromos */ + for (i = 0; i < pool_size; i++) + chromo[i].string = palloc((string_length + 1) * sizeof(Gene)); + + return new_pool; +} + +/* + * free_pool + * deallocates memory for GA pool + */ +void +free_pool(PlannerInfo *root, Pool *pool) +{ + Chromosome *chromo; + int i; + + /* all gene */ + chromo = (Chromosome *) pool->data; /* vector of all chromos */ + for (i = 0; i < pool->size; i++) + pfree(chromo[i].string); + + /* all chromosome */ + pfree(pool->data); + + /* pool */ + pfree(pool); +} + +/* + * random_init_pool + * initialize genetic pool + */ +void +random_init_pool(PlannerInfo *root, Pool *pool) +{ + Chromosome *chromo = (Chromosome *) pool->data; + int i; + int bad = 0; + + /* + * We immediately discard any invalid individuals (those that geqo_eval + * returns DBL_MAX for), thereby not wasting pool space on them. + * + * If we fail to make any valid individuals after 10000 tries, give up; + * this probably means something is broken, and we shouldn't just let + * ourselves get stuck in an infinite loop. + */ + i = 0; + while (i < pool->size) + { + init_tour(root, chromo[i].string, pool->string_length); + pool->data[i].worth = geqo_eval(root, chromo[i].string, + pool->string_length); + if (pool->data[i].worth < DBL_MAX) + i++; + else + { + bad++; + if (i == 0 && bad >= 10000) + elog(ERROR, "geqo failed to make a valid plan"); + } + } + +#ifdef GEQO_DEBUG + if (bad > 0) + elog(DEBUG1, "%d invalid tours found while selecting %d pool entries", + bad, pool->size); +#endif +} + +/* + * sort_pool + * sorts input pool according to worth, from smallest to largest + * + * maybe you have to change compare() for different ordering ... + */ +void +sort_pool(PlannerInfo *root, Pool *pool) +{ + qsort(pool->data, pool->size, sizeof(Chromosome), compare); +} + +/* + * compare + * qsort comparison function for sort_pool + */ +static int +compare(const void *arg1, const void *arg2) +{ + const Chromosome *chromo1 = (const Chromosome *) arg1; + const Chromosome *chromo2 = (const Chromosome *) arg2; + + if (chromo1->worth == chromo2->worth) + return 0; + else if (chromo1->worth > chromo2->worth) + return 1; + else + return -1; +} + +/* alloc_chromo + * allocates a chromosome and string space + */ +Chromosome * +alloc_chromo(PlannerInfo *root, int string_length) +{ + Chromosome *chromo; + + chromo = (Chromosome *) palloc(sizeof(Chromosome)); + chromo->string = (Gene *) palloc((string_length + 1) * sizeof(Gene)); + + return chromo; +} + +/* free_chromo + * deallocates a chromosome and string space + */ +void +free_chromo(PlannerInfo *root, Chromosome *chromo) +{ + pfree(chromo->string); + pfree(chromo); +} + +/* spread_chromo + * inserts a new chromosome into the pool, displacing worst gene in pool + * assumes best->worst = smallest->largest + */ +void +spread_chromo(PlannerInfo *root, Chromosome *chromo, Pool *pool) +{ + int top, + mid, + bot; + int i, + index; + Chromosome swap_chromo, + tmp_chromo; + + /* new chromo is so bad we can't use it */ + if (chromo->worth > pool->data[pool->size - 1].worth) + return; + + /* do a binary search to find the index of the new chromo */ + + top = 0; + mid = pool->size / 2; + bot = pool->size - 1; + index = -1; + + while (index == -1) + { + /* these 4 cases find a new location */ + + if (chromo->worth <= pool->data[top].worth) + index = top; + else if (chromo->worth == pool->data[mid].worth) + index = mid; + else if (chromo->worth == pool->data[bot].worth) + index = bot; + else if (bot - top <= 1) + index = bot; + + + /* + * these 2 cases move the search indices since a new location has not + * yet been found. + */ + + else if (chromo->worth < pool->data[mid].worth) + { + bot = mid; + mid = top + ((bot - top) / 2); + } + else + { /* (chromo->worth > pool->data[mid].worth) */ + top = mid; + mid = top + ((bot - top) / 2); + } + } /* ... while */ + + /* now we have index for chromo */ + + /* + * move every gene from index on down one position to make room for chromo + */ + + /* + * copy new gene into pool storage; always replace worst gene in pool + */ + + geqo_copy(root, &pool->data[pool->size - 1], chromo, pool->string_length); + + swap_chromo.string = pool->data[pool->size - 1].string; + swap_chromo.worth = pool->data[pool->size - 1].worth; + + for (i = index; i < pool->size; i++) + { + tmp_chromo.string = pool->data[i].string; + tmp_chromo.worth = pool->data[i].worth; + + pool->data[i].string = swap_chromo.string; + pool->data[i].worth = swap_chromo.worth; + + swap_chromo.string = tmp_chromo.string; + swap_chromo.worth = tmp_chromo.worth; + } +} diff --git a/src/backend/optimizer/geqo/geqo_px.c b/src/backend/optimizer/geqo/geqo_px.c new file mode 100644 index 0000000..ad5ad3f --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_px.c @@ -0,0 +1,110 @@ +/*------------------------------------------------------------------------ +* +* geqo_px.c +* +* position crossover [PX] routines; +* PX operator according to Syswerda +* (The Genetic Algorithms Handbook, L Davis, ed) +* +* src/backend/optimizer/geqo/geqo_px.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* the px algorithm is adopted from Genitor : */ +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + +#if defined(PX) + +/* px + * + * position crossover + */ +void +px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, + City * city_table) +{ + int num_positions; + int i, + pos, + tour2_index, + offspring_index; + + /* initialize city table */ + for (i = 1; i <= num_gene; i++) + city_table[i].used = 0; + + /* choose random positions that will be inherited directly from parent */ + num_positions = geqo_randint(root, 2 * num_gene / 3, num_gene / 3); + + /* choose random position */ + for (i = 0; i < num_positions; i++) + { + pos = geqo_randint(root, num_gene - 1, 0); + + offspring[pos] = tour1[pos]; /* transfer cities to child */ + city_table[(int) tour1[pos]].used = 1; /* mark city used */ + } + + tour2_index = 0; + offspring_index = 0; + + + /* px main part */ + + while (offspring_index < num_gene) + { + + /* next position in offspring filled */ + if (!city_table[(int) tour1[offspring_index]].used) + { + + /* next city in tour1 not used */ + if (!city_table[(int) tour2[tour2_index]].used) + { + + /* inherit from tour1 */ + offspring[offspring_index] = tour2[tour2_index]; + + tour2_index++; + offspring_index++; + } + else + { /* next city in tour2 has been used */ + tour2_index++; + } + + } + else + { /* next position in offspring is filled */ + offspring_index++; + } + + } + +} + +#endif /* defined(PX) */ diff --git a/src/backend/optimizer/geqo/geqo_random.c b/src/backend/optimizer/geqo/geqo_random.c new file mode 100644 index 0000000..f21bc04 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_random.c @@ -0,0 +1,40 @@ +/*------------------------------------------------------------------------ + * + * geqo_random.c + * random number generator + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_random.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "optimizer/geqo_random.h" + + +void +geqo_set_seed(PlannerInfo *root, double seed) +{ + GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; + + /* + * XXX. This seeding algorithm could certainly be improved - but it is not + * critical to do so. + */ + memset(private->random_state, 0, sizeof(private->random_state)); + memcpy(private->random_state, + &seed, + Min(sizeof(private->random_state), sizeof(seed))); +} + +double +geqo_rand(PlannerInfo *root) +{ + GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; + + return pg_erand48(private->random_state); +} diff --git a/src/backend/optimizer/geqo/geqo_recombination.c b/src/backend/optimizer/geqo/geqo_recombination.c new file mode 100644 index 0000000..a5d3e47 --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_recombination.c @@ -0,0 +1,92 @@ +/*------------------------------------------------------------------------ +* +* geqo_recombination.c +* misc recombination procedures +* +* src/backend/optimizer/geqo/geqo_recombination.c +* +*------------------------------------------------------------------------- +*/ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ + +#include "postgres.h" + +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_recombination.h" + + +/* + * init_tour + * + * Randomly generates a legal "traveling salesman" tour + * (i.e. where each point is visited only once.) + */ +void +init_tour(PlannerInfo *root, Gene *tour, int num_gene) +{ + int i, + j; + + /* + * We must fill the tour[] array with a random permutation of the numbers + * 1 .. num_gene. We can do that in one pass using the "inside-out" + * variant of the Fisher-Yates shuffle algorithm. Notionally, we append + * each new value to the array and then swap it with a randomly-chosen + * array element (possibly including itself, else we fail to generate + * permutations with the last city last). The swap step can be optimized + * by combining it with the insertion. + */ + if (num_gene > 0) + tour[0] = (Gene) 1; + + for (i = 1; i < num_gene; i++) + { + j = geqo_randint(root, i, 0); + /* i != j check avoids fetching uninitialized array element */ + if (i != j) + tour[i] = tour[j]; + tour[j] = (Gene) (i + 1); + } +} + +/* city table is used in these recombination methods: */ +#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2) + +/* alloc_city_table + * + * allocate memory for city table + */ +City * +alloc_city_table(PlannerInfo *root, int num_gene) +{ + City *city_table; + + /* + * palloc one extra location so that nodes numbered 1..n can be indexed + * directly; 0 will not be used + */ + city_table = (City *) palloc((num_gene + 1) * sizeof(City)); + + return city_table; +} + +/* free_city_table + * + * deallocate memory of city table + */ +void +free_city_table(PlannerInfo *root, City * city_table) +{ + pfree(city_table); +} + +#endif /* CX || PX || OX1 || OX2 */ diff --git a/src/backend/optimizer/geqo/geqo_selection.c b/src/backend/optimizer/geqo/geqo_selection.c new file mode 100644 index 0000000..66b6c8a --- /dev/null +++ b/src/backend/optimizer/geqo/geqo_selection.c @@ -0,0 +1,115 @@ +/*------------------------------------------------------------------------- + * + * geqo_selection.c + * linear selection scheme for the genetic query optimizer + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/optimizer/geqo/geqo_selection.c + * + *------------------------------------------------------------------------- + */ + +/* contributed by: + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + * Martin Utesch * Institute of Automatic Control * + = = University of Mining and Technology = + * utesch@aut.tu-freiberg.de * Freiberg, Germany * + =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= + */ + +/* this is adopted from D. Whitley's Genitor algorithm */ + +/*************************************************************/ +/* */ +/* Copyright (c) 1990 */ +/* Darrell L. Whitley */ +/* Computer Science Department */ +/* Colorado State University */ +/* */ +/* Permission is hereby granted to copy all or any part of */ +/* this program for free distribution. The author's name */ +/* and this copyright notice must be included in any copy. */ +/* */ +/*************************************************************/ + +#include "postgres.h" + +#include <math.h> + +#include "optimizer/geqo_copy.h" +#include "optimizer/geqo_random.h" +#include "optimizer/geqo_selection.h" + +static int linear_rand(PlannerInfo *root, int max, double bias); + + +/* + * geqo_selection + * according to bias described by input parameters, + * first and second genes are selected from the pool + */ +void +geqo_selection(PlannerInfo *root, Chromosome *momma, Chromosome *daddy, + Pool *pool, double bias) +{ + int first, + second; + + first = linear_rand(root, pool->size, bias); + second = linear_rand(root, pool->size, bias); + + /* + * Ensure we have selected different genes, except if pool size is only + * one, when we can't. + * + * This code was observed to hang up in an infinite loop when the + * platform's implementation of erand48() was broken. We now always use + * our own version. + */ + if (pool->size > 1) + { + while (first == second) + second = linear_rand(root, pool->size, bias); + } + + geqo_copy(root, momma, &pool->data[first], pool->string_length); + geqo_copy(root, daddy, &pool->data[second], pool->string_length); +} + +/* + * linear_rand + * generates random integer between 0 and input max number + * using input linear bias + * + * bias is y-intercept of linear distribution + * + * probability distribution function is: f(x) = bias - 2(bias - 1)x + * bias = (prob of first rule) / (prob of middle rule) + */ +static int +linear_rand(PlannerInfo *root, int pool_size, double bias) +{ + double index; /* index between 0 and pool_size */ + double max = (double) pool_size; + + /* + * If geqo_rand() returns exactly 1.0 then we will get exactly max from + * this equation, whereas we need 0 <= index < max. Also it seems + * possible that roundoff error might deliver values slightly outside the + * range; in particular avoid passing a value slightly less than 0 to + * sqrt(). If we get a bad value just try again. + */ + do + { + double sqrtval; + + sqrtval = (bias * bias) - 4.0 * (bias - 1.0) * geqo_rand(root); + if (sqrtval > 0.0) + sqrtval = sqrt(sqrtval); + index = max * (bias - sqrtval) / 2.0 / (bias - 1.0); + } while (index < 0.0 || index >= max); + + return (int) index; +} |