From 06eaf7232e9a920468c0f8d74dcf2fe8b555501c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2024 14:24:36 +0200 Subject: Adding upstream version 1:10.11.6. Signed-off-by: Daniel Baumann --- sql/opt_split.cc | 1415 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1415 insertions(+) create mode 100644 sql/opt_split.cc (limited to 'sql/opt_split.cc') diff --git a/sql/opt_split.cc b/sql/opt_split.cc new file mode 100644 index 00000000..85ca234f --- /dev/null +++ b/sql/opt_split.cc @@ -0,0 +1,1415 @@ +/* + Copyright (c) 2017, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + This file contains functions to support the splitting technique. + This optimization technique can be applied to equi-joins involving + materialized tables such as materialized views, materialized derived tables + and materialized CTEs. The technique also could be applied to materialized + semi-joins though the code below does not support this usage yet. + + Here are the main ideas behind this technique that we'll call SM optimization + (SplitMaterialization). + + Consider the query + SELECT t1.a, t.min + FROM t1, (SELECT t2.a, MIN(t2.b) as min FROM t2 GROUP BY t2.a) t + WHERE t1.a = t.a and t1.b < const + + Re-write the query into + SELECT t1.a, t.min + FROM t1, LATERAL (SELECT t2.a, MIN(t2.b) as min + FROM t2 WHERE t2.a = t1.a GROUP BY t2.a) t + WHERE t1.b < const + + The execution of the original query (Q1) does the following: + 1. Executes the query in the specification of the derived table + and puts the result set into a temporary table with an index + on the first column. + 2. Joins t1 with the temporary table using the its index. + + The execution of the transformed query (Q1R) follows these steps: + 1. For each row of t1 where t1.b < const a temporary table + containing all rows of of t2 with t2.a = t1.a is created + 2. If there are any rows in the temporary table aggregation + is performed for them + 3. The result of the aggregation is joined with t1. + + The second execution can win if: + a) There is an efficient way to select rows of t2 for which t2.a = t1.a + (For example if there is an index on t2.a) + and + b) The number of temporary tables created for partitions + is much smaller that the total number of partitions + + It should be noted that for the transformed query aggregation + for a partition may be performed several times. + + As we can see the optimization basically splits table t2 into + partitions and performs aggregation over each of them + independently. + + If we have only one equi-join condition then we either push it as + for Q1R or we don't. In a general case we may have much more options. + Consider the query (Q3) + SELECT * + FROM t1,t2 (SELECT t3.a, t3.b, MIN(t3.c) as min + FROM t3 GROUP BY a,b) t + WHERE t.a = t1.a AND t.b = t2.b + AND t1.c < c1 and t2.c < c2 + AND P(t1,t2); + (P(t1,t2) designates some additional conditions over columns of t1,t2). + + Assuming that there indexes on t3(a,b) and t3(b) here we have several + reasonable options to push equi-join conditions into the derived. + All these options should be taken into account when the optimizer + evaluates different join orders. When the join order (t1,t,t2) is + evaluated there is only one way of splitting : to push the condition + t.a = t1.a into t. With the join order (t2,t,t1) only the condition + t.b = t2.b can be pushed. When the join orders (t1,t2,t) and (t2,t1,t) + are evaluated then the optimizer should consider pushing t.a = t1.a, + t.b = t2.b and (t.a = t1.a AND t.b = t2.b) to choose the best condition + for splitting. Apparently here last condition is the best one because + it provides the miximum possible number of partitions. + + If we dropped the index on t3(a,b) and created the index on t3(a) instead + then we would have two options for splitting: to push t.a = t1.a or to + push t.b = t2.b. If the selectivity of the index t3(a) is better than + the selectivity of t3(b) then the first option is preferred. + + Although the condition (t.a = t1.a AND t.b = t2.b) provides a better + splitting than the condition t.a = t1.a the latter will be used for + splitting if the execution plan with the join order (t1,t,t2) turns out + to be the cheapest one. It's quite possible when the join condition + P(t1,t2) has a bad selectivity. + + Whenever the optimizer evaluates the cost of using a splitting it + compares it with the cost of materialization without splitting. + + If we just drop the index on t3(a,b) the chances that the splitting + will be used becomes much lower but they still exists providing that + the fanout of the partial join of t1 and t2 is small enough. + + The lateral derived table LT formed as a result of SM optimization applied + to a materialized derived table DT must be joined after all parameters + of splitting has been evaluated, i.e. after all expressions used in the + equalities pushed into DT that make the employed splitting effective + could be evaluated. With the chosen join order all the parameters can be + evaluated after the last table LPT that contains any columns referenced in + the parameters has been joined and the table APT following LPT in the chosen + join order is accessed. + Usually the formed lateral derived table LT is accessed right after the table + LPT. As in such cases table LT must be refilled for each combination of + splitting parameters this table must be populated before each access to LT + and the estimate of the expected number of refills that could be suggested in + such cases is the number of rows in the partial join ending with table LPT. + However in other cases the chosen join order may contain tables between LPT + and LT. + Consider the query (Q4) + SELECT * + FROM t1 JOIN t2 ON t1.b = t2.b + LEFT JOIN (SELECT t3.a, t3.b, MIN(t3.c) as min + FROM t3 GROUP BY a,b) t + ON t.a = t1.a AND t.c > 0 + [WHERE P(t1,t2)]; + Let's assume that the join order t1,t2,t was chosen for this query and + SP optimization was applied to t with splitting over t3.a using the index + on column t3.a. Here the table t1 serves as LPT, t2 as APT while t with + pushed condition t.a = t1.a serves as LT. Note that here LT is accessed + after t2, not right after t1. Here the number of refills of the lateral + derived is not more that the number of key values of t1.a that might be + less than the cardinality of the partial join (t1,t2). That's why it makes + sense to signal that t3 has to be refilled just before t2 is accessed. + However if the cardinality of the partial join (t1,t2) happens to be less + than the cardinality of the partial join (t1) due to additional selective + condition P(t1,t2) then the flag informing about necessity of a new refill + can be set either when accessing t2 or right after it has been joined. + The current code sets such flag right after generating a record of the + partial join with minimal cardinality for all those partial joins that + end between APT and LT. It allows sometimes to push extra conditions + into the lateral derived without any increase of the number of refills. + However this flag can be set only after the last join table between + APT and LT using join buffer has been joined. +*/ + +/* + Splitting can be applied to a materialized table specified by the query + with post-join operations that require partitioning of the result set produced + by the join expression used in the FROM clause the query such as GROUP BY + operation and window function operation. In any of these cases the post-join + operation can be executed independently for any partition only over the rows + of this partition. Also if the set of all partitions is divided into disjoint + subsets the operation can applied to each subset independently. In this case + all rows are first partitioned into the groups each of which contains all the + rows from the partitions belonging the same subset and then each group + is subpartitioned into groups in the the post join operation. + + The set of all rows belonging to the union of several partitions is called + here superpartition. If a grouping operation is defined by the list + e_1,...,e_n then any set S = {e_i1,...,e_ik} can be used to devide all rows + into superpartions such that for any two rows r1, r2 the following holds: + e_ij(r1) = e_ij(r2) for each e_ij from S. We use the splitting technique + only if S consists of references to colums of the joined tables. + For example if the GROUP BY list looks like this a, g(b), c we can consider + applying the splitting technique to the superpartitions defined by {a,c}, + {a}, {c} (a and c here may be the references to the columns from different + tables). +*/ + + /* + The following describes when and how the optimizer decides whether it + makes sense to employ the splitting technique. + + 1. For each instance of a materialized table (derived/view/CTE) it is + checked that it is potentially splittable. Now it is done right after the + execution plan for the select specifying this table has been chosen. + + 2. Any potentially splittable materialized table T is subject to two-phase + optimization. It means that the optimizer first builds the best execution + plan for join that specifies T. Then the control is passed back to the + optimization process of the embedding select Q. After the execution plan + for Q has been chosen the optimizer finishes the optimization of the join + specifying T. + + 3. When the optimizer builds the container with the KEYUSE structures + for the join of embedding select it detects the equi-join conditions + PC that potentially could be pushed into a potentially splittable + materialized table T. The collected information about such conditions + is stored together with other facts on potential splittings for table T. + + 4. When the optimizer starts looking for the best execution plan for the + embedding select Q for each potentially splittable materialized table T + it creates special KEYUSE structures for pushable equi-join conditions + PC. These structures are used to add new elements to the container + of KEYUSE structures built for T. The specifics of these elements is + that they can be ebabled and disabled during the process of choosing + the best plan for Q. + + 5. When the optimizer extends a partial join order with a potentially + splittable materialized table T (in function best_access_path) it + first evaluates a new execution plan for the modified specification + of T that adds all equi-join conditions that can be pushed with + current join prefix to the WHERE conditions of the original + specification of T. If the cost of the new plan is better than the + the cost of the original materialized table then the optimizer + prefers to use splitting for the current join prefix. As the cost + of the plan depends only on the pushed conditions it makes sense + to cache this plan for other prefixes. + + 6. The optimizer takes into account the cost of splitting / materialization + of a potentially splittable materialized table T as a startup cost + to access table T. + + 7. When the optimizer finally chooses the best execution plan for + the embedding select Q and this plan prefers using splitting + for table T with pushed equi-join conditions PC then the execution + plan for the underlying join with these conditions is chosen for T. +*/ + +/* + The implementation of the splitting technique below allows to apply + the technique only to a materialized derived table / view / CTE whose + specification is either a select with GROUP BY or a non-grouping select + with window functions that share the same PARTITION BY list. +*/ + +#include "mariadb.h" +#include "sql_select.h" +#include "opt_trace.h" + +/* Info on a splitting field */ +struct SplM_field_info +{ + /* Splitting field in the materialized table T */ + Field *mat_field; + /* The item from the select list of the specification of T */ + Item *producing_item; + /* The corresponding splitting field from the specification of T */ + Field *underlying_field; +}; + + +/* Info on the splitting execution plan saved in SplM_opt_info::cache */ +struct SplM_plan_info +{ + /* The cached splitting execution plan P */ + POSITION *best_positions; + /* The cost of the above plan */ + double cost; + /* Selectivity of splitting used in P */ + double split_sel; + /* For fast search of KEYUSE_EXT elements used for splitting in P */ + struct KEYUSE_EXT *keyuse_ext_start; + /* The tables that contains the fields used for splitting in P */ + TABLE *table; + /* The number of the key from 'table' used for splitting in P */ + uint key; + /* Number of the components of 'key' used for splitting in P */ + uint parts; +}; + + +/* + The structure contains the information that is used by the optimizer + for potentially splittable materialization of T that is a materialized + derived_table / view / CTE +*/ +class SplM_opt_info : public Sql_alloc +{ +public: + /* The join for the select specifying T */ + JOIN *join; + /* The map of tables from 'join' whose columns can be used for partitioning */ + table_map tables_usable_for_splitting; + /* Info about the fields of the joined tables usable for splitting */ + SplM_field_info *spl_fields; + /* The number of elements in the above list */ + uint spl_field_cnt; + /* The list of equalities injected into WHERE for split optimization */ + List inj_cond_list; + /* Contains the structures to generate all KEYUSEs for pushable equalities */ + List added_key_fields; + /* The cache of evaluated execution plans for 'join' with pushed equalities */ + List plan_cache; + /* Cost of best execution plan for join when nothing is pushed */ + double unsplit_cost; + /* Cardinality of T when nothing is pushed */ + double unsplit_card; + /* Lastly evaluated execution plan for 'join' with pushed equalities */ + SplM_plan_info *last_plan; + double last_refills; + + SplM_plan_info *find_plan(TABLE *table, uint key, uint parts); +}; + + +void TABLE::set_spl_opt_info(SplM_opt_info *spl_info) +{ + if (spl_info) + spl_info->join->spl_opt_info= spl_info; + spl_opt_info= spl_info; +} + + +void TABLE::deny_splitting() +{ + DBUG_ASSERT(spl_opt_info != NULL); + spl_opt_info->join->spl_opt_info= NULL; + spl_opt_info= NULL; +} + + +double TABLE::get_materialization_cost() +{ + DBUG_ASSERT(spl_opt_info != NULL); + return spl_opt_info->unsplit_cost; +} + + +/* This structure is auxiliary and used only in the function that follows it */ +struct SplM_field_ext_info: public SplM_field_info +{ + uint item_no; + bool is_usable_for_ref_access; +}; + + +/** + @brief + Check whether this join is one for potentially splittable materialized table + + @details + The function checks whether this join is for select that specifies + a potentially splittable materialized table T. If so, the collected + info on potential splittability of T is attached to the field spl_opt_info + of the TABLE structure for T. + + The function returns a positive answer if the following holds: + 1. the optimizer switch 'split_materialized' is set 'on' + 2. the select owning this join specifies a materialized derived/view/cte T + 3. this is the only select in the specification of T + 4. condition pushdown is not prohibited into T + 5. T is not recursive + 6. not all of this join are constant or optimized away + 7. T is either + 7.1. a grouping table with GROUP BY list P + or + 7.2. a non-grouping table with window functions over the same non-empty + partition specified by the PARTITION BY list P + 8. P contains some references on the columns of the joined tables C + occurred also in the select list of this join + 9. There are defined some keys usable for ref access of fields from C + with available statistics. + 10. The select doesn't use WITH ROLLUP (This limitation can probably be + lifted) + + @retval + true if the answer is positive + false otherwise +*/ + +bool JOIN::check_for_splittable_materialized() +{ + ORDER *partition_list= 0; + st_select_lex_unit *unit= select_lex->master_unit(); + TABLE_LIST *derived= unit->derived; + if (!(optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED)) || // !(1) + !(derived && derived->is_materialized_derived()) || // !(2) + (unit->first_select()->next_select()) || // !(3) + (derived->prohibit_cond_pushdown) || // !(4) + (derived->is_recursive_with_table()) || // !(5) + (table_count == 0 || const_tables == top_join_tab_count) || // !(6) + rollup.state != ROLLUP::STATE_NONE) // (10) + return false; + if (group_list) // (7.1) + { + if (!select_lex->have_window_funcs()) + partition_list= group_list; + } + else if (select_lex->have_window_funcs() && + select_lex->window_specs.elements == 1) // (7.2) + { + partition_list= + select_lex->window_specs.head()->partition_list->first; + } + if (!partition_list) + return false; + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_split(thd, "check_split_materialized"); + + ORDER *ord; + Dynamic_array candidates(PSI_INSTRUMENT_MEM); + + /* + Select from partition_list all candidates for splitting. + A candidate must be + - field item or refer to such (8.1) + - item mentioned in the select list (8.2) + Put info about such candidates into the array candidates + */ + table_map usable_tables= 0; // tables that contains the candidate + for (ord= partition_list; ord; ord= ord->next) + { + Item *ord_item= *ord->item; + if (ord_item->real_item()->type() != Item::FIELD_ITEM) // !(8.1) + continue; + + Field *ord_field= ((Item_field *) (ord_item->real_item()))->field; + + /* Ignore fields from of inner tables of outer joins */ + TABLE_LIST *tbl= ord_field->table->pos_in_table_list; + if (tbl->is_inner_table_of_outer_join()) + continue; + + List_iterator li(fields_list); + Item *item; + uint item_no= 0; + while ((item= li++)) + { + if ((*ord->item)->eq(item, 0)) // (8.2) + { + SplM_field_ext_info new_elem; + new_elem.producing_item= item; + new_elem.item_no= item_no; + new_elem.mat_field= derived->table->field[item_no]; + new_elem.underlying_field= ord_field; + new_elem.is_usable_for_ref_access= false; + candidates.push(new_elem); + usable_tables|= ord_field->table->map; + break; + } + item_no++; + } + } + if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2) + { + trace_split.add("not_applicable", "group list has no candidates"); + return false; + } + + /* + For each table from this join find the keys that can be used for ref access + of the fields mentioned in the 'array candidates' + */ + + SplM_field_ext_info *cand; + SplM_field_ext_info *cand_start= &candidates.at(0); + SplM_field_ext_info *cand_end= cand_start + candidates.elements(); + + for (JOIN_TAB *tab= join_tab; + tab < join_tab + top_join_tab_count; tab++) + { + TABLE *table= tab->table; + if (!(table->map & usable_tables)) + continue; + + table->keys_usable_for_splitting.clear_all(); + uint i; + for (i= 0; i < table->s->keys; i++) + { + if (!table->keys_in_use_for_query.is_set(i)) + continue; + KEY *key_info= table->key_info + i; + uint key_parts= table->actual_n_key_parts(key_info); + uint usable_kp_cnt= 0; + for ( ; usable_kp_cnt < key_parts; usable_kp_cnt++) + { + if (key_info->actual_rec_per_key(usable_kp_cnt) == 0) + break; + int fldnr= key_info->key_part[usable_kp_cnt].fieldnr; + + for (cand= cand_start; cand < cand_end; cand++) + { + if (cand->underlying_field->table == table && + cand->underlying_field->field_index + 1 == fldnr) + { + cand->is_usable_for_ref_access= true; + break; + } + } + if (cand == cand_end) + break; + } + if (usable_kp_cnt) + table->keys_usable_for_splitting.set_bit(i); + } + } + + /* Count the candidate fields that can be accessed by ref */ + uint spl_field_cnt= (uint)candidates.elements(); + for (cand= cand_start; cand < cand_end; cand++) + { + if (!cand->is_usable_for_ref_access) + spl_field_cnt--; + } + + if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9) + { + trace_split.add("not_applicable", + "no candidate field can be accessed through ref"); + return false; + } + + /* + Create a structure of the type SplM_opt_info and fill it with + the collected info on potential splittability of T + */ + SplM_opt_info *spl_opt_info= new (thd->mem_root) SplM_opt_info(); + SplM_field_info *spl_field= + (SplM_field_info *) (thd->calloc(sizeof(SplM_field_info) * + spl_field_cnt)); + + if (!(spl_opt_info && spl_field)) // consider T as not good for splitting + return false; + + spl_opt_info->join= this; + spl_opt_info->tables_usable_for_splitting= 0; + spl_opt_info->spl_field_cnt= spl_field_cnt; + spl_opt_info->spl_fields= spl_field; + + { + Json_writer_array trace_range(thd, "split_candidates"); + for (cand= cand_start; cand < cand_end; cand++) + { + if (!cand->is_usable_for_ref_access) + continue; + trace_range.add(cand->producing_item); + + spl_field->producing_item= cand->producing_item; + spl_field->underlying_field= cand->underlying_field; + spl_field->mat_field= cand->mat_field; + spl_opt_info->tables_usable_for_splitting|= + cand->underlying_field->table->map; + spl_field++; + } + } + + /* Attach this info to the table T */ + derived->table->set_spl_opt_info(spl_opt_info); + + /* + If this is specification of a materialized derived table T that is + potentially splittable and is used in the from list of the right operand + of an IN predicand transformed to a semi-join then the embedding semi-join + nest is not allowed to be materialized. + */ + if (derived && derived->is_materialized_derived() && + derived->embedding && derived->embedding->sj_subq_pred) + derived->embedding->sj_subq_pred->types_allow_materialization= FALSE; + return true; +} + + +/** + @brief + Collect info on KEY_FIELD usable for splitting + + @param + key_field KEY_FIELD to collect info on + + @details + The function assumes that this table is potentially splittable. + The function checks whether the KEY_FIELD structure key_field built for + this table was created for a splitting field f. If so, the function does + the following using info from key_field: + 1. Builds an equality of the form f = key_field->val that could be + pushed into this table. + 2. Creates a new KEY_FIELD structure for this equality and stores + a reference to this structure in this->spl_opt_info. +*/ + +void TABLE::add_splitting_info_for_key_field(KEY_FIELD *key_field) +{ + DBUG_ASSERT(spl_opt_info != NULL); + JOIN *join= spl_opt_info->join; + Field *field= key_field->field; + SplM_field_info *spl_field= spl_opt_info->spl_fields; + uint i= spl_opt_info->spl_field_cnt; + for ( ; i; i--, spl_field++) + { + if (spl_field->mat_field == field) + break; + } + if (!i) // field is not usable for splitting + return; + + /* + Any equality condition that can be potentially pushed into the + materialized derived table is constructed now though later it may turn out + that it is not needed, because it is not used for splitting. + The reason for this is that the failure to construct it when it has to be + injected causes denial for further processing of the query. + Formally this equality is needed in the KEY_FIELD structure constructed + here that will be used to generate additional keyuses usable for splitting. + However key_field.cond could be used for this purpose (see implementations + of virtual function can_optimize_keypart_ref()). + + The condition is built in such a form that it can be added to the WHERE + condition of the select that specifies this table. + */ + THD *thd= in_use; + Item *left_item= spl_field->producing_item->build_clone(thd); + Item *right_item= key_field->val->build_clone(thd); + Item_func_eq *eq_item= 0; + if (left_item && right_item) + { + right_item->walk(&Item::set_fields_as_dependent_processor, + false, join->select_lex); + right_item->update_used_tables(); + eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item); + } + if (!eq_item) + return; + KEY_FIELD *added_key_field= + (KEY_FIELD *) thd->alloc(sizeof(KEY_FIELD)); + if (!added_key_field || + spl_opt_info->added_key_fields.push_back(added_key_field,thd->mem_root)) + return; + added_key_field->field= spl_field->underlying_field; + added_key_field->cond= eq_item; + added_key_field->val= key_field->val; + added_key_field->level= 0; + added_key_field->optimize= KEY_OPTIMIZE_EQ; + added_key_field->eq_func= true; + + Item *real= key_field->val->real_item(); + if ((real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null()) + added_key_field->null_rejecting= true; + else + added_key_field->null_rejecting= false; + + added_key_field->cond_guard= NULL; + added_key_field->sj_pred_no= UINT_MAX; + return; +} + + +static bool +add_ext_keyuse_for_splitting(Dynamic_array *ext_keyuses, + KEY_FIELD *added_key_field, uint key, uint part) +{ + KEYUSE_EXT keyuse_ext; + Field *field= added_key_field->field; + + JOIN_TAB *tab=field->table->reginfo.join_tab; + key_map possible_keys=field->get_possible_keys(); + possible_keys.intersect(field->table->keys_usable_for_splitting); + tab->keys.merge(possible_keys); + + Item_func_eq *eq_item= (Item_func_eq *) (added_key_field->cond); + keyuse_ext.table= field->table; + keyuse_ext.val= eq_item->arguments()[1]; + keyuse_ext.key= key; + keyuse_ext.keypart=part; + keyuse_ext.keypart_map= (key_part_map) 1 << part; + keyuse_ext.used_tables= keyuse_ext.val->used_tables(); + keyuse_ext.optimize= added_key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; + keyuse_ext.ref_table_rows= 0; + keyuse_ext.null_rejecting= added_key_field->null_rejecting; + keyuse_ext.cond_guard= added_key_field->cond_guard; + keyuse_ext.sj_pred_no= added_key_field->sj_pred_no; + keyuse_ext.validity_ref= 0; + keyuse_ext.needed_in_prefix= added_key_field->val->used_tables() & + ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + keyuse_ext.validity_var= false; + return ext_keyuses->push(keyuse_ext); +} + + +static int +sort_ext_keyuse(KEYUSE_EXT *a, KEYUSE_EXT *b) +{ + if (a->table->tablenr != b->table->tablenr) + return (int) (a->table->tablenr - b->table->tablenr); + if (a->key != b->key) + return (int) (a->key - b->key); + return (int) (a->keypart - b->keypart); +} + + +static void +sort_ext_keyuses(Dynamic_array *keyuses) +{ + KEYUSE_EXT *first_keyuse= &keyuses->at(0); + my_qsort(first_keyuse, keyuses->elements(), sizeof(KEYUSE_EXT), + (qsort_cmp) sort_ext_keyuse); +} + + +/** + @brief + Add info on keyuses usable for splitting into an array +*/ + +static bool +add_ext_keyuses_for_splitting_field(Dynamic_array *ext_keyuses, + KEY_FIELD *added_key_field) +{ + Field *field= added_key_field->field; + TABLE *table= field->table; + for (uint key= 0; key < table->s->keys; key++) + { + if (!(table->keys_usable_for_splitting.is_set(key))) + continue; + KEY *key_info= table->key_info + key; + uint key_parts= table->actual_n_key_parts(key_info); + KEY_PART_INFO *key_part_info= key_info->key_part; + for (uint part=0; part < key_parts; part++, key_part_info++) + { + if (!field->eq(key_part_info->field)) + continue; + if (add_ext_keyuse_for_splitting(ext_keyuses, added_key_field, key, part)) + return true; + } + } + return false; +} + + +/* + @brief + Cost of the post join operation used in specification of splittable table +*/ + +static +double spl_postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len) +{ + double cost; + cost= get_tmp_table_write_cost(thd, join_record_count,rec_len) * + join_record_count; // cost to fill tmp table + cost+= get_tmp_table_lookup_cost(thd, join_record_count,rec_len) * + join_record_count; // cost to perform post join operation used here + cost+= get_tmp_table_lookup_cost(thd, join_record_count, rec_len) + + (join_record_count == 0 ? 0 : + join_record_count * log2 (join_record_count)) * + SORT_INDEX_CMP_COST; // cost to perform sorting + return cost; +} + +/** + @brief + Add KEYUSE structures that can be usable for splitting + + @details + This function is called only for joins created for potentially + splittable materialized tables. The function does the following: + 1. Creates the dynamic array ext_keyuses_for_splitting of KEYUSE_EXT + structures and fills is with info about all keyuses that + could be used for splitting. + 2. Sort the array ext_keyuses_for_splitting for fast access by key + on certain columns. + 3. Collects and stores cost and cardinality info on the best execution + plan that does not use splitting and save this plan together with + corresponding array of keyuses. + 4. Expand this array with KEYUSE elements built from the info stored + in ext_keyuses_for_splitting that could be produced by pushed + equalities employed for splitting. + 5. Prepare the extended array of keyuses to be used in the function + best_access_plan() +*/ + +void JOIN::add_keyuses_for_splitting() +{ + uint i; + size_t idx; + KEYUSE_EXT *keyuse_ext; + KEYUSE_EXT keyuse_ext_end; + double oper_cost; + uint rec_len; + uint added_keyuse_count; + TABLE *table= select_lex->master_unit()->derived->table; + List_iterator_fast li(spl_opt_info->added_key_fields); + KEY_FIELD *added_key_field; + if (!spl_opt_info->added_key_fields.elements) + goto err; + if (!(ext_keyuses_for_splitting= new Dynamic_array(PSI_INSTRUMENT_MEM))) + goto err; + while ((added_key_field= li++)) + { + (void) add_ext_keyuses_for_splitting_field(ext_keyuses_for_splitting, + added_key_field); + } + added_keyuse_count= (uint)ext_keyuses_for_splitting->elements(); + if (!added_keyuse_count) + goto err; + sort_ext_keyuses(ext_keyuses_for_splitting); + bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end)); + if (ext_keyuses_for_splitting->push(keyuse_ext_end)) + goto err; + // psergey-todo: trace anything here? + spl_opt_info->unsplit_card= join_record_count; + + rec_len= table->s->rec_buff_length; + + oper_cost= spl_postjoin_oper_cost(thd, join_record_count, rec_len); + + spl_opt_info->unsplit_cost= best_positions[table_count-1].read_time + + oper_cost; + + if (!(save_qep= new Join_plan_state(table_count + 1))) + goto err; + + save_query_plan(save_qep); + + if (!keyuse.buffer && + my_init_dynamic_array(PSI_INSTRUMENT_ME, &keyuse, sizeof(KEYUSE), + 20, 64, MYF(MY_THREAD_SPECIFIC))) + goto err; + + if (allocate_dynamic(&keyuse, save_qep->keyuse.elements + added_keyuse_count)) + goto err; + + idx= keyuse.elements= save_qep->keyuse.elements; + if (keyuse.elements) + memcpy(keyuse.buffer, + save_qep->keyuse.buffer, + (size_t) keyuse.elements * keyuse.size_of_element); + + keyuse_ext= &ext_keyuses_for_splitting->at(0); + for (i=0; i < added_keyuse_count; i++, keyuse_ext++, idx++) + { + set_dynamic(&keyuse, (KEYUSE *) keyuse_ext, idx); + KEYUSE *added_keyuse= ((KEYUSE *) (keyuse.buffer)) + idx; + added_keyuse->validity_ref= &keyuse_ext->validity_var; + } + + if (sort_and_filter_keyuse(this, &keyuse, true)) + goto err; + optimize_keyuse(this, &keyuse); + + for (uint i= 0; i < table_count; i++) + { + JOIN_TAB *tab= join_tab + i; + map2table[tab->table->tablenr]= tab; + } + + return; + +err: + if (save_qep) + restore_query_plan(save_qep); + table->deny_splitting(); + return; +} + + +/** + @brief + Add KEYUSE structures that can be usable for splitting of this joined table +*/ + +void JOIN_TAB::add_keyuses_for_splitting() +{ + DBUG_ASSERT(table->spl_opt_info != NULL); + SplM_opt_info *spl_opt_info= table->spl_opt_info; + spl_opt_info->join->add_keyuses_for_splitting(); +} + + +/* + @brief + Find info on the splitting plan by the splitting key +*/ + +SplM_plan_info *SplM_opt_info::find_plan(TABLE *table, uint key, uint parts) +{ + List_iterator_fast li(plan_cache); + SplM_plan_info *spl_plan; + while ((spl_plan= li++)) + { + if (spl_plan->table == table && + spl_plan->key == key && + spl_plan->parts == parts) + break; + } + return spl_plan; +} + + +/* + @breaf + Enable/Disable a keyuses that can be used for splitting + */ + +static +void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, + TABLE *table, uint key, + table_map excluded_tables, + bool validity_val) +{ + KEYUSE_EXT *keyuse_ext= key_keyuse_ext_start; + do + { + if (!(keyuse_ext->needed_in_prefix & excluded_tables)) + { + /* + The enabling/disabling flags are set just in KEYUSE_EXT structures. + Yet keyuses that are used by best_access_path() have pointers + to these flags. + */ + keyuse_ext->validity_var= validity_val; + } + keyuse_ext++; + } + while (keyuse_ext->key == key && keyuse_ext->table == table); +} + + +/** + @brief + Choose the best splitting to extend the evaluated partial join + + @param + idx index for joined table T in current partial join P + remaining_tables tables not joined yet + spl_pd_boundary OUT bitmap of the table from P extended by T that + starts the sub-sequence of tables S from which + no conditions are allowed to be pushed into T. + + @details + This function is called during the search for the best execution + plan of the join that contains this table T. The function is called + every time when the optimizer tries to extend a partial join by + joining it with table T. Depending on what tables are already in the + partial join different equalities usable for splitting can be pushed + into T. The function evaluates different variants and chooses the + best one. Then the function finds the plan for the materializing join + with the chosen equality conditions pushed into it. If the cost of the + plan turns out to be less than the cost of the best plan without + splitting the function set it as the true plan of materialization + of the table T. + The function caches the found plans for materialization of table T + together with the info what key was used for splitting. Next time when + the optimizer prefers to use the same key the plan is taken from + the cache of plans + + @retval + Pointer to the info on the found plan that employs the pushed equalities + if the plan has been chosen, NULL - otherwise. + If the function returns NULL the value of spl_param_tables is set to 0. +*/ + +SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, + table_map remaining_tables, + const POSITION *join_positions, + table_map *spl_pd_boundary) +{ + SplM_opt_info *spl_opt_info= table->spl_opt_info; + DBUG_ASSERT(spl_opt_info != NULL); + JOIN *join= spl_opt_info->join; + THD *thd= join->thd; + table_map tables_usable_for_splitting= + spl_opt_info->tables_usable_for_splitting; + KEYUSE_EXT *keyuse_ext= &join->ext_keyuses_for_splitting->at(0); + KEYUSE_EXT *UNINIT_VAR(best_key_keyuse_ext_start); + TABLE *best_table= 0; + double best_rec_per_key= DBL_MAX; + SplM_plan_info *spl_plan= 0; + uint best_key= 0; + uint best_key_parts= 0; + table_map best_param_tables= 0L; + Json_writer_object trace_obj(thd, "choose_best_splitting"); + Json_writer_array trace_arr(thd, "considered_keys"); + /* + Check whether there are keys that can be used to join T employing splitting + and if so, select the best out of such keys + */ + for (uint tablenr= 0; tablenr < join->table_count; tablenr++) + { + if (!((1ULL << tablenr) & tables_usable_for_splitting)) + continue; + JOIN_TAB *tab= join->map2table[tablenr]; + TABLE *table= tab->table; + if (keyuse_ext->table != table) + continue; + do + { + uint key= keyuse_ext->key; + KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext; + key_part_map found_parts= 0; + table_map needed_in_prefix= 0; + do + { + if (keyuse_ext->needed_in_prefix & + (remaining_tables | this->join->sjm_lookup_tables)) + { + keyuse_ext++; + continue; + } + if (!(keyuse_ext->keypart_map & found_parts)) + { + if ((!found_parts && !keyuse_ext->keypart) || + (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts))) + found_parts|= keyuse_ext->keypart_map; + else + { + do + { + keyuse_ext++; + } + while (keyuse_ext->key == key && keyuse_ext->table == table); + break; + } + } + KEY *key_info= table->key_info + key; + double rec_per_key= + key_info->actual_rec_per_key(keyuse_ext->keypart); + needed_in_prefix|= keyuse_ext->needed_in_prefix; + if (rec_per_key < best_rec_per_key) + { + best_table= keyuse_ext->table; + best_key= keyuse_ext->key; + best_key_parts= keyuse_ext->keypart + 1; + best_rec_per_key= rec_per_key; + best_key_keyuse_ext_start= key_keyuse_ext_start; + best_param_tables= needed_in_prefix; + // trace table, key_name, parts, needed_tables. + Json_writer_object cur_index(thd); + cur_index. + add("table_name", best_table->alias.ptr()). + add("index", best_table->key_info[best_key].name). + add("rec_per_key", best_rec_per_key). + add("param_tables", best_param_tables); + } + keyuse_ext++; + } + while (keyuse_ext->key == key && keyuse_ext->table == table); + } + while (keyuse_ext->table == table); + } + trace_arr.end(); + + spl_opt_info->last_plan= 0; + double refills= DBL_MAX; + table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables; + if (best_table) + { + *spl_pd_boundary= this->table->map; + if (!best_param_tables) + refills= 1; + else + { + table_map last_found= this->table->map; + for (const POSITION *pos= &join_positions[idx - 1]; ; pos--) + { + if (pos->table->table->map & excluded_tables) + continue; + if (pos->partial_join_cardinality < refills) + { + *spl_pd_boundary= last_found; + refills= pos->partial_join_cardinality; + } + last_found= pos->table->table->map; + if ((last_found & best_param_tables) || pos->use_join_buffer) + break; + } + } + + trace_obj.add("refills", refills). + add("spl_pd_boundary", *spl_pd_boundary); + + /* + The key for splitting was chosen, look for the plan for this key + in the cache + */ + spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts); + if (!spl_plan) + { + /* + The plan for the chosen key has not been found in the cache. + Build a new plan and save info on it in the cache + */ + Json_writer_array wrapper(thd, "split_plan_search"); + table_map all_table_map= (((table_map) 1) << join->table_count) - 1; + reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, + best_key, excluded_tables, true); + choose_plan(join, all_table_map & ~join->const_table_map); + + wrapper.end(); + /* + Check that the chosen plan is really a splitting plan. + If not or if there is not enough memory to save the plan in the cache + then just return with no splitting plan. + */ + POSITION *first_non_const_pos= join->best_positions + join->const_tables; + TABLE *table= first_non_const_pos->table->table; + key_map spl_keys= table->keys_usable_for_splitting; + if (!(first_non_const_pos->key && + spl_keys.is_set(first_non_const_pos->key->key)) || + !(spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) || + !(spl_plan->best_positions= + (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) || + spl_opt_info->plan_cache.push_back(spl_plan)) + { + reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, + best_key, excluded_tables, false); + trace_obj.add("split_plan_discarded", "constructed unapplicable query plan"); + return 0; + } + + spl_plan->keyuse_ext_start= best_key_keyuse_ext_start; + spl_plan->table= best_table; + spl_plan->key= best_key; + spl_plan->parts= best_key_parts; + spl_plan->split_sel= best_rec_per_key / + (spl_opt_info->unsplit_card ? + spl_opt_info->unsplit_card : 1); + + uint rec_len= table->s->rec_buff_length; + + double split_card= spl_opt_info->unsplit_card * spl_plan->split_sel; + double oper_cost= split_card * + spl_postjoin_oper_cost(thd, split_card, rec_len); + spl_plan->cost= join->best_positions[join->table_count-1].read_time + + + oper_cost; + + memcpy((char *) spl_plan->best_positions, + (char *) join->best_positions, + sizeof(POSITION) * join->table_count); + reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, + best_key, excluded_tables, false); + } + else + trace_obj.add("cached_plan_found", 1); + + if (spl_plan) + { + if (unlikely(thd->trace_started())) + { + trace_obj. + add("lead_table", spl_plan->table->alias.ptr()). + add("index", spl_plan->table->key_info[spl_plan->key].name). + add("parts", spl_plan->parts). + add("split_sel", spl_plan->split_sel). + add("cost", spl_plan->cost). + add("unsplit_cost", spl_opt_info->unsplit_cost). + add("records", (ha_rows) (records * spl_plan->split_sel)); + } + + if (refills * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01) + { + /* + The best plan that employs splitting is cheaper than + the plan without splitting + */ + spl_opt_info->last_plan= spl_plan; + spl_opt_info->last_refills= refills; + trace_obj.add("chosen", true); + } + else + trace_obj.add("chosen", false); + } + } + + /* Set the cost of the preferred materialization for this partial join */ + records= (ha_rows)spl_opt_info->unsplit_card; + spl_plan= spl_opt_info->last_plan; + if (spl_plan) + { + startup_cost= spl_opt_info->last_refills * spl_plan->cost; + records= (ha_rows) (records * spl_plan->split_sel); + } + else + { + startup_cost= spl_opt_info->unsplit_cost; + *spl_pd_boundary= 0; + } + return spl_plan; +} + + +/** + @brief + Inject equalities for splitting used by the materialization join + + @param + excluded_tables used to filter out the equalities that are not + to be pushed. + + @details + This function injects equalities pushed into a derived table T for which + the split optimization has been chosen by the optimizer. The function + is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt(). + All equalities usable for splitting T whose right parts do not depend on + any of the 'excluded_tables' can be pushed into the where clause of the + derived table T. + The function also marks the select that specifies T as + UNCACHEABLE_DEPENDENT_INJECTED. + + @retval + false on success + true on failure +*/ + +bool JOIN::inject_best_splitting_cond(table_map excluded_tables) +{ + Item *inj_cond= 0; + List *inj_cond_list= &spl_opt_info->inj_cond_list; + List_iterator li(spl_opt_info->added_key_fields); + KEY_FIELD *added_key_field; + while ((added_key_field= li++)) + { + if (excluded_tables & added_key_field->val->used_tables()) + continue; + if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root)) + return true; + } + DBUG_ASSERT(inj_cond_list->elements); + switch (inj_cond_list->elements) { + case 1: + inj_cond= inj_cond_list->head(); break; + default: + inj_cond= new (thd->mem_root) Item_cond_and(thd, *inj_cond_list); + if (!inj_cond) + return true; + } + if (inj_cond) + inj_cond->fix_fields(thd,0); + + if (inject_cond_into_where(inj_cond->copy_andor_structure(thd))) + return true; + + select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + st_select_lex_unit *unit= select_lex->master_unit(); + unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED; + + return false; +} + + +/** + @brief + Test if equality is injected for split optimization + + @param + eq_item equality to to test + + @retval + true eq_item is equality injected for split optimization + false otherwise +*/ + +bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item) +{ + Item *left_item= eq_item->arguments()[0]->real_item(); + if (left_item->type() != Item::FIELD_ITEM) + return false; + Field *field= ((Item_field *) left_item)->field; + if (!field->table->reginfo.join_tab) + return false; + JOIN *join= field->table->reginfo.join_tab->join; + if (!join->spl_opt_info) + return false; + List_iterator_fast li(join->spl_opt_info->inj_cond_list); + Item *item; + while ((item= li++)) + { + if (item == eq_item) + return true; + } + return false; +} + + +/** + @brief + Fix the splitting chosen for a splittable table in the final query plan + + @param + spl_plan info on the splitting plan chosen for the splittable table T + excluded_tables tables that cannot be used in equalities pushed into T + is_const_table the table T is a constant table + + @details + If in the final query plan the optimizer has chosen a splitting plan + then the function sets this plan as the final execution plan to + materialized the table T. Otherwise the plan that does not use + splitting is set for the materialization. + + @retval + false on success + true on failure +*/ + +bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan, + table_map excluded_tables, + bool is_const_table) +{ + SplM_opt_info *spl_opt_info= table->spl_opt_info; + DBUG_ASSERT(table->spl_opt_info != 0); + JOIN *md_join= spl_opt_info->join; + if (spl_plan && !is_const_table) + { + is_split_derived= true; + memcpy((char *) md_join->best_positions, + (char *) spl_plan->best_positions, + sizeof(POSITION) * md_join->table_count); + /* + This is called for a proper work of JOIN::get_best_combination() + called for the join that materializes T + */ + reset_validity_vars_for_keyuses(spl_plan->keyuse_ext_start, + spl_plan->table, + spl_plan->key, + excluded_tables, + true); + } + else if (md_join->save_qep) + { + md_join->restore_query_plan(md_join->save_qep); + } + return false; +} + + +/** + @brief + Fix the splittings chosen splittable tables in the final query plan + + @details + The function calls JOIN_TAB::fix_splittins for all potentially + splittable tables in this join to set all final materialization + plans chosen for these tables. + + @retval + false on success + true on failure +*/ + +bool JOIN::fix_all_splittings_in_plan() +{ + table_map prev_tables= 0; + table_map all_tables= (table_map(1) << table_count) - 1; + for (uint tablenr= 0; tablenr < table_count; tablenr++) + { + POSITION *cur_pos= &best_positions[tablenr]; + JOIN_TAB *tab= cur_pos->table; + if (tab->table->is_splittable()) + { + SplM_plan_info *spl_plan= cur_pos->spl_plan; + table_map excluded_tables= (all_tables & ~prev_tables) | + sjm_lookup_tables; + ; + if (spl_plan) + { + POSITION *pos= cur_pos; + table_map spl_pd_boundary= pos->spl_pd_boundary; + do + { + excluded_tables|= pos->table->table->map; + } + while (!((pos--)->table->table->map & spl_pd_boundary)); + } + if (tab->fix_splitting(spl_plan, + excluded_tables, + tablenr < const_tables )) + return true; + } + prev_tables|= tab->table->map; + } + return false; +} + + +/** + @brief + Inject splitting conditions into WHERE of split derived + + @details + The function calls JOIN_TAB::inject_best_splitting_cond() for each + materialized derived table T used in this join for which the split + optimization has been chosen by the optimizer. It is done in order to + inject equalities pushed into the where clause of the specification + of T that would be helpful to employ the splitting technique. + + @retval + false on success + true on failure +*/ + +bool JOIN::inject_splitting_cond_for_all_tables_with_split_opt() +{ + table_map prev_tables= 0; + table_map all_tables= (table_map(1) << table_count) - 1; + for (uint tablenr= 0; tablenr < table_count; tablenr++) + { + POSITION *cur_pos= &best_positions[tablenr]; + JOIN_TAB *tab= cur_pos->table; + prev_tables|= tab->table->map; + if (!(tab->table->is_splittable() && cur_pos->spl_plan)) + continue; + SplM_opt_info *spl_opt_info= tab->table->spl_opt_info; + JOIN *join= spl_opt_info->join; + table_map excluded_tables= (all_tables & ~prev_tables) | sjm_lookup_tables; + table_map spl_pd_boundary= cur_pos->spl_pd_boundary; + for (POSITION *pos= cur_pos; ; pos--) + { + excluded_tables|= pos->table->table->map; + pos->table->no_forced_join_cache= true; + if (pos->table->table->map & spl_pd_boundary) + { + pos->table->split_derived_to_update|= tab->table->map; + break; + } + } + + if (join->inject_best_splitting_cond(excluded_tables)) + return true; + } + return false; +} -- cgit v1.2.3