diff options
Diffstat (limited to '')
-rw-r--r-- | sql/opt_subselect.cc | 648 |
1 files changed, 430 insertions, 218 deletions
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 50a14763..6d9b40cc 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -30,11 +30,14 @@ #include "sql_base.h" #include "sql_const.h" #include "sql_select.h" +#include "sql_update.h" // class Sql_cmd_update +#include "sql_delete.h" // class Sql_cmd_delete #include "filesort.h" #include "opt_subselect.h" #include "sql_test.h" #include <my_bit.h> #include "opt_trace.h" +#include "optimizer_defaults.h" /* This file contains optimizations for semi-join subqueries. @@ -448,7 +451,8 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); static bool convert_subq_to_jtbm(JOIN *parent_join, Item_in_subselect *subq_pred, bool *remove); static TABLE_LIST *alloc_join_nest(THD *thd); -static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements); +static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements, + bool *blobs_used); bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables); static SJ_MATERIALIZATION_INFO * at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, @@ -532,6 +536,48 @@ bool is_materialization_applicable(THD *thd, Item_in_subselect *in_subs, return FALSE; } +/** + @brief Check whether an IN subquery must be excluded from conversion to SJ + + @param thd global context the processed statement + @returns true if the IN subquery must be excluded from conversion to SJ + + @note + Currently a top level IN subquery of an delete statement is not converted + to SJ if the statement contains ORDER BY ... LIMIT or contains RETURNING. + + @todo + The disjunctive members + !((Sql_cmd_update *) cmd)->is_multitable() + !((Sql_cmd_delete *) cmd)->is_multitable() + will be removed when conversions of IN predicands to semi-joins are + fully supported for single-table UPDATE/DELETE statements. +*/ + +bool SELECT_LEX::is_sj_conversion_prohibited(THD *thd) +{ + DBUG_ASSERT(master_unit()->item->substype() == Item_subselect::IN_SUBS); + + SELECT_LEX *outer_sl= outer_select(); + if (outer_sl->outer_select()) + return false; + + Sql_cmd *cmd= thd->lex->m_sql_cmd; + + switch (thd->lex->sql_command) { + case SQLCOM_UPDATE: + return + !((Sql_cmd_update *) cmd)->is_multitable() && + ((Sql_cmd_update *) cmd)->processing_as_multitable_update_prohibited(thd); + case SQLCOM_DELETE: + return + !((Sql_cmd_delete *) cmd)->is_multitable() && + ((Sql_cmd_delete *) cmd)->processing_as_multitable_delete_prohibited(thd); + default: + return false; + } +} + /* Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them @@ -672,17 +718,6 @@ int check_and_do_in_subquery_rewrites(JOIN *join) DBUG_RETURN(-1); } } - /* Check if any table is not supporting comparable rowids */ - { - List_iterator_fast<TABLE_LIST> li(select_lex->outer_select()->leaf_tables); - TABLE_LIST *tbl; - while ((tbl = li++)) - { - TABLE *table= tbl->table; - if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) - join->not_usable_rowid_map|= table->map; - } - } DBUG_PRINT("info", ("Checking if subq can be converted to semi-join")); /* @@ -694,9 +729,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join) 3. Subquery does not have GROUP BY or ORDER BY 4. Subquery does not use aggregate functions or HAVING 5. Subquery predicate is at the AND-top-level of ON/WHERE clause - 6. We are not in a subquery of a single table UPDATE/DELETE that - doesn't have a JOIN (TODO: We should handle this at some - point by switching to multi-table UPDATE/DELETE) + 6. We are not in a subquery of a single-table UPDATE/DELETE that + does not allow conversion to multi-table UPDATE/DELETE 7. We're not in a table-less subquery like "SELECT 1" 8. No execution method was already chosen (by a prepared statement) 9. Parent select is not a table-less select @@ -704,9 +738,10 @@ int check_and_do_in_subquery_rewrites(JOIN *join) 11. It is first optimisation (the subquery could be moved from ON clause during first optimisation and then be considered for SJ on the second when it is too late) - 12. All tables supports comparable rowids. - This is needed for DuplicateWeedout strategy to work (which - is the catch-all semi-join strategy so it must be applicable). + + There are also other requirements which cannot be checked at this phase, + yet. They are checked later in convert_join_subqueries_to_semijoins(), + look for calls to block_conversion_to_sj(). */ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) && in_subs && // 1 @@ -714,15 +749,14 @@ int check_and_do_in_subquery_rewrites(JOIN *join) !select_lex->group_list.elements && !join->order && // 3 !join->having && !select_lex->with_sum_func && // 4 in_subs->emb_on_expr_nest && // 5 - select_lex->outer_select()->join && // 6 + !select_lex->is_sj_conversion_prohibited(thd) && // 6 parent_unit->first_select()->leaf_tables.elements && // 7 !in_subs->has_strategy() && // 8 select_lex->outer_select()->table_list.first && // 9 !((join->select_options | // 10 select_lex->outer_select()->join->select_options) // 10 & SELECT_STRAIGHT_JOIN) && // 10 - select_lex->first_cond_optimization && // 11 - join->not_usable_rowid_map == 0) // 12 + select_lex->first_cond_optimization) // 11 { DBUG_PRINT("info", ("Subquery is semi-join conversion candidate")); @@ -775,7 +809,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join) */ if (in_subs && !in_subs->has_strategy()) { - if (is_materialization_applicable(thd, in_subs, select_lex)) + if (!select_lex->is_sj_conversion_prohibited(thd) && + is_materialization_applicable(thd, in_subs, select_lex)) { in_subs->add_strategy(SUBS_MATERIALIZATION); @@ -918,8 +953,10 @@ bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs) outer, converted_from_in_predicate)) { - trace_transform.add("possible", false); - trace_transform.add("cause", "types mismatch"); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("possible", false). + add("cause", "types mismatch"); DBUG_RETURN(FALSE); } } @@ -941,8 +978,10 @@ bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs) { in_subs->types_allow_materialization= TRUE; in_subs->sjm_scan_allowed= all_are_fields; - trace_transform.add("sjm_scan_allowed", all_are_fields) - .add("possible", true); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("sjm_scan_allowed", all_are_fields). + add("possible", true); DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed")); DBUG_RETURN(TRUE); } @@ -1232,7 +1271,36 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) } } - if (join->select_options & SELECT_STRAIGHT_JOIN) + /* + Compute join->not_usable_rowid_map. + The idea is: + - DuplicateWeedout strategy requires that one is able to get the rowid + (call h->position()) for tables in the parent select. Obtained Rowid + values must be stable across table scans. + = Rowids are typically available. The only known exception is federatedx + tables. + - The optimizer requires that DuplicateWeedout strategy is always + applicable. It is the only strategy that is applicable for any join + order. The optimizer is not prepared for the situation where it has + constructed a join order and then it turns out that there's no semi-join + strategy that can be used for it. + + Because of the above, we will not use semi-joins if the parent select has + tables which do not support rowids. + */ + { + List_iterator_fast<TABLE_LIST> li(join->select_lex->leaf_tables); + TABLE_LIST *tbl; + while ((tbl = li++)) + { + TABLE *table= tbl->table; + if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + join->not_usable_rowid_map|= table->map; + } + } + + if (join->select_options & SELECT_STRAIGHT_JOIN || + join->not_usable_rowid_map != 0) { /* Block conversion to semijoins for all candidates */ li.rewind(); @@ -1302,8 +1370,10 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform, in_subq->get_select_lex()->select_number, "IN (SELECT)", "semijoin"); - trace_transform.add("converted_to_semi_join", false) - .add("cause", "subquery attached to the ON clause"); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("converted_to_semi_join", false). + add("cause", "subquery attached to the ON clause"); break; } @@ -1315,9 +1385,10 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) if (join->table_count + in_subq->unit->first_select()->join->table_count >= MAX_TABLES) { - trace_transform.add("converted_to_semi_join", false); - trace_transform.add("cause", - "table in parent join now exceeds MAX_TABLES"); + if (unlikely(trace_transform.trace_started())) + trace_transform. + add("converted_to_semi_join", false). + add("cause", "table in parent join now exceeds MAX_TABLES"); break; } if (convert_subq_to_sj(join, in_subq)) @@ -1449,6 +1520,7 @@ void get_delayed_table_estimates(TABLE *table, Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; Table_function_json_table *table_function= table->pos_in_table_list->table_function; + handler *file= table->file; if (table_function) { @@ -1468,9 +1540,11 @@ void get_delayed_table_estimates(TABLE *table, /* Calculate cost of scanning the temptable */ double data_size= COST_MULT(item->jtbm_record_count, hash_sj_engine->tmp_table->s->reclength); - /* Do like in handler::scan_time() */ - *scan_time= ((data_size/table->file->stats.block_size+2) * - table->file->avg_io_cost()); + + /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */ + *scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST * + table->file->DISK_READ_RATIO) + + *out_rows * file->ROW_COPY_COST); } @@ -2498,8 +2572,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) !sj_nest->sj_subq_pred->is_correlated && sj_nest->sj_subq_pred->types_allow_materialization) { - join->emb_sjm_nest= sj_nest; - if (choose_plan(join, all_table_map &~join->const_table_map)) + if (choose_plan(join, all_table_map &~join->const_table_map, sj_nest)) DBUG_RETURN(TRUE); /* purecov: inspected */ /* The best plan to run the subquery is now in join->best_positions, @@ -2515,24 +2588,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) sjm->is_used= FALSE; double subjoin_out_rows, subjoin_read_time; - /* - join->get_partial_cost_and_fanout(n_tables + join->const_tables, - table_map(-1), - &subjoin_read_time, - &subjoin_out_rows); - */ - join->get_prefix_cost_and_fanout(n_tables, + join->get_prefix_cost_and_fanout(n_tables, &subjoin_read_time, &subjoin_out_rows); - sjm->materialization_cost.convert_from_cost(subjoin_read_time); + sjm->materialization_cost=subjoin_read_time; sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows; - // Don't use the following list because it has "stale" items. use - // ref_pointer_array instead: - // - //List<Item> &right_expr_list= - // sj_nest->sj_subq_pred->unit->first_select()->item_list; /* Adjust output cardinality estimates. If the subquery has form @@ -2565,8 +2627,12 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) int tableno; double rows= 1.0; while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) - rows= COST_MULT(rows, - join->map2table[tableno]->table->opt_range_condition_rows); + { + ha_rows tbl_rows=join->map2table[tableno]-> + table->opt_range_condition_rows; + + rows= COST_MULT(rows, rows2double(tbl_rows)); + } sjm->rows= MY_MIN(sjm->rows, rows); } memcpy((uchar*) sjm->positions, @@ -2576,33 +2642,43 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) /* Calculate temporary table parameters and usage costs */ + bool blobs_used; uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array, - subq_select->item_list.elements); - double lookup_cost= get_tmp_table_lookup_cost(join->thd, - subjoin_out_rows, rowlen); - double write_cost= get_tmp_table_write_cost(join->thd, - subjoin_out_rows, rowlen); + subq_select->item_list.elements, + &blobs_used); + TMPTABLE_COSTS cost= get_tmp_table_costs(join->thd, + subjoin_out_rows, rowlen, + blobs_used, 1); + double scan_cost, total_cost; + double row_copy_cost= ROW_COPY_COST_THD(thd); /* Let materialization cost include the cost to write the data into the - temporary table: + temporary table. Note that smj->materialization_cost already includes + row copy and compare costs of finding the original row. */ - sjm->materialization_cost.add_io(subjoin_out_rows, write_cost); - + sjm->materialization_cost+=subjoin_out_rows * cost.write + cost.create; + /* Set the cost to do a full scan of the temptable (will need this to - consider doing sjm-scan): - */ - sjm->scan_cost.reset(); - sjm->scan_cost.add_io(sjm->rows, lookup_cost); - - sjm->lookup_cost.convert_from_cost(lookup_cost); + consider doing sjm-scan). See ha_scan_time() for the basics of + the calculations. + We don't need to check the where clause for each row, so no + WHERE_COST is needed. + */ + scan_cost= (rowlen * (double) sjm->rows) / cost.block_size; + total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost + + TABLE_SCAN_SETUP_COST_THD(thd) + + row_copy_cost * sjm->rows); + sjm->scan_cost=total_cost; + + /* When reading a row, we have also to check the where clause */ + sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd); sj_nest->sj_mat_info= sjm; DBUG_EXECUTE("opt", print_sjm(sjm);); } } } - join->emb_sjm_nest= NULL; DBUG_RETURN(FALSE); } @@ -2624,11 +2700,13 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) Length of the temptable record, in bytes */ -static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) +static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements, + bool *blobs_used) { uint len= 0; Item *item; - //List_iterator<Item> it(items); + + *blobs_used= 0; for (uint i= 0; i < elements ; i++) { item = p_items[i]; @@ -2651,6 +2729,8 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) len += 8; else len += item->max_length; + if (item->max_length > MAX_FIELD_VARCHARLENGTH) + *blobs_used= 1; break; case DECIMAL_RESULT: len += 10; @@ -2666,46 +2746,62 @@ static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) /** - The cost of a lookup into a unique hash/btree index on a temporary table - with 'row_count' rows each of size 'row_size'. + The cost of a create, write and read into a unique hash/btree index on + a temporary table with 'row_count' rows each of size 'row_size'. @param thd current query context @param row_count number of rows in the temp table @param row_size average size in bytes of the rows - @return the cost of one lookup -*/ - -double -get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size) -{ - if (row_count > thd->variables.max_heap_table_size / (double) row_size) - return (double) DISK_TEMPTABLE_LOOKUP_COST; - else - return (double) HEAP_TEMPTABLE_LOOKUP_COST; -} - -/** - The cost of writing a row into a temporary table with 'row_count' unique - rows each of size 'row_size'. + @return The cost of using the temporary table - @param thd current query context - @param row_count number of rows in the temp table - @param row_size average size in bytes of the rows - - @return the cost of writing one row + TODO: + This is an optimistic estimate. We are not taking into account: + - That we first write into a memory and then overflow to disk. + - If binary trees would be used for heap tables. + - The addition cost of writing a row to memory/disk and possible + index reorganization. */ -double -get_tmp_table_write_cost(THD *thd, double row_count, uint row_size) +TMPTABLE_COSTS +get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used, + bool add_copy_cost) { - double lookup_cost= get_tmp_table_lookup_cost(thd, row_count, row_size); - /* - TODO: - This is an optimistic estimate. Add additional costs resulting from - actually writing the row to memory/disk and possible index reorganization. - */ - return lookup_cost; + TMPTABLE_COSTS cost; + /* From heap_prepare_hp_create_info(), assuming one hash key used */ + row_size+= sizeof(char*)*2; + row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*)); + + if (row_count > thd->variables.max_heap_table_size / (double) row_size || + blobs_used) + { + double row_copy_cost= (add_copy_cost ? + tmp_table_optimizer_costs.row_copy_cost : + 0); + /* Disk based table */ + cost.lookup= ((tmp_table_optimizer_costs.key_lookup_cost * + tmp_table_optimizer_costs.disk_read_ratio) + + row_copy_cost); + cost.write= cost.lookup; + cost.create= DISK_TEMPTABLE_CREATE_COST; + cost.block_size= DISK_TEMPTABLE_BLOCK_SIZE; + cost.avg_io_cost= tmp_table_optimizer_costs.disk_read_cost; + cost.cache_hit_ratio= tmp_table_optimizer_costs.disk_read_ratio; + } + else + { + /* Values are as they are in heap.h */ + double row_copy_cost= (add_copy_cost ? + heap_optimizer_costs.row_copy_cost : + 0); + cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST + row_copy_cost; + cost.write= cost.lookup; + cost.create= HEAP_TEMPTABLE_CREATE_COST; + cost.block_size= 1; + cost.avg_io_cost= 0; + cost.cache_hit_ratio= 0; + } + return cost; } @@ -2950,9 +3046,16 @@ void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, 2. using strategy Z is cheaper, but it only removes fanout from semijoin X. 3. We have no clue what to do about fanount of semi-join Y. + + For the first iteration read_time will always be bigger than + *current_read_time (as the 'strategy' is an addition to the + chosen plan) . If a strategy was picked + (dusp_producing_tables & handled_fanout is true), then + *current_read_time is updated and the cost for the next + strategy can be smaller than *current_read_time. */ if ((dups_producing_tables & handled_fanout) || - (read_time < *current_read_time && + (read_time + COST_EPS < *current_read_time && !(handled_fanout & pos->inner_tables_handled_with_other_sjs))) { DBUG_ASSERT(pos->sj_strategy != sj_strategy); @@ -3150,9 +3253,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, mat_read_time= COST_ADD(prefix_cost, - COST_ADD(mat_info->materialization_cost.total_cost(), + COST_ADD(mat_info->materialization_cost, COST_MULT(prefix_rec_count, - mat_info->lookup_cost.total_cost()))); + mat_info->lookup_cost))); /* NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION @@ -3166,8 +3269,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, *strategy= SJ_OPT_MATERIALIZE; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3201,9 +3305,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, /* Add materialization cost */ prefix_cost= COST_ADD(prefix_cost, - COST_ADD(mat_info->materialization_cost.total_cost(), + COST_ADD(mat_info->materialization_cost, COST_MULT(prefix_rec_count, - mat_info->scan_cost.total_cost()))); + mat_info->scan_cost))); prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows); uint i; @@ -3220,10 +3324,8 @@ bool Sj_materialization_picker::check_qep(JOIN *join, best_access_path(join, join->positions[i].table, rem_tables, join->positions, i, disable_jbuf, prefix_rec_count, &curpos, &dummy); - prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read); + prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_out); prefix_cost= COST_ADD(prefix_cost, curpos.read_time); - prefix_cost= COST_ADD(prefix_cost, - prefix_rec_count / TIME_FOR_COMPARE); //TODO: take into account join condition selectivity here } @@ -3248,8 +3350,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join, *handled_fanout= mat_nest->sj_inner_tables; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3348,8 +3451,9 @@ bool LooseScan_picker::check_qep(JOIN *join, *handled_fanout= first->table->emb_sj_nest->sj_inner_tables; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3440,13 +3544,18 @@ bool Firstmatch_picker::check_qep(JOIN *join, optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE)) { /* - An important special case: only one inner table, and @@optimizer_switch - allows join buffering. + An important special case: only one inner table, and + @@optimizer_switch allows join buffering. - read_time is the same (i.e. FirstMatch doesn't add any cost - - remove fanout added by the last table + - remove fanout added by the last table) */ if (*record_count) - *record_count /= join->positions[idx].records_read; + *record_count /= join->positions[idx].records_out; + /* + Remember this choice for + fix_semijoin_strategies_for_picked_join_order() + */ + join->positions[idx].firstmatch_with_join_buf= 1; } else { @@ -3466,8 +3575,9 @@ bool Firstmatch_picker::check_qep(JOIN *join, *strategy= SJ_OPT_FIRST_MATCH; if (unlikely(trace.trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + trace. + add("rows", *record_count). + add("cost", *read_time); } return TRUE; } @@ -3479,6 +3589,56 @@ bool Firstmatch_picker::check_qep(JOIN *join, } +/* + Duplicate_weedout strategy is described at + https://mariadb.com/kb/en/duplicateweedout-strategy/ + + The idea is that if one has a subquery of type: + + select * + from Country + where + Country.code IN (select City.Country + from City + where + ...) + + Before semi join optimization it was executed this way: + - Scan rows in Country + - For each accepted row, execute the sub query with + 'Country.code = City.Country' added to the WHERE clause and with + LIMIT 1 + + With semi join optimization it can be converted to the following semi join. + + select * from Country semi-join City + where Country.code = City.Country and ... + + This is executed as: + + - Scan rows in Country + - Scan rows in City with 'Country.code = City.Country' added to the + subquery WHERE clause. Stop scanning after the first match. + + or + + - Create temporary table to store City.Country (with a unique key) + - Scan rows in City (according to plan for City) and put them into the + temporary table + - Scan the temporary table + - Do index lookup in Country table with City.Country + +With Duplicate_weedout we would try to instead do: + + - Create temporary table to hold unique rowid's for the Country + - Scan rows in City (according to plan for City) + - Scan rows in Country (according to plan for Country) + - Write Country.id rowid to temporary table. If there was no + conflicting row in the temporary table, accept the row combination. + - Delete temporary table +*/ + + void Duplicate_weedout_picker::set_from_prev(POSITION *prev) { if (prev->dups_weedout_picker.is_used) @@ -3543,46 +3703,42 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, */ uint first_tab= first_dupsweedout_table; double dups_cost; - double prefix_rec_count; + double first_weedout_table_rec_count; double sj_inner_fanout= 1.0; double sj_outer_fanout= 1.0; uint temptable_rec_size; - Json_writer_object trace(join->thd); - trace.add("strategy", "DuplicateWeedout"); if (first_tab == join->const_tables) { - prefix_rec_count= 1.0; + first_weedout_table_rec_count= 1.0; temptable_rec_size= 0; dups_cost= 0.0; } else { dups_cost= join->positions[first_tab - 1].prefix_cost; - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + first_weedout_table_rec_count= + join->positions[first_tab - 1].prefix_record_count; temptable_rec_size= 8; /* This is not true but we'll make it so */ } table_map dups_removed_fanout= 0; - double current_fanout= prefix_rec_count; for (uint j= first_dupsweedout_table; j <= idx; j++) { POSITION *p= join->positions + j; - current_fanout= COST_MULT(current_fanout, p->records_read); - dups_cost= COST_ADD(dups_cost, - COST_ADD(p->read_time, - current_fanout / TIME_FOR_COMPARE)); + dups_cost= COST_ADD(dups_cost, p->read_time); + if (p->table->emb_sj_nest) { - sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_read); + sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_out); dups_removed_fanout |= p->table->table->map; } else { + sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_out); /* Ensure that table supports comparable rowids */ DBUG_ASSERT(!(p->table->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)); - sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_read); temptable_rec_size += p->table->table->file->ref_length; } } @@ -3591,32 +3747,38 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, Add the cost of temptable use. The table will have sj_outer_fanout records, and we will make - sj_outer_fanout table writes - - sj_inner_fanout*sj_outer_fanout lookups. + - sj_inner_fanout*sj_outer_fanout lookups. + There is no row copy cost (as we are only copying rowid) and no + compare cost (as we are only checking if the row exists by + checking if we got a write error. */ - double one_lookup_cost= get_tmp_table_lookup_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - double one_write_cost= get_tmp_table_write_cost(join->thd, - sj_outer_fanout, - temptable_rec_size); - - double write_cost= COST_MULT(join->positions[first_tab].prefix_record_count, - sj_outer_fanout * one_write_cost); - double full_lookup_cost= - COST_MULT(join->positions[first_tab].prefix_record_count, - COST_MULT(sj_outer_fanout, - sj_inner_fanout * one_lookup_cost)); - dups_cost= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost)); + TMPTABLE_COSTS one_cost= get_tmp_table_costs(join->thd, + sj_outer_fanout, + temptable_rec_size, + 0, 0); + double write_cost= (one_cost.create + + first_weedout_table_rec_count * sj_outer_fanout * one_cost.write); + double full_lookup_cost= (first_weedout_table_rec_count* sj_outer_fanout * + sj_inner_fanout * one_cost.lookup); + *read_time= dups_cost + write_cost + full_lookup_cost; - *read_time= dups_cost; - *record_count= prefix_rec_count * sj_outer_fanout; + *record_count= first_weedout_table_rec_count * sj_outer_fanout; *handled_fanout= dups_removed_fanout; *strategy= SJ_OPT_DUPS_WEEDOUT; - if (unlikely(trace.trace_started())) + if (unlikely(join->thd->trace_started())) { - trace.add("records", *record_count); - trace.add("read_time", *read_time); + Json_writer_object trace(join->thd); + trace. + add("strategy", "DuplicateWeedout"). + add("prefix_row_count", first_weedout_table_rec_count). + add("tmp_table_rows", sj_outer_fanout). + add("sj_inner_fanout", sj_inner_fanout). + add("rows", *record_count). + add("dups_cost", dups_cost). + add("write_cost", write_cost). + add("full_lookup_cost", full_lookup_cost). + add("total_cost", *read_time); } return TRUE; } @@ -3665,33 +3827,37 @@ void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const */ void restore_prev_sj_state(const table_map remaining_tables, - const JOIN_TAB *tab, uint idx) + const JOIN_TAB *tab, uint idx) { TABLE_LIST *emb_sj_nest; - if (tab->emb_sj_nest) + if ((emb_sj_nest= tab->emb_sj_nest)) { - table_map subq_tables= tab->emb_sj_nest->sj_inner_tables; + table_map subq_tables= emb_sj_nest->sj_inner_tables; tab->join->sjm_lookup_tables &= ~subq_tables; - } - if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest)) - { - table_map subq_tables= emb_sj_nest->sj_inner_tables & - ~tab->join->const_table_map; - /* If we're removing the last SJ-inner table, remove the sj-nest */ - if ((remaining_tables & subq_tables) == subq_tables) - { - // All non-const tables of the SJ nest are in the remaining_tables. - // we are not in the nest anymore. - tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; - } - else + if (!tab->join->emb_sjm_nest) { - // Semi-join nest has: - // - a table being removed (not in the prefix) - // - some tables in the prefix. - tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + table_map subq_tables= (emb_sj_nest->sj_inner_tables & + ~tab->join->const_table_map); + /* If we're removing the last SJ-inner table, remove the sj-nest */ + if ((remaining_tables & subq_tables) == subq_tables) + { + /* + All non-const tables of the SJ nest are in the remaining_tables. + we are not in the nest anymore. + */ + tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; + } + else + { + /* + Semi-join nest has: + - a table being removed (not in the prefix) + - some tables in the prefix. + */ + tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + } } } @@ -3803,6 +3969,8 @@ at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, static void recalculate_prefix_record_count(JOIN *join, uint start, uint end) { + DBUG_ASSERT(start >= join->const_tables); + for (uint j= start; j < end ;j++) { double prefix_count; @@ -3810,7 +3978,7 @@ static void recalculate_prefix_record_count(JOIN *join, uint start, uint end) prefix_count= 1.0; else prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count, - join->best_positions[j-1].records_read); + join->best_positions[j-1].records_out); join->best_positions[j].prefix_record_count= prefix_count; } @@ -3962,7 +4130,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions, i, FALSE, prefix_rec_count, join->best_positions + i, &dummy); - prefix_rec_count *= join->best_positions[i].records_read; + prefix_rec_count *= join->best_positions[i].records_out; rem_tables &= ~join->best_positions[i].table->table->map; } } @@ -3974,7 +4142,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions[first].n_sj_tables= tablenr - first + 1; POSITION dummy; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: - join->best_positions[tablenr - 1].prefix_record_count; + join->best_positions[first - 1].prefix_record_count; table_map rem_tables= remaining_tables; uint idx; @@ -3988,6 +4156,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) */ join->cur_sj_inner_tables= 0; Json_writer_object semijoin_strategy(thd); + double inner_fanout= 1.0; semijoin_strategy.add("semi_join_strategy","FirstMatch"); Json_writer_array semijoin_plan(thd, "join_order"); for (idx= first; idx <= tablenr; idx++) @@ -3997,16 +4166,33 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) { trace_one_table.add_table_name(join->best_positions[idx].table); } - if (join->best_positions[idx].use_join_buffer) + if (join->best_positions[idx].use_join_buffer && + !join->best_positions[idx].firstmatch_with_join_buf) { - best_access_path(join, join->best_positions[idx].table, - rem_tables, join->best_positions, idx, - TRUE /* no jbuf */, - record_count, join->best_positions + idx, &dummy); + /* + records_out cannot be bigger just because we remove join buffer + */ + double records_out= join->best_positions[idx].records_out; + best_access_path(join, join->best_positions[idx].table, + rem_tables, join->best_positions, idx, + TRUE /* no jbuf */, + record_count, join->best_positions + idx, &dummy); + set_if_smaller(join->best_positions[idx].records_out, records_out); } - record_count *= join->best_positions[idx].records_read; + /* + TODO: We should also compute the selectivity here, as well as adjust + the records_out according to the fraction of records removed by + the semi-join. + */ + double rec_out= join->best_positions[idx].records_out; + if (join->best_positions[idx].table->emb_sj_nest) + inner_fanout *= rec_out; + + record_count *= join->best_positions[idx].records_out; rem_tables &= ~join->best_positions[idx].table->table->map; } + if (inner_fanout > 1.0) + join->best_positions[tablenr].records_out /= inner_fanout; } if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) @@ -4015,7 +4201,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) POSITION *first_pos= join->best_positions + first; POSITION loose_scan_pos; // For loose scan paths double record_count= (first== join->const_tables)? 1.0: - join->best_positions[tablenr - 1].prefix_record_count; + join->best_positions[first - 1].prefix_record_count; table_map rem_tables= remaining_tables; uint idx; @@ -4056,13 +4242,14 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) */ if (join->best_positions[idx].key) { + DBUG_ASSERT(join->best_positions[idx].type != JT_RANGE); delete join->best_positions[idx].table->quick; join->best_positions[idx].table->quick= NULL; } } } rem_tables &= ~join->best_positions[idx].table->table->map; - record_count *= join->best_positions[idx].records_read; + record_count *= join->best_positions[idx].records_out; } first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN; first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables); @@ -4786,11 +4973,13 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) { DBUG_PRINT("info",("Creating group key in temporary table")); share->keys=1; - share->uniques= MY_TEST(using_unique_constraint); - table->key_info=keyinfo; + table->key_info= share->key_info= keyinfo; keyinfo->key_part=key_part_info; - keyinfo->flags=HA_NOSAME; + keyinfo->flags= HA_NOSAME | (using_unique_constraint ? HA_UNIQUE_HASH : 0); + keyinfo->ext_key_flags= keyinfo->flags; keyinfo->usable_key_parts= keyinfo->user_defined_key_parts= 1; + keyinfo->ext_key_parts= 1; + share->key_parts= 1; keyinfo->key_length=0; keyinfo->rec_per_key=0; keyinfo->algorithm= HA_KEY_ALG_UNDEF; @@ -5281,7 +5470,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, Got a table that's not within any semi-join nest. This is a case like this: - SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2) + SELECT * FROM ot1, nt1 WHERE + ot1.col IN (SELECT expr FROM it1, it2) with a join order of @@ -5758,10 +5948,10 @@ enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) ((subselect_hash_sj_engine*)in_subs->engine); if (!hash_sj_engine->is_materialized) { - hash_sj_engine->materialize_join->exec(); + int error= hash_sj_engine->materialize_join->exec(); hash_sj_engine->is_materialized= TRUE; - if (unlikely(hash_sj_engine->materialize_join->error) || + if (unlikely(error) || unlikely(tab->join->thd->is_fatal_error)) DBUG_RETURN(NESTED_LOOP_ERROR); } @@ -6541,18 +6731,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) IN/ALL/ANY optimizations are not applicable for so called fake select (this select exists only to filter results of union if it is needed). */ - if (select_lex == select_lex->master_unit()->fake_select_lex) - return 0; - - if (is_in_subquery()) - { - in_subs= unit->item->get_IN_subquery(); - if (in_subs->create_in_to_exists_cond(this)) - return true; - } - else + if (select_lex == select_lex->master_unit()->fake_select_lex || + likely(!is_in_subquery())) return false; + in_subs= unit->item->get_IN_subquery(); + if (in_subs->create_in_to_exists_cond(this)) + return true; + /* A strategy must be chosen earlier. */ DBUG_ASSERT(in_subs->has_strategy()); DBUG_ASSERT(in_to_exists_where || in_to_exists_having); @@ -6583,6 +6769,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) /* The cost of the IN->EXISTS strategy. */ double in_exists_strategy_cost; double dummy; + const char *strategy; /* A. Estimate the number of rows of the outer table that will be filtered @@ -6644,7 +6831,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables) /* Get the cost of the modified IN-EXISTS plan. */ inner_read_time_2= inner_join->best_read; - } else { @@ -6656,39 +6842,58 @@ bool JOIN::choose_subquery_plan(table_map join_tables) C. Compute execution costs. */ /* C.1 Compute the cost of the materialization strategy. */ - //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list); - uint rowlen= get_tmp_table_rec_length(ref_ptrs, - select_lex->item_list.elements); - /* The cost of writing one row into the temporary table. */ - double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1, - rowlen); - /* The cost of a lookup into the unique index of the materialized table. */ - double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1, - rowlen); + bool blobs_used; + uint rowlen= get_tmp_table_rec_length(ref_ptrs, + select_lex->item_list.elements, + &blobs_used); + /* The cost of using the temp table */ + TMPTABLE_COSTS cost= get_tmp_table_costs(thd, inner_record_count_1, + rowlen, blobs_used, 1); /* The cost of executing the subquery and storing its result in an indexed temporary table. */ - double materialization_cost= COST_ADD(inner_read_time_1, - COST_MULT(write_cost, - inner_record_count_1)); + double materialization_cost= + COST_ADD(cost.create, + COST_ADD(inner_read_time_1, + COST_MULT((cost.write + WHERE_COST_THD(thd)), + inner_record_count_1))); - materialize_strategy_cost= COST_ADD(materialization_cost, - COST_MULT(outer_lookup_keys, - lookup_cost)); + materialize_strategy_cost= + COST_ADD(materialization_cost, + COST_MULT(outer_lookup_keys, cost.lookup)); /* C.2 Compute the cost of the IN=>EXISTS strategy. */ - in_exists_strategy_cost= COST_MULT(outer_lookup_keys, inner_read_time_2); + in_exists_strategy_cost= + COST_MULT(outer_lookup_keys, inner_read_time_2); /* C.3 Compare the costs and choose the cheaper strategy. */ if (materialize_strategy_cost >= in_exists_strategy_cost) + { in_subs->set_strategy(SUBS_IN_TO_EXISTS); + strategy= "in_to_exists"; + } else + { in_subs->set_strategy(SUBS_MATERIALIZATION); + strategy= "materialization"; + } + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_subquery(thd, "subquery_plan"); + trace_subquery. + add("rows", inner_record_count_1). + add("materialization_cost", materialize_strategy_cost). + add("in_exist_cost", in_exists_strategy_cost). + add("choosen", strategy); + } DBUG_PRINT("info", - ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f", - materialize_strategy_cost, materialization_cost, write_cost, lookup_cost)); + ("mat_strategy_cost: %.2f mat_cost: %.2f write_cost: %.2f " + "lookup_cost: %.2f", + materialize_strategy_cost, materialization_cost, cost.write, + cost.lookup)); DBUG_PRINT("info", ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f", in_exists_strategy_cost, inner_read_time_2)); @@ -6714,6 +6919,13 @@ bool JOIN::choose_subquery_plan(table_map join_tables) implementation, fall back to IN-TO-EXISTS. */ in_subs->set_strategy(SUBS_IN_TO_EXISTS); + + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_subquery(thd, "subquery_plan_revert"); + trace_subquery.add("choosen", "in_to_exists"); + } } if (in_subs->test_strategy(SUBS_MATERIALIZATION)) |