diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 12:24:36 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 12:24:36 +0000 |
commit | 06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /sql/sql_select.cc | |
parent | Initial commit. (diff) | |
download | mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip |
Adding upstream version 1:10.11.6.upstream/1%10.11.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r-- | sql/sql_select.cc | 32034 |
1 files changed, 32034 insertions, 0 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc new file mode 100644 index 00000000..f4cbed58 --- /dev/null +++ b/sql/sql_select.cc @@ -0,0 +1,32034 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2009, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + mysql_select and join optimization + + + @defgroup Query_Optimizer Query Optimizer + @{ +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "sql_select.h" +#include "sql_cache.h" // query_cache_* +#include "sql_table.h" // primary_key_name +#include "probes_mysql.h" +#include "key.h" // key_copy, key_cmp, key_cmp_if_same +#include "lock.h" // mysql_unlock_some_tables, + // mysql_unlock_read_tables +#include "sql_show.h" // append_identifier +#include "sql_base.h" // setup_wild, setup_fields, fill_record +#include "sql_parse.h" // check_stack_overrun +#include "sql_partition.h" // make_used_partitions_str +#include "sql_test.h" // print_where, print_keyuse_array, + // print_sjm, print_plan, TEST_join +#include "records.h" // init_read_record, end_read_record +#include "filesort.h" // filesort_free_buffers +#include "sql_union.h" // mysql_union +#include "opt_subselect.h" +#include "sql_derived.h" +#include 
"sql_statistics.h" +#include "sql_cte.h" +#include "sql_window.h" +#include "tztime.h" + +#include "debug_sync.h" // DEBUG_SYNC +#include <m_ctype.h> +#include <my_bit.h> +#include <hash.h> +#include <ft_global.h> +#include "sys_vars_shared.h" +#include "sp_head.h" +#include "sp_rcontext.h" +#include "rowid_filter.h" +#include "select_handler.h" +#include "my_json_writer.h" +#include "opt_trace.h" +#include "derived_handler.h" +#include "create_tmp_table.h" + +/* + A key part number that means we're using a fulltext scan. + + In order not to confuse it with regular equalities, we need to pick + a number that's greater than MAX_REF_PARTS. + + Hash Join code stores field->field_index in KEYUSE::keypart, so the + number needs to be bigger than MAX_FIELDS, also. + + CAUTION: sql_test.cc has its own definition of FT_KEYPART. +*/ +#define FT_KEYPART (MAX_FIELDS+10) + +const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", + "MAYBE_REF","ALL","range","index","fulltext", + "ref_or_null","unique_subquery","index_subquery", + "index_merge", "hash_ALL", "hash_range", + "hash_index", "hash_index_merge" }; + +LEX_CSTRING group_key= {STRING_WITH_LEN("group_key")}; +LEX_CSTRING distinct_key= {STRING_WITH_LEN("distinct_key")}; + +struct st_sargable_param; + +static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves, + DYNAMIC_ARRAY *keyuse); +static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, + JOIN_TAB *join_tab, + uint tables, COND *conds, + table_map table_map, SELECT_LEX *select_lex, + SARGABLE_PARAM **sargables); +static int sort_keyuse(KEYUSE *a,KEYUSE *b); +static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables); +static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, + bool allow_full_scan, table_map used_tables); +static bool get_quick_record_count(THD *thd, SQL_SELECT *select, + TABLE *table, + const key_map *keys,ha_rows limit, + ha_rows *quick_count); +static void optimize_straight_join(JOIN 
*join, table_map join_tables); +static bool greedy_search(JOIN *join, table_map remaining_tables, + uint depth, uint use_cond_selectivity); + +enum enum_best_search { + SEARCH_ABORT= -2, + SEARCH_ERROR= -1, + SEARCH_OK= 0, + SEARCH_FOUND_EDGE=1 +}; + +static enum_best_search +best_extension_by_limited_search(JOIN *join, + table_map remaining_tables, + uint idx, double record_count, + double read_time, uint depth, + uint use_cond_selectivity, + table_map *processed_eq_ref_tables); +static uint determine_search_depth(JOIN* join); +C_MODE_START +static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2); +static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2); +static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2); +C_MODE_END +static uint cache_record_length(JOIN *join,uint index); +static store_key *get_store_key(THD *thd, + KEYUSE *keyuse, table_map used_tables, + KEY_PART_INFO *key_part, uchar *key_buff, + uint maybe_null); +static bool make_outerjoin_info(JOIN *join); +static Item* +make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables, + table_map sjm_tables, bool inside_or_clause); +static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item); +static void revise_cache_usage(JOIN_TAB *join_tab); +static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after); +static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); +static void update_depend_map(JOIN *join); +static void update_depend_map_for_order(JOIN *join, ORDER *order); +static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond, + bool change_list, bool *simple_order); +static int return_zero_rows(JOIN *join, select_result *res, + List<TABLE_LIST> *tables, + List<Item> *fields, bool send_row, + ulonglong select_options, const char *info, + Item *having, List<Item> *all_fields); +static COND *build_equal_items(JOIN *join, COND *cond, + 
COND_EQUAL *inherited, + List<TABLE_LIST> *join_list, + bool ignore_on_conds, + COND_EQUAL **cond_equal_ref, + bool link_equal_fields= FALSE); +static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab, + COND *cond, + COND_EQUAL *cond_equal, + void *table_join_idx, + bool do_substitution); +static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, + COND *conds, bool top, bool in_sj); +static bool check_interleaving_with_nj(JOIN_TAB *next); +static void restore_prev_nj_state(JOIN_TAB *last); +static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list); +static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list, + uint first_unused); + +static COND *optimize_cond(JOIN *join, COND *conds, + List<TABLE_LIST> *join_list, + bool ignore_on_conds, + Item::cond_result *cond_value, + COND_EQUAL **cond_equal, + int flags= 0); +bool const_expression_in_where(COND *conds,Item *item, Item **comp_item); +static int do_select(JOIN *join, Procedure *procedure); + +static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int); +static enum_nested_loop_state +evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab); +static enum_nested_loop_state +end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +static enum_nested_loop_state +end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +static enum_nested_loop_state +end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +static enum_nested_loop_state +end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + +static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos); +static int join_read_system(JOIN_TAB *tab); +static int join_read_const(JOIN_TAB *tab); +static int join_read_key(JOIN_TAB *tab); +static void join_read_key_unlock_row(st_join_table *tab); +static void join_const_unlock_row(JOIN_TAB *tab); +static int join_read_always_key(JOIN_TAB *tab); +static int join_read_last_key(JOIN_TAB 
*tab); +static int join_no_more_records(READ_RECORD *info); +static int join_read_next(READ_RECORD *info); +static int join_init_quick_read_record(JOIN_TAB *tab); +static quick_select_return test_if_quick_select(JOIN_TAB *tab); +static int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab); +static int join_read_first(JOIN_TAB *tab); +static int join_read_next(READ_RECORD *info); +static int join_read_next_same(READ_RECORD *info); +static int join_read_last(JOIN_TAB *tab); +static int join_read_prev_same(READ_RECORD *info); +static int join_read_prev(READ_RECORD *info); +static int join_ft_read_first(JOIN_TAB *tab); +static int join_ft_read_next(READ_RECORD *info); +int join_read_always_key_or_null(JOIN_TAB *tab); +int join_read_next_same_or_null(READ_RECORD *info); +static COND *make_cond_for_table(THD *thd, Item *cond,table_map table, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond); +static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, + Item *cond, + table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond, + bool retain_ref_cond, + bool is_top_and_level); + +static Item* part_of_refkey(TABLE *form,Field *field); +uint find_shortest_key(TABLE *table, const key_map *usable_keys); +static bool test_if_cheaper_ordering(const JOIN_TAB *tab, + ORDER *order, TABLE *table, + key_map usable_keys, int key, + ha_rows select_limit, + int *new_key, int *new_key_direction, + ha_rows *new_select_limit, + uint *new_used_key_parts= NULL, + uint *saved_best_key_parts= NULL); +static int test_if_order_by_key(JOIN *, ORDER *, TABLE *, uint, uint *); +static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order, + ha_rows select_limit, bool no_changes, + const key_map *map, + bool *fatal_error); +static bool list_contains_unique_index(TABLE *table, + bool (*find_func) (Field *, void *), void *data); +static bool find_field_in_item_list (Field *field, void *data); +static 
bool find_field_in_order_list (Field *field, void *data); +int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); +static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field, + SORT_FIELD *sortorder, ulong keylength, + Item *having); +static int remove_dup_with_hash_index(THD *thd,TABLE *table, + uint field_count, Field **first_field, + SORT_FIELD *sortorder, + ulong key_length,Item *having); +static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref); +static bool setup_new_fields(THD *thd, List<Item> &fields, + List<Item> &all_fields, ORDER *new_order); +static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array, + ORDER *order, List<Item> &fields, + List<Item> &all_fields, + bool *all_order_by_fields_used); +static bool test_if_subpart(ORDER *group_by, ORDER *order_by); +static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables, + table_map const_tables); +static void calc_group_buffer(JOIN *join, ORDER *group); +static bool make_group_fields(JOIN *main_join, JOIN *curr_join); +static bool alloc_group_fields(JOIN *join, ORDER *group); +static bool alloc_order_fields(JOIN *join, ORDER *group, + uint max_number_of_elements); +// Create list for using with tempory table +static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &new_list1, + List<Item> &new_list2, + uint elements, List<Item> &items); +// Create list for using with tempory table +static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &new_list1, + List<Item> &new_list2, + uint elements, List<Item> &items); +static void init_tmptable_sum_functions(Item_sum **func); +static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table); +static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end); +static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab); +static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr); +static bool 
prepare_sum_aggregators(THD *thd, Item_sum **func_ptr, + bool need_distinct); +static bool init_sum_functions(Item_sum **func, Item_sum **end); +static bool update_sum_func(Item_sum **func); +static void select_describe(JOIN *join, bool need_tmp_table,bool need_order, + bool distinct, const char *message=NullS); +static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); +static uint make_join_orderinfo(JOIN *join); +static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array); + +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl); +JOIN_TAB *first_depth_first_tab(JOIN* join); +JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab); + +static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab, + uint n_top_tabs_count, JOIN_TAB *tab); +static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *, + List<Item> &, List<Item> &, bool, bool, bool); + +static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables); +void set_postjoin_aggr_write_func(JOIN_TAB *tab); + +static Item **get_sargable_cond(JOIN *join, TABLE *table); + +bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item); + +void print_list_item(String *str, List_item *list, + enum_query_type query_type); + +static +bool build_notnull_conds_for_range_scans(JOIN *join, COND *cond, + table_map allowed); +static +void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, + TABLE_LIST *nest_tbl); +static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex); +static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); +static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, + Item *cond); + +#ifndef DBUG_OFF + +/* + SHOW EXPLAIN testing: wait for, and serve n_calls APC requests. 
+*/ +void dbug_serve_apcs(THD *thd, int n_calls) +{ + const char *save_proc_info= thd->proc_info; + + /* Busy-wait for n_calls APC requests to arrive and be processed */ + int n_apcs= thd->apc_target.n_calls_processed + n_calls; + while (thd->apc_target.n_calls_processed < n_apcs) + { + /* This is so that mysqltest knows we're ready to serve requests: */ + thd_proc_info(thd, "show_explain_trap"); + my_sleep(30000); + thd_proc_info(thd, save_proc_info); + if (unlikely(thd->check_killed(1))) + break; + } +} + + +/* + Debugging: check if @name=value, comparing as integer + + Intended usage: + + DBUG_EXECUTE_IF("show_explain_probe_2", + if (dbug_user_var_equals_int(thd, "select_id", select_id)) + dbug_serve_apcs(thd, 1); + ); + +*/ + +bool dbug_user_var_equals_int(THD *thd, const char *name, int value) +{ + user_var_entry *var; + LEX_CSTRING varname= { name, strlen(name)}; + if ((var= get_variable(&thd->user_vars, &varname, FALSE))) + { + bool null_value; + longlong var_value= var->val_int(&null_value); + if (!null_value && var_value == value) + return TRUE; + } + return FALSE; +} + +/* + Debugging : check if @name= value, comparing as string + + Intended usage : + + DBUG_EXECUTE_IF("log_slow_statement_end", + if (dbug_user_var_equals_str(thd, "show_explain_probe_query", + thd->query())) + dbug_serve_apcs(thd, 1); + ); +*/ + +bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value) +{ + user_var_entry *var; + LEX_CSTRING varname= {name, strlen(name)}; + if ((var= get_variable(&thd->user_vars, &varname, FALSE))) + { + bool null_value; + String str; + auto var_value= var->val_str(&null_value, &str, 10)->ptr(); + if (!null_value && !strncmp(var_value, value, strlen(value))) + return TRUE; + } + return FALSE; +} +#endif /* DBUG_OFF */ + +/* + Intialize POSITION structure. 
+*/ + +POSITION::POSITION() +{ + table= 0; + records_read= cond_selectivity= read_time= 0.0; + prefix_record_count= 0.0; + key= 0; + use_join_buffer= 0; + sj_strategy= SJ_OPT_NONE; + n_sj_tables= 0; + spl_plan= 0; + range_rowid_filter_info= 0; + ref_depend_map= dups_producing_tables= 0; + inner_tables_handled_with_other_sjs= 0; + type= JT_UNKNOWN; + key_dependent= 0; + dups_weedout_picker.set_empty(); + firstmatch_picker.set_empty(); + loosescan_picker.set_empty(); + sjmat_picker.set_empty(); +} + + +void JOIN::init(THD *thd_arg, List<Item> &fields_arg, + ulonglong select_options_arg, select_result *result_arg) +{ + join_tab= 0; + table= 0; + table_count= 0; + top_join_tab_count= 0; + const_tables= 0; + const_table_map= found_const_table_map= not_usable_rowid_map= 0; + aggr_tables= 0; + eliminated_tables= 0; + join_list= 0; + implicit_grouping= FALSE; + sort_and_group= 0; + first_record= 0; + do_send_rows= 1; + duplicate_rows= send_records= 0; + found_records= accepted_rows= 0; + fetch_limit= HA_POS_ERROR; + thd= thd_arg; + sum_funcs= sum_funcs2= 0; + procedure= 0; + having= tmp_having= having_history= 0; + having_is_correlated= false; + group_list_for_estimates= 0; + select_options= select_options_arg; + result= result_arg; + lock= thd_arg->lock; + select_lex= 0; //for safety + select_distinct= MY_TEST(select_options & SELECT_DISTINCT); + no_order= 0; + simple_order= 0; + simple_group= 0; + ordered_index_usage= ordered_index_void; + need_distinct= 0; + skip_sort_order= 0; + with_two_phase_optimization= 0; + save_qep= 0; + spl_opt_info= 0; + ext_keyuses_for_splitting= 0; + spl_opt_info= 0; + need_tmp= 0; + hidden_group_fields= 0; /*safety*/ + error= 0; + select= 0; + return_tab= 0; + ref_ptrs.reset(); + items0.reset(); + items1.reset(); + items2.reset(); + items3.reset(); + zero_result_cause= 0; + optimization_state= JOIN::NOT_OPTIMIZED; + have_query_plan= QEP_NOT_PRESENT_YET; + initialized= 0; + cleaned= 0; + cond_equal= 0; + having_equal= 0; + exec_const_cond= 0; 
+ group_optimized_away= 0; + no_rows_in_result_called= 0; + positions= best_positions= 0; + pushdown_query= 0; + original_join_tab= 0; + explain= NULL; + tmp_table_keep_current_rowid= 0; + allowed_top_level_tables= 0; + + all_fields= fields_arg; + if (&fields_list != &fields_arg) /* Avoid valgrind-warning */ + fields_list= fields_arg; + non_agg_fields.empty(); + bzero((char*) &keyuse,sizeof(keyuse)); + having_value= Item::COND_UNDEF; + tmp_table_param.init(); + tmp_table_param.end_write_records= HA_POS_ERROR; + rollup.state= ROLLUP::STATE_NONE; + + no_const_tables= FALSE; + first_select= sub_select; + set_group_rpa= false; + group_sent= 0; + + outer_ref_cond= pseudo_bits_cond= NULL; + in_to_exists_where= NULL; + in_to_exists_having= NULL; + emb_sjm_nest= NULL; + sjm_lookup_tables= 0; + sjm_scan_tables= 0; + is_orig_degenerated= false; + with_ties_order_count= 0; +}; + + +static void trace_table_dependencies(THD *thd, + JOIN_TAB *join_tabs, uint table_count) +{ + DBUG_ASSERT(thd->trace_started()); + Json_writer_object trace_wrapper(thd); + Json_writer_array trace_dep(thd, "table_dependencies"); + + for (uint i= 0; i < table_count; i++) + { + TABLE_LIST *table_ref= join_tabs[i].tab_list; + Json_writer_object trace_one_table(thd); + trace_one_table.add_table_name(&join_tabs[i]); + trace_one_table.add("row_may_be_null", + (bool)table_ref->table->maybe_null); + const table_map map= table_ref->get_map(); + DBUG_ASSERT(map < (1ULL << table_count)); + for (uint j= 0; j < table_count; j++) + { + if (map & (1ULL << j)) + { + trace_one_table.add("map_bit", j); + break; + } + } + Json_writer_array depends_on(thd, "depends_on_map_bits"); + Table_map_iterator it(join_tabs[i].dependent); + uint dep_bit; + while ((dep_bit= it++) != Table_map_iterator::BITMAP_END) + depends_on.add(static_cast<longlong>(dep_bit)); + } +} + + +/** + This handles SELECT with and without UNION. 
+*/ + +bool handle_select(THD *thd, LEX *lex, select_result *result, + ulonglong setup_tables_done_option) +{ + bool res; + SELECT_LEX *select_lex= lex->first_select_lex(); + DBUG_ENTER("handle_select"); + MYSQL_SELECT_START(thd->query()); + + if (select_lex->master_unit()->is_unit_op() || + select_lex->master_unit()->fake_select_lex) + res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option); + else + { + SELECT_LEX_UNIT *unit= &lex->unit; + unit->set_limit(unit->global_parameters()); + /* + 'options' of mysql_select will be set in JOIN, as far as JOIN for + every PS/SP execution new, we will not need reset this flag if + setup_tables_done_option changed for next rexecution + */ + res= mysql_select(thd, + select_lex->table_list.first, + select_lex->item_list, + select_lex->where, + select_lex->order_list.elements + + select_lex->group_list.elements, + select_lex->order_list.first, + select_lex->group_list.first, + select_lex->having, + lex->proc_list.first, + select_lex->options | thd->variables.option_bits | + setup_tables_done_option, + result, unit, select_lex); + } + DBUG_PRINT("info",("res: %d is_error(): %d", res, + thd->is_error())); + res|= thd->is_error(); + if (unlikely(res)) + result->abort_result_set(); + if (unlikely(thd->killed == ABORT_QUERY && !thd->no_errors)) + { + /* + If LIMIT ROWS EXAMINED interrupted query execution, issue a warning, + continue with normal processing and produce an incomplete query result. + */ + bool saved_abort_on_warning= thd->abort_on_warning; + thd->abort_on_warning= false; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_QUERY_RESULT_INCOMPLETE, + ER_THD(thd, ER_QUERY_RESULT_INCOMPLETE), + "LIMIT ROWS EXAMINED", + thd->lex->limit_rows_examined->val_uint()); + thd->abort_on_warning= saved_abort_on_warning; + thd->reset_killed(); + } + /* Disable LIMIT ROWS EXAMINED after query execution. 
*/ + thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX; + + MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows); + DBUG_RETURN(res); +} + + +/** + Fix fields referenced from inner selects. + + @param thd Thread handle + @param all_fields List of all fields used in select + @param select Current select + @param ref_pointer_array Array of references to Items used in current select + @param group_list GROUP BY list (is NULL by default) + + @details + The function serves 3 purposes + + - adds fields referenced from inner query blocks to the current select list + + - Decides which class to use to reference the items (Item_ref or + Item_direct_ref) + + - fixes references (Item_ref objects) to these fields. + + If a field isn't already on the select list and the ref_pointer_array + is provided then it is added to the all_fields list and the pointer to + it is saved in the ref_pointer_array. + + The class to access the outer field is determined by the following rules: + + -#. If the outer field isn't used under an aggregate function then the + Item_ref class should be used. + + -#. If the outer field is used under an aggregate function and this + function is, in turn, aggregated in the query block where the outer + field was resolved or some query nested therein, then the + Item_direct_ref class should be used. Also it should be used if we are + grouping by a subquery that references this outer field. + + The resolution is done here and not at the fix_fields() stage as + it can be done only after aggregate functions are fixed and pulled up to + selects where they are to be aggregated. + + When the class is chosen it substitutes the original field in the + Item_outer_ref object. + + After this we proceed with fixing references (Item_outer_ref objects) to + this field from inner subqueries. + + @return Status + @retval true An error occurred. + @retval false OK. 
+ */ + +bool +fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, + Ref_ptr_array ref_pointer_array) +{ + Item_outer_ref *ref; + + /* + Mark the references from the inner_refs_list that are occurred in + the group by expressions. Those references will contain direct + references to the referred fields. The markers are set in + the found_in_group_by field of the references from the list. + */ + List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list); + for (ORDER *group= select->join->group_list; group; group= group->next) + { + (*group->item)->walk(&Item::check_inner_refs_processor, TRUE, &ref_it); + } + + while ((ref= ref_it++)) + { + bool direct_ref= false; + Item *item= ref->outer_ref; + Item **item_ref= ref->ref; + Item_ref *new_ref; + /* + TODO: this field item already might be present in the select list. + In this case instead of adding new field item we could use an + existing one. The change will lead to less operations for copying fields, + smaller temporary tables and less data passed through filesort. + */ + if (!ref_pointer_array.is_null() && !ref->found_in_select_list) + { + int el= all_fields.elements; + ref_pointer_array[el]= item; + /* Add the field item to the select list of the current select. */ + all_fields.push_front(item, thd->mem_root); + /* + If it's needed reset each Item_ref item that refers this field with + a new reference taken from ref_pointer_array. + */ + item_ref= &ref_pointer_array[el]; + } + + if (ref->in_sum_func) + { + Item_sum *sum_func; + if (ref->in_sum_func->nest_level > select->nest_level) + direct_ref= TRUE; + else + { + for (sum_func= ref->in_sum_func; sum_func && + sum_func->aggr_level >= select->nest_level; + sum_func= sum_func->in_sum_func) + { + if (sum_func->aggr_level == select->nest_level) + { + direct_ref= TRUE; + break; + } + } + } + } + else if (ref->found_in_group_by) + direct_ref= TRUE; + + new_ref= direct_ref ? 
+ new (thd->mem_root) Item_direct_ref(thd, ref->context, item_ref, ref->table_name, + ref->field_name, ref->alias_name_used) : + new (thd->mem_root) Item_ref(thd, ref->context, item_ref, ref->table_name, + ref->field_name, ref->alias_name_used); + if (!new_ref) + return TRUE; + ref->outer_ref= new_ref; + ref->ref= &ref->outer_ref; + + if (ref->fix_fields_if_needed(thd, 0)) + return TRUE; + thd->lex->used_tables|= item->used_tables(); + thd->lex->current_select->select_list_tables|= item->used_tables(); + } + return false; +} + +/** + The following clauses are redundant for subqueries: + + DISTINCT + GROUP BY if there are no aggregate functions and no HAVING + clause + + Because redundant clauses are removed both from JOIN and + select_lex, the removal is permanent. Thus, it only makes sense to + call this function for normal queries and on first execution of + SP/PS + + @param subq_select_lex select_lex that is part of a subquery + predicate. This object and the associated + join is modified. +*/ + +static +void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex) +{ + DBUG_ENTER("remove_redundant_subquery_clauses"); + Item_subselect *subq_predicate= subq_select_lex->master_unit()->item; + /* + The removal should happen for IN, ALL, ANY and EXISTS subqueries, + which means all but single row subqueries. Example single row + subqueries: + a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>) + b) SELECT a, (<single row subquery) FROM t1 + */ + if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS) + DBUG_VOID_RETURN; + + /* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. 
*/ + DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS || + subq_predicate->is_in_predicate()); + + if (subq_select_lex->options & SELECT_DISTINCT) + { + subq_select_lex->join->select_distinct= false; + subq_select_lex->options&= ~SELECT_DISTINCT; + DBUG_PRINT("info", ("DISTINCT removed")); + } + + /* + Remove GROUP BY if there are no aggregate functions and no HAVING + clause + */ + if (subq_select_lex->group_list.elements && + !subq_select_lex->with_sum_func && !subq_select_lex->join->having) + { + for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next) + { + /* + Do not remove the item if it is used in select list and then referred + from GROUP BY clause by its name or number. Example: + + select (select ... ) as SUBQ ... group by SUBQ + + Here SUBQ cannot be removed. + */ + if (!ord->in_field_list) + { + (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL); + /* + Remove from the JOIN::all_fields list any reference to the elements + of the eliminated GROUP BY list unless it is 'in_field_list'. + This is needed in order not to confuse JOIN::make_aggr_tables_info() + when it constructs different structure for execution phase. + */ + List_iterator<Item> li(subq_select_lex->join->all_fields); + Item *item; + while ((item= li++)) + { + if (item == *ord->item) + li.remove(); + } + } + } + subq_select_lex->join->group_list= NULL; + subq_select_lex->group_list.empty(); + DBUG_PRINT("info", ("GROUP BY removed")); + } + + /* + TODO: This would prevent processing quries with ORDER BY ... LIMIT + therefore we disable this optimization for now. + Remove GROUP BY if there are no aggregate functions and no HAVING + clause + if (subq_select_lex->group_list.elements && + !subq_select_lex->with_sum_func && !subq_select_lex->join->having) + { + subq_select_lex->join->group_list= NULL; + subq_select_lex->group_list.empty(); + } + */ + DBUG_VOID_RETURN; +} + + +/** + Function to setup clauses without sum functions. 
+*/ +static inline int +setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array, + TABLE_LIST *tables, + List<TABLE_LIST> &leaves, + List<Item> &fields, + List<Item> &all_fields, + COND **conds, + ORDER *order, + ORDER *group, + List<Window_spec> &win_specs, + List<Item_window_func> &win_funcs, + bool *hidden_group_fields) +{ + int res; + enum_parsing_place save_place; + st_select_lex *const select= thd->lex->current_select; + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; + /* + Need to stave the value, so we can turn off only any new non_agg_field_used + additions coming from the WHERE + */ + const bool saved_non_agg_field_used= select->non_agg_field_used(); + DBUG_ENTER("setup_without_group"); + + thd->lex->allow_sum_func.clear_bit(select->nest_level); + res= setup_conds(thd, tables, leaves, conds); + + /* it's not wrong to have non-aggregated columns in a WHERE */ + select->set_non_agg_field_used(saved_non_agg_field_used); + + thd->lex->allow_sum_func.set_bit(select->nest_level); + + save_place= thd->lex->current_select->context_analysis_place; + thd->lex->current_select->context_analysis_place= IN_ORDER_BY; + res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields, + order); + thd->lex->allow_sum_func.clear_bit(select->nest_level); + thd->lex->current_select->context_analysis_place= IN_GROUP_BY; + res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields, + group, hidden_group_fields); + thd->lex->current_select->context_analysis_place= save_place; + thd->lex->allow_sum_func.set_bit(select->nest_level); + res= res || setup_windows(thd, ref_pointer_array, tables, fields, all_fields, + win_specs, win_funcs); + thd->lex->allow_sum_func= save_allow_sum_func; + DBUG_RETURN(res); +} + +bool vers_select_conds_t::init_from_sysvar(THD *thd) +{ + vers_asof_timestamp_t &in= thd->variables.vers_asof_timestamp; + type= (vers_system_time_t) in.type; + delete_history= false; + start.unit= VERS_TIMESTAMP; + if (type != 
SYSTEM_TIME_UNSPECIFIED && type != SYSTEM_TIME_ALL) + { + DBUG_ASSERT(type == SYSTEM_TIME_AS_OF); + Datetime dt(in.unix_time, in.second_part, thd->variables.time_zone); + + start.item= new (thd->mem_root) + Item_datetime_literal(thd, &dt, TIME_SECOND_PART_DIGITS); + if (!start.item) + return true; + } + else + start.item= NULL; + end.empty(); + return false; +} + +void vers_select_conds_t::print(String *str, enum_query_type query_type) const +{ + switch (orig_type) { + case SYSTEM_TIME_UNSPECIFIED: + break; + case SYSTEM_TIME_AS_OF: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME AS OF ")); + break; + case SYSTEM_TIME_FROM_TO: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME FROM ")); + end.print(str, query_type, STRING_WITH_LEN(" TO ")); + break; + case SYSTEM_TIME_BETWEEN: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BETWEEN ")); + end.print(str, query_type, STRING_WITH_LEN(" AND ")); + break; + case SYSTEM_TIME_BEFORE: + start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BEFORE ")); + break; + case SYSTEM_TIME_HISTORY: + // nothing to add + break; + case SYSTEM_TIME_ALL: + str->append(STRING_WITH_LEN(" FOR SYSTEM_TIME ALL")); + break; + } +} + +static +Item* period_get_condition(THD *thd, TABLE_LIST *table, SELECT_LEX *select, + vers_select_conds_t *conds, bool timestamp) +{ + DBUG_ASSERT(table); + DBUG_ASSERT(table->table); +#define newx new (thd->mem_root) + TABLE_SHARE *share= table->table->s; + const TABLE_SHARE::period_info_t *period= conds->period; + + const LEX_CSTRING &fstart= period->start_field(share)->field_name; + const LEX_CSTRING &fend= period->end_field(share)->field_name; + + conds->field_start= newx Item_field(thd, &select->context, + table->db, table->alias, + thd->strmake_lex_cstring(fstart)); + conds->field_end= newx Item_field(thd, &select->context, + table->db, table->alias, + thd->strmake_lex_cstring(fend)); + + Item *cond1= NULL, *cond2= NULL, *cond3= NULL, *curr= NULL; + if 
(timestamp) + { + MYSQL_TIME max_time; + switch (conds->type) + { + case SYSTEM_TIME_UNSPECIFIED: + case SYSTEM_TIME_HISTORY: + { + thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE); + max_time.second_part= TIME_MAX_SECOND_PART; + Datetime dt(&max_time); + curr= newx Item_datetime_literal(thd, &dt, TIME_SECOND_PART_DIGITS); + if (conds->type == SYSTEM_TIME_UNSPECIFIED) + cond1= newx Item_func_eq(thd, conds->field_end, curr); + else + cond1= newx Item_func_lt(thd, conds->field_end, curr); + break; + } + case SYSTEM_TIME_AS_OF: + cond1= newx Item_func_le(thd, conds->field_start, conds->start.item); + cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item); + break; + case SYSTEM_TIME_FROM_TO: + cond1= newx Item_func_lt(thd, conds->field_start, conds->end.item); + cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item); + cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BETWEEN: + cond1= newx Item_func_le(thd, conds->field_start, conds->end.item); + cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item); + cond3= newx Item_func_le(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BEFORE: + cond1= newx Item_func_history(thd, conds->field_end); + cond2= newx Item_func_lt(thd, conds->field_end, conds->start.item); + break; + default: + DBUG_ASSERT(0); + } + } + else + { + DBUG_ASSERT(table->table->s && table->table->s->db_plugin); + + Item *trx_id0= conds->start.item; + Item *trx_id1= conds->end.item; + if (conds->start.item && conds->start.unit == VERS_TIMESTAMP) + { + bool backwards= conds->type != SYSTEM_TIME_AS_OF; + trx_id0= newx Item_func_trt_id(thd, conds->start.item, + TR_table::FLD_TRX_ID, backwards); + } + if (conds->end.item && conds->end.unit == VERS_TIMESTAMP) + { + trx_id1= newx Item_func_trt_id(thd, conds->end.item, + TR_table::FLD_TRX_ID, false); + } + + switch (conds->type) + { + case SYSTEM_TIME_UNSPECIFIED: + case 
SYSTEM_TIME_HISTORY: + curr= newx Item_int(thd, ULONGLONG_MAX); + if (conds->type == SYSTEM_TIME_UNSPECIFIED) + cond1= newx Item_func_eq(thd, conds->field_end, curr); + else + cond1= newx Item_func_lt(thd, conds->field_end, curr); + break; + DBUG_ASSERT(!conds->start.item); + DBUG_ASSERT(!conds->end.item); + break; + case SYSTEM_TIME_AS_OF: + cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id0, conds->field_start); + cond2= newx Item_func_trt_trx_sees(thd, conds->field_end, trx_id0); + DBUG_ASSERT(!conds->end.item); + break; + case SYSTEM_TIME_FROM_TO: + cond1= newx Item_func_trt_trx_sees(thd, trx_id1, conds->field_start); + cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0); + cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BETWEEN: + cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id1, conds->field_start); + cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0); + cond3= newx Item_func_le(thd, conds->start.item, conds->end.item); + break; + case SYSTEM_TIME_BEFORE: + cond1= newx Item_func_history(thd, conds->field_end); + cond2= newx Item_func_trt_trx_sees(thd, trx_id0, conds->field_end); + break; + default: + DBUG_ASSERT(0); + } + } + + if (cond1) + { + cond1= and_items(thd, cond2, cond1); + cond1= and_items(thd, cond3, cond1); + } + return cond1; +} + +static +bool skip_setup_conds(THD *thd) +{ + return (!thd->stmt_arena->is_conventional() + && !thd->stmt_arena->is_stmt_prepare_or_first_sp_execute()) + || thd->lex->is_view_context_analysis(); +} + +int SELECT_LEX::period_setup_conds(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("SELECT_LEX::period_setup_conds"); + const bool update_conds= !skip_setup_conds(thd); + + Query_arena backup; + Query_arena *arena= thd->activate_stmt_arena_if_needed(&backup); + + DBUG_ASSERT(!tables->next_local && tables->table); + + Item *result= NULL; + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + if (!table->table) + continue; + 
vers_select_conds_t &conds= table->period_conditions; + if (!table->table->s->period.name.streq(conds.name)) + { + my_error(ER_PERIOD_NOT_FOUND, MYF(0), conds.name.str); + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(-1); + } + + if (update_conds) + { + conds.period= &table->table->s->period; + result= and_items(thd, result, + period_get_condition(thd, table, this, &conds, true)); + } + } + if (update_conds) + where= and_items(thd, where, result); + + if (arena) + thd->restore_active_arena(arena, &backup); + + DBUG_RETURN(0); +} + +int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("SELECT_LEX::vers_setup_conds"); + const bool update_conds= !skip_setup_conds(thd); + + if (!versioned_tables) + { + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + versioned_tables++; + else if (table->vers_conditions.is_set() && + (table->is_non_derived() || !table->vers_conditions.used)) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), table->alias.str); + DBUG_RETURN(-1); + } + } + } + + if (versioned_tables == 0) + DBUG_RETURN(0); + + /* For prepared statements we create items on statement arena, + because they must outlive execution phase for multiple executions. 
*/ + Query_arena_stmt on_stmt_arena(thd); + + // find outer system_time + SELECT_LEX *outer_slex= outer_select(); + TABLE_LIST* outer_table= NULL; + + if (outer_slex) + { + TABLE_LIST* derived= master_unit()->derived; + // inner SELECT may not be a derived table (derived == NULL) + while (derived && outer_slex && !derived->vers_conditions.is_set()) + { + derived= outer_slex->master_unit()->derived; + outer_slex= outer_slex->outer_select(); + } + if (derived && outer_slex) + { + DBUG_ASSERT(derived->vers_conditions.is_set()); + outer_table= derived; + } + } + + bool is_select= false; + bool use_sysvar= false; + switch (thd->lex->sql_command) + { + case SQLCOM_SELECT: + use_sysvar= true; + /* fall through */ + case SQLCOM_CREATE_TABLE: + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE_MULTI: + is_select= true; + default: + break; + } + + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + if (!table->table || table->is_view() || !table->table->versioned()) + continue; + + vers_select_conds_t &vers_conditions= table->vers_conditions; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* + if the history is stored in partitions, then partitions + themselves are not versioned + */ + if (table->partition_names && table->table->part_info->vers_info) + { + /* If the history is stored in partitions, then partitions + themselves are not versioned. 
*/ + if (vers_conditions.was_set()) + { + my_error(ER_VERS_QUERY_IN_PARTITION, MYF(0), table->alias.str); + DBUG_RETURN(-1); + } + else if (!vers_conditions.is_set()) + vers_conditions.set_all(); + } +#endif + + if (outer_table && !vers_conditions.is_set()) + { + // propagate system_time from nearest outer SELECT_LEX + vers_conditions= outer_table->vers_conditions; + outer_table->vers_conditions.used= true; + } + + // propagate system_time from sysvar + if (!vers_conditions.is_set() && use_sysvar) + { + if (vers_conditions.init_from_sysvar(thd)) + DBUG_RETURN(-1); + } + + if (vers_conditions.is_set()) + { + if (vers_conditions.was_set() && + table->lock_type >= TL_FIRST_WRITE && + !vers_conditions.delete_history) + { + my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), table->alias.str); + DBUG_RETURN(-1); + } + + if (vers_conditions.type == SYSTEM_TIME_ALL) + continue; + } + + bool timestamps_only= table->table->versioned(VERS_TIMESTAMP); + + if (vers_conditions.is_set() && vers_conditions.type != SYSTEM_TIME_HISTORY) + { + thd->where= "FOR SYSTEM_TIME"; + /* TODO: do resolve fix_length_and_dec(), fix_fields(). This requires + storing vers_conditions as Item and make some magic related to + vers_system_time_t/VERS_TRX_ID at stage of fix_fields() + (this is large refactoring). 
*/ + if (vers_conditions.check_units(thd)) + DBUG_RETURN(-1); + if (timestamps_only && (vers_conditions.start.unit == VERS_TRX_ID || + vers_conditions.end.unit == VERS_TRX_ID)) + { + my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->table_name.str); + DBUG_RETURN(-1); + } + } + + if (update_conds) + { + vers_conditions.period = &table->table->s->vers; + Item *cond= period_get_condition(thd, table, this, &vers_conditions, + timestamps_only); + if (is_select) + table->on_expr= and_items(thd, table->on_expr, cond); + else + { + if (join) + { + where= and_items(thd, join->conds, cond); + join->conds= where; + } + else + where= and_items(thd, where, cond); + table->where= and_items(thd, table->where, cond); + } + + table->vers_conditions.set_all(); + } + } // for (table= tables; ...) + + DBUG_RETURN(0); +} + + +/***************************************************************************** + Check fields, find best join, do the select and output fields. + mysql_select assumes that all tables are already opened +*****************************************************************************/ + +/* + Check if we have a field reference. If yes, we have to use + mixed_implicit_grouping. +*/ + +static bool check_list_for_field(List<Item> *items) +{ + List_iterator_fast <Item> select_it(*items); + Item *select_el; + + while ((select_el= select_it++)) + { + if (select_el->with_field()) + return true; + } + return false; +} + +static bool check_list_for_field(ORDER *order) +{ + for (; order; order= order->next) + { + if (order->item[0]->with_field()) + return true; + } + return false; +} + + +/** + Prepare of whole select (including sub queries in future). 
+ + @todo + Add check of calculation of GROUP functions and fields: + SELECT COUNT(*)+table.col1 from table1; + + @retval + -1 on error + @retval + 0 on success +*/ +int +JOIN::prepare(TABLE_LIST *tables_init, COND *conds_init, uint og_num, + ORDER *order_init, bool skip_order_by, + ORDER *group_init, Item *having_init, + ORDER *proc_param_init, SELECT_LEX *select_lex_arg, + SELECT_LEX_UNIT *unit_arg) +{ + DBUG_ENTER("JOIN::prepare"); + + // to prevent double initialization on EXPLAIN + if (optimization_state != JOIN::NOT_OPTIMIZED) + DBUG_RETURN(0); + + conds= conds_init; + order= order_init; + group_list= group_init; + having= having_init; + proc_param= proc_param_init; + tables_list= tables_init; + select_lex= select_lex_arg; + DBUG_PRINT("info", ("select %p (%u) = JOIN %p", + select_lex, select_lex->select_number, this)); + select_lex->join= this; + join_list= &select_lex->top_join_list; + union_part= unit_arg->is_unit_op(); + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_prepare(thd, "join_preparation"); + trace_prepare.add_select_number(select_lex->select_number); + Json_writer_array trace_steps(thd, "steps"); + + // simple check that we got usable conds + dbug_print_item(conds); + + /* Fix items that requires the join structure to exist */ + fix_items_after_optimize(thd, select_lex); + + /* + It is hack which force creating EXPLAIN object always on runt-time arena + (because very top JOIN::prepare executes always with runtime arena, but + constant subquery like (SELECT 'x') can be called with statement arena + during prepare phase of top SELECT). 
+ */ + if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE)) + create_explain_query_if_not_exists(thd->lex, thd->mem_root); + + if (select_lex->handle_derived(thd->lex, DT_PREPARE)) + DBUG_RETURN(-1); + + thd->lex->current_select->context_analysis_place= NO_MATTER; + thd->lex->current_select->is_item_list_lookup= 1; + /* + If we have already executed SELECT, then it have not sense to prevent + its table from update (see unique_table()) + Affects only materialized derived tables. + */ + /* Check that all tables, fields, conds and order are ok */ + if (!(select_options & OPTION_SETUP_TABLES_DONE) && + setup_tables_and_check_access(thd, &select_lex->context, join_list, + tables_list, select_lex->leaf_tables, + FALSE, SELECT_ACL, SELECT_ACL, FALSE)) + DBUG_RETURN(-1); + + /* System Versioning: handle FOR SYSTEM_TIME clause. */ + if (select_lex->vers_setup_conds(thd, tables_list) < 0) + DBUG_RETURN(-1); + + /* + mixed_implicit_grouping will be set to TRUE if the SELECT list + mixes elements with and without grouping, and there is no GROUP BY + clause. + Mixing non-aggregated fields with aggregate functions in the + SELECT list or HAVING is a MySQL extension that is allowed only if + the ONLY_FULL_GROUP_BY sql mode is not set. + */ + mixed_implicit_grouping= false; + if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) && + select_lex->with_sum_func && !group_list) + { + if (check_list_for_field(&fields_list) || + check_list_for_field(order)) + { + List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables); + + mixed_implicit_grouping= true; // mark for future + + while (TABLE_LIST *tbl= li++) + { + /* + If the query uses implicit grouping where the select list + contains both aggregate functions and non-aggregate fields, + any non-aggregated field may produce a NULL value. Set all + fields of each table as nullable before semantic analysis to + take into account this change of nullability. 
+ + Note: this loop doesn't touch tables inside merged + semi-joins, because subquery-to-semijoin conversion has not + been done yet. This is intended. + */ + if (tbl->table) + tbl->table->maybe_null= 1; + } + } + } + table_count= select_lex->leaf_tables.elements; + + uint real_og_num= og_num; + if (skip_order_by && + select_lex != select_lex->master_unit()->global_parameters()) + real_og_num+= select_lex->order_list.elements; + + DBUG_ASSERT(select_lex->hidden_bit_fields == 0); + if (setup_wild(thd, tables_list, fields_list, &all_fields, select_lex, false)) + DBUG_RETURN(-1); + + if (thd->lex->current_select->first_cond_optimization) + { + if ( conds && ! thd->lex->current_select->merged_into) + select_lex->select_n_reserved= conds->exists2in_reserved_items(); + else + select_lex->select_n_reserved= 0; + } + + if (select_lex->setup_ref_array(thd, real_og_num)) + DBUG_RETURN(-1); + + ref_ptrs= ref_ptr_array_slice(0); + + enum_parsing_place save_place= + thd->lex->current_select->context_analysis_place; + thd->lex->current_select->context_analysis_place= SELECT_LIST; + + { + List_iterator_fast<TABLE_LIST> it(select_lex->leaf_tables); + while (TABLE_LIST *tbl= it++) + { + if (tbl->table_function && + tbl->table_function->setup(thd, tbl, select_lex_arg)) + DBUG_RETURN(-1); + } + } + + if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ, + &all_fields, &select_lex->pre_fix, 1)) + DBUG_RETURN(-1); + thd->lex->current_select->context_analysis_place= save_place; + + if (setup_without_group(thd, ref_ptrs, tables_list, + select_lex->leaf_tables, fields_list, + all_fields, &conds, order, group_list, + select_lex->window_specs, + select_lex->window_funcs, + &hidden_group_fields)) + DBUG_RETURN(-1); + + /* + Permanently remove redundant parts from the query if + 1) This is a subquery + 2) This is the first time this query is optimized (since the + transformation is permanent + 3) Not normalizing a view. 
Removal should take place when a + query involving a view is optimized, not when the view + is created + */ + if (select_lex->master_unit()->item && // 1) + select_lex->first_cond_optimization && // 2) + !thd->lex->is_view_context_analysis()) // 3) + { + remove_redundant_subquery_clauses(select_lex); + } + + /* Resolve the ORDER BY that was skipped, then remove it. */ + if (skip_order_by && select_lex != + select_lex->master_unit()->global_parameters()) + { + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; + thd->lex->allow_sum_func.set_bit(select_lex->nest_level); + thd->where= "order clause"; + for (ORDER *order= select_lex->order_list.first; order; order= order->next) + { + /* Don't add the order items to all fields. Just resolve them to ensure + the query is valid, we'll drop them immediately after. */ + if (find_order_in_list(thd, ref_ptrs, tables_list, order, + fields_list, all_fields, false, false, false)) + DBUG_RETURN(-1); + } + thd->lex->allow_sum_func= save_allow_sum_func; + select_lex->order_list.empty(); + } + + if (having) + { + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; + thd->where="having clause"; + thd->lex->allow_sum_func.set_bit(select_lex_arg->nest_level); + select_lex->having_fix_field= 1; + /* + Wrap alone field in HAVING clause in case it will be outer field + of subquery which need persistent pointer on it, but having + could be changed by optimizer + */ + if (having->type() == Item::REF_ITEM && + ((Item_ref *)having)->ref_type() == Item_ref::REF) + wrap_ident(thd, &having); + bool having_fix_rc= having->fix_fields_if_needed_for_bool(thd, &having); + select_lex->having_fix_field= 0; + + if (unlikely(having_fix_rc || thd->is_error())) + DBUG_RETURN(-1); /* purecov: inspected */ + thd->lex->allow_sum_func= save_allow_sum_func; + + if (having->with_window_func()) + { + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + DBUG_RETURN(-1); + } + } + + /* + After setting up window functions, we may have discovered 
additional + used tables from the PARTITION BY and ORDER BY list. Update all items + that contain window functions. + */ + if (select_lex->have_window_funcs()) + { + List_iterator_fast<Item> it(select_lex->item_list); + Item *item; + while ((item= it++)) + { + if (item->with_window_func()) + item->update_used_tables(); + } + } + + With_clause *with_clause=select_lex->get_with_clause(); + if (with_clause && with_clause->prepare_unreferenced_elements(thd)) + DBUG_RETURN(1); + + With_element *with_elem= select_lex->get_with_element(); + if (with_elem && + select_lex->check_unrestricted_recursive( + thd->variables.only_standard_compliant_cte)) + DBUG_RETURN(-1); + if (!(select_lex->changed_elements & TOUCHED_SEL_COND)) + select_lex->check_subqueries_with_recursive_references(); + + int res= check_and_do_in_subquery_rewrites(this); + + select_lex->fix_prepare_information(thd, &conds, &having); + + if (res) + DBUG_RETURN(res); + + if (order) + { + bool requires_sorting= FALSE; + /* + WITH TIES forces the results to be sorted, even if it's not sanely + sortable. + */ + if (select_lex->limit_params.with_ties) + requires_sorting= true; + + /* + Go through each ORDER BY item and perform the following: + 1. Detect if none of the items contain meaningful data, which means we + can drop the sorting altogether. + 2. Split any columns with aggregation functions or window functions into + their base components and store them as separate fields. + (see split_sum_func) for more details. + */ + for (ORDER *ord= order; ord; ord= ord->next) + { + Item *item= *ord->item; + /* + Disregard sort order if there's only + zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or + zero length NOT NULL string functions there. + Such tuples don't contain any data to sort. 
+ */ + if (!requires_sorting && + /* Not a zero length NOT NULL field */ + ((item->type() != Item::FIELD_ITEM || + ((Item_field *) item)->field->maybe_null() || + ((Item_field *) item)->field->sort_length()) && + /* AND not a zero length NOT NULL string function. */ + (item->type() != Item::FUNC_ITEM || + item->maybe_null() || + item->result_type() != STRING_RESULT || + item->max_length))) + requires_sorting= TRUE; + + if ((item->with_sum_func() && item->type() != Item::SUM_FUNC_ITEM) || + item->with_window_func()) + item->split_sum_func(thd, ref_ptrs, all_fields, SPLIT_SUM_SELECT); + } + /* Drop the ORDER BY clause if none of the columns contain any data that + can produce a meaningful sorted set. */ + if (!requires_sorting) + order= NULL; + } + else + { + /* The current select does not have an ORDER BY */ + if (select_lex->limit_params.with_ties) + { + my_error(ER_WITH_TIES_NEEDS_ORDER, MYF(0)); + DBUG_RETURN(-1); + } + } + + if (having && (having->with_sum_func() || having->with_rownum_func())) + having->split_sum_func2(thd, ref_ptrs, all_fields, + &having, SPLIT_SUM_SKIP_REGISTERED); + if (select_lex->inner_sum_func_list) + { + Item_sum *end=select_lex->inner_sum_func_list; + Item_sum *item_sum= end; + do + { + item_sum= item_sum->next; + item_sum->split_sum_func2(thd, ref_ptrs, + all_fields, item_sum->ref_by, 0); + } while (item_sum != end); + } + + if (select_lex->inner_refs_list.elements && + fix_inner_refs(thd, all_fields, select_lex, ref_ptrs)) + DBUG_RETURN(-1); + + if (group_list) + { + /* + Because HEAP tables can't index BIT fields we need to use an + additional hidden field for grouping because later it will be + converted to a LONG field. Original field will remain of the + BIT type and will be returned to a client. 
+ */ + for (ORDER *ord= group_list; ord; ord= ord->next) + { + if ((*ord->item)->type() == Item::FIELD_ITEM && + (*ord->item)->field_type() == MYSQL_TYPE_BIT) + { + Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item); + if (!field) + DBUG_RETURN(-1); + int el= all_fields.elements; + ref_ptrs[el]= field; + all_fields.push_front(field, thd->mem_root); + ord->item= &ref_ptrs[el]; + } + } + } + + /* + Check if there are references to un-aggregated columns when computing + aggregate functions with implicit grouping (there is no GROUP BY). + */ + if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list && + !(select_lex->master_unit()->item && + select_lex->master_unit()->item->is_in_predicate() && + select_lex->master_unit()->item->get_IN_subquery()-> + test_set_strategy(SUBS_MAXMIN_INJECTED)) && + select_lex->non_agg_field_used() && + select_lex->agg_func_used()) + { + my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS, + ER_THD(thd, ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0)); + DBUG_RETURN(-1); + } + { + /* Caclulate the number of groups */ + send_group_parts= 0; + for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next) + send_group_parts++; + } + + procedure= setup_procedure(thd, proc_param, result, fields_list, &error); + if (unlikely(error)) + goto err; /* purecov: inspected */ + if (procedure) + { + if (setup_new_fields(thd, fields_list, all_fields, + procedure->param_fields)) + goto err; /* purecov: inspected */ + if (procedure->group) + { + if (!test_if_subpart(procedure->group,group_list)) + { /* purecov: inspected */ + my_message(ER_DIFF_GROUPS_PROC, ER_THD(thd, ER_DIFF_GROUPS_PROC), + MYF(0)); /* purecov: inspected */ + goto err; /* purecov: inspected */ + } + } + if (order && (procedure->flags & PROC_NO_SORT)) + { /* purecov: inspected */ + my_message(ER_ORDER_WITH_PROC, ER_THD(thd, ER_ORDER_WITH_PROC), + MYF(0)); /* purecov: inspected */ + goto err; /* purecov: inspected */ + } + if 
(thd->lex->derived_tables) + { + /* + Queries with derived tables and PROCEDURE are not allowed. + Many of such queries are disallowed grammatically, but there + are still some complex cases: + SELECT 1 FROM (SELECT 1) a PROCEDURE ANALYSE() + */ + my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", + thd->lex->derived_tables & DERIVED_VIEW ? + "view" : "subquery"); + goto err; + } + if (thd->lex->sql_command != SQLCOM_SELECT) + { + // EXPLAIN SELECT * FROM t1 PROCEDURE ANALYSE() + my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT"); + goto err; + } + } + + if (thd->trace_started()) + { + Json_writer_object trace_wrapper(thd); + opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper); + } + + if (!procedure && result && result->prepare(fields_list, unit_arg)) + goto err; /* purecov: inspected */ + + unit= unit_arg; + if (prepare_stage2()) + goto err; + + DBUG_RETURN(0); // All OK + +err: + delete procedure; /* purecov: inspected */ + procedure= 0; + DBUG_RETURN(-1); /* purecov: inspected */ +} + + +/** + Second phase of prepare where we collect some statistic. + + @details + We made this part separate to be able recalculate some statistic after + transforming subquery on optimization phase. 
+*/ + +bool JOIN::prepare_stage2() +{ + bool res= TRUE; + DBUG_ENTER("JOIN::prepare_stage2"); + + /* Init join struct */ + count_field_types(select_lex, &tmp_table_param, all_fields, 0); + this->group= group_list != 0; + + if (tmp_table_param.sum_func_count && !group_list) + { + implicit_grouping= TRUE; + // Result will contain zero or one row - ordering is meaningless + order= NULL; + } + +#ifdef RESTRICTED_GROUP + if (implicit_grouping) + { + my_message(ER_WRONG_SUM_SELECT,ER_THD(thd, ER_WRONG_SUM_SELECT),MYF(0)); + goto err; + } +#endif + if (select_lex->olap == ROLLUP_TYPE && rollup_init()) + goto err; + if (alloc_func_list() || + make_sum_func_list(all_fields, fields_list, false)) + goto err; + + res= FALSE; +err: + DBUG_RETURN(res); /* purecov: inspected */ +} + + +bool JOIN::build_explain() +{ + DBUG_ENTER("JOIN::build_explain"); + have_query_plan= QEP_AVAILABLE; + + /* + explain data must be created on the Explain_query::mem_root. Because it's + just a memroot, not an arena, explain data must not contain any Items + */ + MEM_ROOT *old_mem_root= thd->mem_root; + Item *old_free_list __attribute__((unused))= thd->free_list; + thd->mem_root= thd->lex->explain->mem_root; + bool res= save_explain_data(thd->lex->explain, false /* can overwrite */, + need_tmp, + !skip_sort_order && !no_order && (order || group_list), + select_distinct); + thd->mem_root= old_mem_root; + DBUG_ASSERT(thd->free_list == old_free_list); // no Items were created + if (res) + DBUG_RETURN(1); + uint select_nr= select_lex->select_number; + JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt(); + for (uint i= 0; i < aggr_tables; i++, curr_tab++) + { + if (select_nr == FAKE_SELECT_LEX_ID) + { + /* this is a fake_select_lex of a union */ + select_nr= select_lex->master_unit()->first_select()->select_number; + curr_tab->tracker= thd->lex->explain->get_union(select_nr)-> + get_tmptable_read_tracker(); + } + else if (select_nr < INT_MAX) + { + Explain_select *tmp= 
thd->lex->explain->get_select(select_nr); + if (tmp) + curr_tab->tracker= tmp->get_using_temporary_read_tracker(); + } + } + DBUG_RETURN(0); +} + + +int JOIN::optimize() +{ + int res= 0; + join_optimization_state init_state= optimization_state; + if (select_lex->pushdown_select) + { + // Do same as JOIN::optimize_inner does: + fields= &select_lex->item_list; + + if (!(select_options & SELECT_DESCRIBE)) + { + /* Prepare to execute the query pushed into a foreign engine */ + res= select_lex->pushdown_select->prepare(); + } + with_two_phase_optimization= false; + } + else if (optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE) + res= optimize_stage2(); + else + { + // to prevent double initialization on EXPLAIN + if (optimization_state != JOIN::NOT_OPTIMIZED) + return FALSE; + optimization_state= JOIN::OPTIMIZATION_IN_PROGRESS; + res= optimize_inner(); + } + if (!with_two_phase_optimization || + init_state == JOIN::OPTIMIZATION_PHASE_1_DONE) + { + if (!res && have_query_plan != QEP_DELETED) + res= build_explain(); + optimization_state= JOIN::OPTIMIZATION_DONE; + } + return res; +} + + +/** + @brief + Create range filters objects needed in execution for all join tables + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function creates a Rowid_filter object + for this range filter. In order to do this the function first constructs + a quick select to scan the range for this range filter. Then it creates + a container for the range filter and finally constructs a Range_rowid_filter + object a pointer to which is set in the field JOIN_TAB::rowid_filter of + the joined table. + + @retval false Ok + @retval true Error +*/ + +bool JOIN::make_range_rowid_filters() +{ + DBUG_ENTER("make_range_rowid_filters"); + + /* + Do not build range filters with detected impossible WHERE. + Anyway conditions cannot be used anymore to extract ranges for filters. 
+ */ + if (const_table_map != found_const_table_map) + DBUG_RETURN(0); + + JOIN_TAB *tab; + + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (!tab->range_rowid_filter_info) + continue; + + DBUG_ASSERT(!(tab->ref.key >= 0 && + tab->ref.key == (int) tab->range_rowid_filter_info->key_no)); + DBUG_ASSERT(!(tab->ref.key == -1 && tab->quick && + tab->quick->index == tab->range_rowid_filter_info->key_no)); + + int err; + SQL_SELECT *sel= NULL; + Rowid_filter_container *filter_container= NULL; + Item **sargable_cond= get_sargable_cond(this, tab->table); + sel= make_select(tab->table, const_table_map, const_table_map, + *sargable_cond, (SORT_INFO*) 0, 1, &err); + if (!sel) + continue; + + key_map filter_map; + filter_map.clear_all(); + filter_map.set_bit(tab->range_rowid_filter_info->key_no); + filter_map.merge(tab->table->with_impossible_ranges); + bool force_index_save= tab->table->force_index; + tab->table->force_index= true; + quick_select_return rc; + rc= sel->test_quick_select(thd, filter_map, (table_map) 0, + (ha_rows) HA_POS_ERROR, true, false, true, + true); + tab->table->force_index= force_index_save; + if (rc == SQL_SELECT::ERROR || thd->is_error()) + { + DBUG_RETURN(true); /* Fatal error */ + } + /* + If SUBS_IN_TO_EXISTS strtrategy is chosen for the subquery then + additional conditions are injected into WHERE/ON/HAVING and it may + happen that the call of test_quick_select() discovers impossible range. 
+ */ + if (rc == SQL_SELECT::IMPOSSIBLE_RANGE) + { + const_table_map|= tab->table->map; + goto no_filter; + } + DBUG_ASSERT(sel->quick); + filter_container= + tab->range_rowid_filter_info->create_container(); + if (filter_container) + { + tab->rowid_filter= + new (thd->mem_root) Range_rowid_filter(tab->table, + tab->range_rowid_filter_info, + filter_container, sel); + if (tab->rowid_filter) + continue; + } + no_filter: + if (sel->quick) + delete sel->quick; + delete sel; + } + + DBUG_RETURN(0); +} + + +/** + @brief + Allocate memory the rowid containers of the used the range filters + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function allocate memory for the + rowid container employed by the filter. On success it lets the table engine + know that what rowid filter will be used when accessing the table rows. + + @retval false always +*/ + +bool +JOIN::init_range_rowid_filters() +{ + DBUG_ENTER("init_range_rowid_filters"); + + JOIN_TAB *tab; + + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (!tab->rowid_filter) + continue; + if (tab->rowid_filter->get_container()->alloc()) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + continue; + } + tab->table->file->rowid_filter_push(tab->rowid_filter); + tab->is_rowid_filter_built= false; + } + DBUG_RETURN(0); +} + +/** + global select optimisation. 

  @note
    error code saved in field 'error'. Not every failure path below assigns
    'error' explicitly before returning.

  @retval
    0   success
  @retval
    1   error (the setup_ftfuncs() failure path returns -1 instead)
*/

int
JOIN::optimize_inner()
{
  DBUG_ENTER("JOIN::optimize_inner");
  /* Set to true below whenever we jump to setup_subq_exit */
  subq_exit_fl= false;

  DEBUG_SYNC(thd, "before_join_optimize");
  THD_STAGE_INFO(thd, stage_optimizing);
#ifndef DBUG_OFF
  dbug_join_tab_array_size= 0;
#endif

  // rownum used somewhere in query, no limits and it is derived
  if (unlikely(thd->lex->with_rownum &&
               select_lex->first_cond_optimization &&
               select_lex->master_unit()->derived))
    optimize_upper_rownum_func();

  do_send_rows = (unit->lim.get_select_limit()) ? 1 : 0;

  set_allowed_join_cache_types();
  need_distinct= TRUE;

  Json_writer_object trace_wrapper(thd);
  Json_writer_object trace_prepare(thd, "join_optimization");
  trace_prepare.add_select_number(select_lex->select_number);
  Json_writer_array trace_steps(thd, "steps");

  /*
    Needed in case optimizer short-cuts,
    set properly in make_aggr_tables_info()
  */
  fields= &select_lex->item_list;

  if (select_lex->first_cond_optimization)
  {
    //Do it only for the first execution
    /* Merge all mergeable derived tables/views in this SELECT. */
    if (select_lex->handle_derived(thd->lex, DT_MERGE))
      DBUG_RETURN(TRUE);
  }

  if (select_lex->first_cond_optimization &&
      transform_in_predicates_into_in_subq(thd))
    DBUG_RETURN(1);

  /*
    Update used tables after all handling derived table procedures
    After this call, select_lex->select_list_tables contains the table
    bits of all items in the select list (but not bits from WHERE clause or
    other items).
  */
  select_lex->update_used_tables();

  /*
    In fact we transform underlying subqueries after their 'prepare' phase and
    before 'optimize' from upper query 'optimize' to allow semijoin
    conversion happened (which done in the same way).
  */
  if (select_lex->first_cond_optimization &&
      conds && conds->walk(&Item::exists2in_processor, 0, thd))
    DBUG_RETURN(1);
  /*
    TODO
    make view to decide if it is possible to write to WHERE directly or make
    Semi-Joins able to process ON condition if it is possible
    for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
    {
      if (tbl->on_expr &&
          tbl->on_expr->walk(&Item::exists2in_processor, 0, thd))
        DBUG_RETURN(1);
    }
  */

  if (transform_max_min_subquery())
    DBUG_RETURN(1); /* purecov: inspected */

  if (select_lex->first_cond_optimization)
  {
    /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
    if (convert_join_subqueries_to_semijoins(this))
      DBUG_RETURN(1); /* purecov: inspected */
    /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
    select_lex->update_used_tables();
  }

  eval_select_list_used_tables();

  if (select_lex->options & OPTION_SCHEMA_TABLE &&
      optimize_schema_tables_memory_usage(select_lex->leaf_tables))
    DBUG_RETURN(1);

  /* NOTE(review): this is the only failure path here that returns -1 */
  if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
    DBUG_RETURN(-1);

  row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
              unit->lim.get_select_limit());
  /* select_limit is used to decide if we are likely to scan the whole table */
  select_limit= unit->lim.get_select_limit();
  if (having || (select_options & OPTION_FOUND_ROWS))
    select_limit= HA_POS_ERROR;
#ifdef HAVE_REF_TO_FIELDS                       // Not done yet
  /* Add HAVING to WHERE if possible */
  if (having && !group_list && !sum_func_count)
  {
    if (!conds)
    {
      conds= having;
      having= 0;
    }
    else if ((conds=new (thd->mem_root) Item_cond_and(conds,having)))
    {
      /*
        Item_cond_and can't be fixed after creation, so we do not check
        conds->fixed()
      */
      conds->fix_fields(thd, &conds);
      conds->change_ref_to_fields(thd, tables_list);
      conds->top_level_item();
      having= 0;
    }
  }
#endif

  SELECT_LEX *sel= select_lex;
  if (sel->first_cond_optimization)
  {
    /*
      The following code will allocate the new items in a permanent
      MEMROOT for prepared statements and stored procedures.

      But first we need to ensure that thd->lex->explain is allocated
      in the execution arena
    */
    create_explain_query_if_not_exists(thd->lex, thd->mem_root);

    Query_arena *arena, backup;
    arena= thd->activate_stmt_arena_if_needed(&backup);

    /* From now on this block is skipped for this select_lex */
    sel->first_cond_optimization= 0;

    /* Convert all outer joins to inner joins if possible */
    conds= simplify_joins(this, join_list, conds, TRUE, FALSE);

    add_table_function_dependencies(join_list, table_map(-1));

    if (thd->is_error() || select_lex->save_leaf_tables(thd))
    {
      /* The active arena must be restored on every exit from this block */
      if (arena)
        thd->restore_active_arena(arena, &backup);
      DBUG_RETURN(1);
    }
    build_bitmap_for_nested_joins(join_list, 0);

    sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;

    sel->where= conds;

    select_lex->update_used_tables();

    if (arena)
      thd->restore_active_arena(arena, &backup);
  }

  if (!allowed_top_level_tables)
    calc_allowed_top_level_tables(select_lex);

  if (optimize_constant_subqueries())
    DBUG_RETURN(1);

  if (conds && conds->with_subquery())
    (void) conds->walk(&Item::cleanup_is_expensive_cache_processor,
                       0, (void *) 0);
  if (having && having->with_subquery())
    (void) having->walk(&Item::cleanup_is_expensive_cache_processor,
                        0, (void *) 0);

  /*
    Conditions collected by the jtbm semi-join setup calls; the list is
    used twice in this function and emptied in between.
  */
  List<Item> eq_list;

  if (setup_degenerate_jtbm_semi_joins(this, join_list, eq_list))
    DBUG_RETURN(1);

  if (eq_list.elements != 0)
  {
    Item *new_cond;

    /* A single condition is used as is, several ones are AND-ed together */
    if (eq_list.elements == 1)
      new_cond= eq_list.pop();
    else
      new_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);

    if (new_cond &&
        ((new_cond->fix_fields(thd, &new_cond) ||
          !(conds= and_items(thd, conds, new_cond)) ||
          conds->fix_fields(thd, &conds))))
      DBUG_RETURN(TRUE);
  }
  eq_list.empty();

  if (select_lex->cond_pushed_into_where)
  {
    conds= and_conds(thd, conds, select_lex->cond_pushed_into_where);
    if (conds && conds->fix_fields(thd, &conds))
      DBUG_RETURN(1);
  }
  if (select_lex->cond_pushed_into_having)
  {
    having= and_conds(thd, having, select_lex->cond_pushed_into_having);
    if (having)
    {
      /* The two flags are raised only for the duration of fix_fields() */
      select_lex->having_fix_field= 1;
      select_lex->having_fix_field_for_pushed_cond= 1;
      if (having->fix_fields(thd, &having))
        DBUG_RETURN(1);
      select_lex->having_fix_field= 0;
      select_lex->having_fix_field_for_pushed_cond= 0;
    }
  }

  bool ignore_on_expr= false;
  /*
    PS/SP note: on_expr of versioned table can not be reallocated
    (see build_equal_items() below) because it can be not rebuilt
    at second invocation.
  */
  if (!thd->stmt_arena->is_conventional() && thd->mem_root != thd->stmt_arena->mem_root)
    for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
      if (tbl->table && tbl->on_expr && tbl->table->versioned())
      {
        ignore_on_expr= true;
        break;
      }

  transform_in_predicates_into_equalities(thd);

  conds= optimize_cond(this, conds, join_list, ignore_on_expr,
                       &cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS);

  if (thd->is_error())
  {
    error= 1;
    DBUG_PRINT("error",("Error from optimize_cond"));
    DBUG_RETURN(1);
  }
  if (select_lex->with_rownum && ! order && ! group_list &&
      !select_distinct && conds && select_lex == unit->global_parameters())
    optimize_rownum(thd, unit, conds);

  having= optimize_cond(this, having, join_list, TRUE,
                        &having_value, &having_equal);

  if (thd->is_error())
  {
    error= 1;
    DBUG_PRINT("error",("Error from optimize_cond"));
    DBUG_RETURN(1);
  }

  /* Do not push into WHERE from HAVING if cond_value == Item::COND_FALSE */

  if (thd->lex->sql_command == SQLCOM_SELECT &&
      optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING) &&
      cond_value != Item::COND_FALSE)
  {
    having=
      select_lex->pushdown_from_having_into_where(thd, having);
    if (select_lex->attach_to_conds.elements != 0)
    {
      conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal,
                                                  select_lex->attach_to_conds,
                                                  &cond_value);
      sel->attach_to_conds.empty();
    }
  }

  if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY))
  {
    TABLE_LIST *tbl;
    List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
    while ((tbl= li++))
      if (tbl->jtbm_subselect)
      {
        if (tbl->jtbm_subselect->pushdown_cond_for_in_subquery(thd, conds))
          DBUG_RETURN(1);
      }
  }

  if (setup_jtbm_semi_joins(this, join_list, eq_list))
    DBUG_RETURN(1);

  if (eq_list.elements != 0)
  {
    conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal,
                                                eq_list, &cond_value);

    /*
      A NULL result is an error unless cond_value says that the condition
      was folded to a constant TRUE or FALSE.
    */
    if (!conds &&
        cond_value != Item::COND_FALSE && cond_value != Item::COND_TRUE)
      DBUG_RETURN(TRUE);
  }

  if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED))
  {
    TABLE_LIST *tbl;
    List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
    while ((tbl= li++))
    {
      /* Only materialized derived tables are processed in this loop */
      if (tbl->is_materialized_derived())
      {
        /*
          Skip the derived table if its first select has a JOIN that is
          in state OPTIMIZATION_PHASE_1_DONE of a two-phase optimization.
        */
        JOIN *join= tbl->get_unit()->first_select()->join;
        if (join &&
            join->optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE &&
            join->with_two_phase_optimization)
          continue;
        /*
          Do not push conditions from where into materialized inner tables
          of outer joins: this is not valid.
        */
        if (!tbl->is_inner_table_of_outer_join())
        {
          if (pushdown_cond_for_derived(thd, conds, tbl))
            DBUG_RETURN(1);
        }
        if (mysql_handle_single_derived(thd->lex, tbl, DT_OPTIMIZE))
          DBUG_RETURN(1);
      }
    }
  }
  else
  {
    /* Run optimize phase for all derived tables/views used in this SELECT. */
    if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
      DBUG_RETURN(1);
  }
  {
    if (select_lex->where)
    {
      select_lex->cond_value= cond_value;
      if (sel->where != conds && cond_value == Item::COND_OK)
        thd->change_item_tree(&sel->where, conds);
    }
    if (select_lex->having)
    {
      select_lex->having_value= having_value;
      if (sel->having != having && having_value == Item::COND_OK)
        thd->change_item_tree(&sel->having, having);
    }
    if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE ||
        (!unit->lim.get_select_limit() &&
         !(select_options & OPTION_FOUND_ROWS)))
    {                                           /* Impossible cond */
      /*
        A non-zero limit means we got here because of a FALSE condition,
        otherwise because of the zero limit.
      */
      if (unit->lim.get_select_limit())
      {
        DBUG_PRINT("info", (having_value == Item::COND_FALSE ?
                              "Impossible HAVING" : "Impossible WHERE"));
        zero_result_cause=  having_value == Item::COND_FALSE ?
                             "Impossible HAVING" : "Impossible WHERE";
      }
      else
      {
        DBUG_PRINT("info", ("Zero limit"));
        zero_result_cause= "Zero limit";
      }
      table_count= top_join_tab_count= 0;
      handle_implicit_grouping_with_window_funcs();
      error= 0;
      subq_exit_fl= true;
      goto setup_subq_exit;
    }
  }

#ifdef WITH_PARTITION_STORAGE_ENGINE
  {
    TABLE_LIST *tbl;
    List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
    while ((tbl= li++))
    {
      Item **prune_cond= get_sargable_cond(this, tbl->table);
      tbl->table->all_partitions_pruned_away=
        prune_partitions(thd, tbl->table, *prune_cond);
    }
  }
#endif

  /*
     Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if
     there is implicit grouping (aggregate functions but no
     group_list). In this case, the result set shall only contain one
     row.
  */
  if (tables_list && implicit_grouping)
  {
    int res;
    /*
      opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
      to the WHERE conditions,
      or 1 if all items were resolved (optimized away),
      or 0, or an error number HA_ERR_...

      If all items were resolved by opt_sum_query, there is no need to
      open any tables.
    */

    /*
      The following resetting and restoring of sum_funcs is needed to
      go around a bug in spider where it assumes that
      make_sum_func_list() has not been called yet and do logical
      choices based on this if special handling of min/max functions should
      be done. We disable this special handling while we are trying to find
      out if we can replace MIN/MAX values with constants.
    */
    Item_sum **save_func_sums= sum_funcs, *tmp_sum_funcs= 0;
    sum_funcs= &tmp_sum_funcs;
    res= opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds);
    sum_funcs= save_func_sums;

    if (res)
    {
      DBUG_ASSERT(res >= 0);
      if (res == HA_ERR_KEY_NOT_FOUND)
      {
        DBUG_PRINT("info",("No matching min/max row"));
        zero_result_cause= "No matching min/max row";
        table_count= top_join_tab_count= 0;
        error=0;
        subq_exit_fl= true;
        handle_implicit_grouping_with_window_funcs();
        goto setup_subq_exit;
      }
      /* Any other value greater than 1 is passed on as an error code */
      if (res > 1)
      {
        error= res;
        DBUG_PRINT("error",("Error from opt_sum_query"));
        DBUG_RETURN(1);
      }

      /* res == 1: all items were resolved by opt_sum_query() */
      DBUG_PRINT("info",("Select tables optimized away"));
      if (!select_lex->have_window_funcs())
        zero_result_cause= "Select tables optimized away";
      tables_list= 0;                           // All tables resolved
      select_lex->min_max_opt_list.empty();
      const_tables= top_join_tab_count= table_count;
      handle_implicit_grouping_with_window_funcs();
      /*
        Extract all table-independent conditions and replace the WHERE
        clause with them. All other conditions were computed by opt_sum_query
        and the MIN/MAX/COUNT function(s) have been replaced by constants,
        so there is no need to compute the whole WHERE clause again.
        Notice that make_cond_for_table() will always succeed to remove all
        computed conditions, because opt_sum_query() is applicable only to
        conjunctions.
        Preserve conditions for EXPLAIN.
      */
      if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
      {
        COND *table_independent_conds=
          make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1,
                              FALSE, FALSE);
        if (!table_independent_conds && thd->is_error())
          DBUG_RETURN(1);
        DBUG_EXECUTE("where",
                     print_where(table_independent_conds,
                                 "where after opt_sum_query()",
                                 QT_ORDINARY););
        conds= table_independent_conds;
      }
    }
  }
  if (!tables_list)
  {
    DBUG_PRINT("info",("No tables"));
    error= 0;
    subq_exit_fl= true;
    goto setup_subq_exit;
  }
  error= -1;                                    // Error is sent to client
  /* get_sort_by_table() call used to be here: */
  MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table));

  /*
    We have to remove constants and duplicates from group_list before
    calling make_join_statistics() as this may call get_best_group_min_max()
    which needs a simplified group_list.
  */
  if (group_list && table_count == 1)
  {
    group_list= remove_const(this, group_list, conds,
                             rollup.state == ROLLUP::STATE_NONE,
                             &simple_group);
    if (unlikely(thd->is_error()))
    {
      error= 1;
      DBUG_RETURN(1);
    }
    if (!group_list)
    {
      /* The output has only one row */
      order=0;
      simple_order=1;
      group_optimized_away= 1;
      select_distinct=0;
    }
  }

  /* Calculate how to do the join */
  THD_STAGE_INFO(thd, stage_statistics);
  result->prepare_to_read_rows();
  if (unlikely(make_join_statistics(this, select_lex->leaf_tables,
                                    &keyuse)) ||
      unlikely(thd->is_error()))
  {
    DBUG_PRINT("error",("Error: make_join_statistics() failed"));
    DBUG_RETURN(1);
  }

  /*
    If a splittable materialized derived/view dt_i is embedded into
    another splittable materialized derived/view dt_o then
    splitting plans for dt_i and dt_o are evaluated independently.
    First the optimizer looks for the best splitting plan sp_i for dt_i.
    It happens when non-splitting plans for dt_o are evaluated.
    The cost of sp_i is considered as the cost of materialization of dt_i
    when evaluating any splitting plan for dt_o.
  */
  if (fix_all_splittings_in_plan())
    DBUG_RETURN(1);

setup_subq_exit:
  with_two_phase_optimization= check_two_phase_optimization(thd);
  if (with_two_phase_optimization)
    optimization_state= JOIN::OPTIMIZATION_PHASE_1_DONE;
  else
  {
    if (optimize_stage2())
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


/**
  Second stage of the optimization of a JOIN.

  @details
    JOIN::optimize_inner() calls this function as its last step unless
    check_two_phase_optimization() returned true for the join.
    If subq_exit_fl is set on entry the function goes directly to the
    setup_subq_exit label. Otherwise it builds the execution plan from the
    chosen join order (get_best_combination()), creates range rowid filters,
    substitutes best equal fields in WHERE / HAVING / ON expressions and ref
    items, attaches conditions to tables (make_join_select()), decides
    whether DISTINCT / GROUP BY / ORDER BY can be removed or resolved through
    an index, sets up the read info of the join tabs and finally calls
    make_aggr_tables_info().

  @note
    Paths that detect an empty result set zero_result_cause and jump to
    setup_subq_exit.

  @retval
    0         success
  @retval
    non-zero  error. Most paths return 1, a setup_subquery_caches() failure
              returns -1. If rewrite_to_index_subquery_engine() returns
              a value other than -1 that value is returned as is.
*/

int JOIN::optimize_stage2()
{
  ulonglong select_opts_for_readinfo;
  uint no_jbuf_after;
  JOIN_TAB *tab;
  DBUG_ENTER("JOIN::optimize_stage2");

  if (subq_exit_fl)
    goto setup_subq_exit;

  if (unlikely(thd->check_killed()))
    DBUG_RETURN(1);

  /* Generate an execution plan from the found optimal join order. */
  if (get_best_combination())
    DBUG_RETURN(1);

  if (make_range_rowid_filters())
    DBUG_RETURN(1);

  if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
    DBUG_RETURN(1);

  if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
    drop_unused_derived_keys();

  if (rollup.state != ROLLUP::STATE_NONE)
  {
    if (rollup_process_const_fields())
    {
      DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
      DBUG_RETURN(1);
    }
  }
  else
  {
    /* Remove distinct if only const tables */
    select_distinct= select_distinct && (const_tables != table_count);
  }

  THD_STAGE_INFO(thd, stage_preparing);
  if (result->initialize_tables(this))
  {
    DBUG_PRINT("error",("Error: initialize_tables() failed"));
    DBUG_RETURN(1);                             // error == -1
  }
  if (const_table_map != found_const_table_map &&
      !(select_options & SELECT_DESCRIBE))
  {
    // There is at least one empty const table
    zero_result_cause= "no matching row in const table";
    DBUG_PRINT("error",("Error: %s", zero_result_cause));
    error= 0;
    handle_implicit_grouping_with_window_funcs();
    goto setup_subq_exit;
  }
  if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
      best_read > (double) thd->variables.max_join_size &&
      !(select_options & SELECT_DESCRIBE))
  {                                             /* purecov: inspected */
    my_message(ER_TOO_BIG_SELECT, ER_THD(thd, ER_TOO_BIG_SELECT), MYF(0));
    error= -1;
    DBUG_RETURN(1);
  }
  if (const_tables && !thd->locked_tables_mode &&
      !(select_options & SELECT_NO_UNLOCK))
  {
    /*
      Unlock all tables, except sequences, as accessing these may still
      require table updates. It's safe to ignore result code as all
      tables were opened for read only.
    */
    (void) mysql_unlock_some_tables(thd, table, const_tables,
                                    GET_LOCK_SKIP_SEQUENCES);
  }
  if (!conds && outer_join)
  {
    /* Handle the case where we have an OUTER JOIN without a WHERE */
    conds= (Item*) Item_true;
  }

  if (impossible_where)
  {
    zero_result_cause=
      "Impossible WHERE noticed after reading const tables";
    select_lex->mark_const_derived(zero_result_cause);
    handle_implicit_grouping_with_window_funcs();
    goto setup_subq_exit;
  }

  select= make_select(*table, const_table_map,
                      const_table_map, conds, (SORT_INFO*) 0, 1, &error);
  if (unlikely(error))
  {                                             /* purecov: inspected */
    error= -1;                                  /* purecov: inspected */
    DBUG_PRINT("error",("Error: make_select() failed"));
    DBUG_RETURN(1);
  }

  reset_nj_counters(this, join_list);
  if (make_outerjoin_info(this))
  {
    DBUG_RETURN(1);
  }

  /*
    Among the equal fields belonging to the same multiple equality
    choose the one that is to be retrieved first and substitute
    all references to these in where condition for a reference for
    the selected field.
  */
  if (conds)
  {
    conds= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, conds,
                                           cond_equal, map2table, true);
    if (unlikely(thd->is_error()))
    {
      error= 1;
      DBUG_PRINT("error",("Error from substitute_for_best_equal"));
      DBUG_RETURN(1);
    }
    conds->update_used_tables();

    if (unlikely(thd->trace_started()))
      trace_condition(thd, "WHERE", "substitute_best_equal", conds);

    DBUG_EXECUTE("where",
                 print_where(conds,
                             "after substitute_best_equal",
                             QT_ORDINARY););
  }
  if (having)
  {
    having= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, having,
                                            having_equal, map2table, false);
    if (thd->is_error())
    {
      error= 1;
      DBUG_PRINT("error",("Error from substitute_for_best_equal"));
      DBUG_RETURN(1);
    }
    /* having is re-checked as the substitution above may have reset it */
    if (having)
    {
      having->update_used_tables();
      if (unlikely(thd->trace_started()))
        trace_condition(thd, "HAVING", "substitute_best_equal", having);
    }

    DBUG_EXECUTE("having",
                 print_where(having,
                             "after substitute_best_equal",
                             QT_ORDINARY););
  }

  /*
    Perform the optimization on fields evaluation mentioned above
    for all on expressions.
  */
  for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
       tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
  {
    if (*tab->on_expr_ref)
    {
      *tab->on_expr_ref= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB,
                                                         *tab->on_expr_ref,
                                                         tab->cond_equal,
                                                         map2table, true);
      if (unlikely(thd->is_error()))
      {
        error= 1;
        DBUG_PRINT("error",("Error from substitute_for_best_equal"));
        DBUG_RETURN(1);
      }
      (*tab->on_expr_ref)->update_used_tables();
      if (unlikely(thd->trace_started()))
      {
        trace_condition(thd, "ON expr", "substitute_best_equal",
                        (*tab->on_expr_ref), tab->table->alias.c_ptr());
      }
    }
  }

  /*
    Perform the optimization on fields evaluation mentioned above
    for all used ref items.
  */
  for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
       tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
  {
    /*
      Index into tab->ref.key_copy. It is advanced only for the key parts
      that are not skipped by the 'continue' below.
    */
    uint key_copy_index=0;
    for (uint i=0; i < tab->ref.key_parts; i++)
    {
      Item **ref_item_ptr= tab->ref.items+i;
      Item *ref_item= *ref_item_ptr;
      if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
        continue;
      /*
        Apply the top level multiple equalities first, then those of the
        first_inner / first_upper chain of the tab.
      */
      COND_EQUAL *equals= cond_equal;
      JOIN_TAB *first_inner= tab->first_inner;
      while (equals)
      {
        ref_item= substitute_for_best_equal_field(thd, tab, ref_item,
                                                  equals, map2table, true);
        if (unlikely(thd->is_error()))
          DBUG_RETURN(1);

        if (first_inner)
        {
          equals= first_inner->cond_equal;
          first_inner= first_inner->first_upper;
        }
        else
          equals= 0;
      }
      ref_item->update_used_tables();
      if (*ref_item_ptr != ref_item)
      {
        /* The ref item was replaced: adjust the matching key copy object */
        *ref_item_ptr= ref_item;
        Item *item= ref_item->real_item();
        store_key *key_copy= tab->ref.key_copy[key_copy_index];
        if (key_copy->type() == store_key::FIELD_STORE_KEY)
        {
          if (item->basic_const_item())
          {
            /* It is constant propagated here */
            tab->ref.key_copy[key_copy_index]=
              new store_key_const_item(*tab->ref.key_copy[key_copy_index],
                                       item);
          }
          else if (item->const_item())
          {
            tab->ref.key_copy[key_copy_index]=
              new store_key_item(*tab->ref.key_copy[key_copy_index],
                                 item, TRUE);
          }
          else
          {
            store_key_field *field_copy= ((store_key_field *)key_copy);
            DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
            field_copy->change_source_field((Item_field *) item);
          }
        }
      }
      key_copy_index++;
    }
  }

  /*
    For EXPLAIN the empty const table case was not short-cut above;
    here the WHERE condition is replaced by a constant FALSE instead.
  */
  if (conds && const_table_map != found_const_table_map &&
      (select_options & SELECT_DESCRIBE))
    conds= (Item*) Item_false;

  /* Cache constant expressions in WHERE, HAVING, ON clauses. */
  cache_const_exprs();

  if (setup_semijoin_loosescan(this))
    DBUG_RETURN(1);

  if (make_join_select(this, select, conds))
  {
    if (thd->is_error())
      DBUG_RETURN(1);
    /* Non-zero result without an error is handled as an impossible WHERE */
    zero_result_cause=
      "Impossible WHERE noticed after reading const tables";
    select_lex->mark_const_derived(zero_result_cause);
    handle_implicit_grouping_with_window_funcs();
    goto setup_subq_exit;
  }

  error= -1;                                    /* if goto err */

  /* Optimize distinct away if possible */
  {
    ORDER *org_order= order;
    order=remove_const(this, order,conds,1, &simple_order);
    if (unlikely(thd->is_error()))
    {
      error= 1;
      DBUG_RETURN(1);
    }

    /*
      If we are using ORDER BY NULL or ORDER BY const_expression,
      return result in any order (even if we are using a GROUP BY)
    */
    if (!order && org_order)
      skip_sort_order= 1;
  }

  /*
    For FETCH ... WITH TIES save how many items order by had, after we've
    removed constant items that have no relevance on the final sorting.
  */
  if (unit->lim.is_with_ties())
  {
    DBUG_ASSERT(with_ties_order_count == 0);
    for (ORDER *it= order; it; it= it->next)
      with_ties_order_count+= 1;
  }


  /*
     Check if we can optimize away GROUP BY/DISTINCT.
     We can do that if there are no aggregate functions, the
     fields in DISTINCT clause (if present) and/or columns in GROUP BY
     (if present) contain direct references to all key parts of
     an unique index (in whatever order) and if the key parts of the
     unique index cannot contain NULLs.
     Note that the unique keys for DISTINCT and GROUP BY should not
     be the same (as long as they are unique).

     The FROM clause must contain a single non-constant table.
  */
  if (table_count - const_tables == 1 && (group || select_distinct) &&
      !tmp_table_param.sum_func_count &&
      (!join_tab[const_tables].select ||
       !join_tab[const_tables].select->quick ||
       join_tab[const_tables].select->quick->get_type() !=
       QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) &&
      !select_lex->have_window_funcs())
  {
    if (group && rollup.state == ROLLUP::STATE_NONE &&
        list_contains_unique_index(join_tab[const_tables].table,
                                   find_field_in_order_list,
                                   (void *) group_list))
    {
      /*
        We have found that grouping can be removed since groups correspond to
        only one row anyway, but we still have to guarantee correct result
        order. The line below effectively rewrites the query from GROUP BY
        <fields> to ORDER BY <fields>. There are three exceptions:
        - if skip_sort_order is set (see above), then we can simply skip
          GROUP BY;
        - if we are in a subquery, we don't have to maintain order unless there
          is a limit clause in the subquery.
        - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
          with the GROUP BY ones, i.e. either one is a prefix of another.
          We only check if the ORDER BY is a prefix of GROUP BY. In this case
          test_if_subpart() copies the ASC/DESC attributes from the original
          ORDER BY fields.
          If GROUP BY is a prefix of ORDER BY, then it is safe to leave
          'order' as is.
       */
      if (!order || test_if_subpart(group_list, order))
      {
        if (skip_sort_order ||
            (select_lex->master_unit()->item && select_limit == HA_POS_ERROR)) // This is a subquery
          order= NULL;
        else
          order= group_list;
      }
      /*
        If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
        rewritten to IGNORE INDEX FOR ORDER BY(fields).

        NOTE(review): this uses join_tab->table (the first element) while
        the checks around it use join_tab[const_tables].table. Confirm that
        this is intended when const_tables > 0.
      */
      join_tab->table->keys_in_use_for_order_by=
        join_tab->table->keys_in_use_for_group_by;
      group_list= 0;
      group= 0;
    }
    if (select_distinct &&
        list_contains_unique_index(join_tab[const_tables].table,
                                   find_field_in_item_list,
                                   (void *) &fields_list))
    {
      select_distinct= 0;
    }
  }
  if (group || tmp_table_param.sum_func_count)
  {
    if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE
        && !select_lex->have_window_funcs())
      select_distinct=0;
  }
  else if (select_distinct && table_count - const_tables == 1 &&
           rollup.state == ROLLUP::STATE_NONE &&
           !select_lex->have_window_funcs())
  {
    /*
      We are only using one table. In this case we change DISTINCT to a
      GROUP BY query if:
      - The GROUP BY can be done through indexes (no sort) and the ORDER
        BY only uses selected fields.
        (In this case we can later optimize away GROUP BY and ORDER BY)
      - We are scanning the whole table without LIMIT
        This can happen if:
        - We are using CALC_FOUND_ROWS
        - We are using an ORDER BY that can't be optimized away.

      We don't want to use this optimization when we are using LIMIT
      because in this case we can just create a temporary table that
      holds LIMIT rows and stop when this table is full.
    */
    bool all_order_fields_used;

    tab= &join_tab[const_tables];
    if (order)
    {
      bool fatal_err;
      skip_sort_order=
        test_if_skip_sort_order(tab, order, select_limit,
                                true,           // no_changes
                                &tab->table->keys_in_use_for_order_by,
                                &fatal_err);
      if (fatal_err)
        DBUG_RETURN(1);
    }
    if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array,
                                          order, fields_list, all_fields,
                                          &all_order_fields_used)))
    {
      /*
        Initialized here because test_if_skip_sort_order() below is not
        called when skip_sort_order is false.
      */
      bool fatal_err= 0;
      const bool skip_group=
        skip_sort_order &&
        test_if_skip_sort_order(tab, group_list, select_limit,
                                  true,         // no_changes
                                  &tab->table->keys_in_use_for_group_by,
                                  &fatal_err);
      if (fatal_err)
        DBUG_RETURN(1);

      count_field_types(select_lex, &tmp_table_param, all_fields, 0);
      if ((skip_group && all_order_fields_used) ||
          select_limit == HA_POS_ERROR ||
          (order && !skip_sort_order))
      {
        /*  Change DISTINCT to GROUP BY */
        select_distinct= 0;
        no_order= !order;
        if (all_order_fields_used)
        {
          if (order && skip_sort_order)
          {
            /*
              Force MySQL to read the table in sorted order to get result in
              ORDER BY order.
            */
            tmp_table_param.quick_group=0;
          }
          order=0;
        }
        group=1;                                // For end_write_group
      }
      else
        group_list= 0;
    }
    else if (thd->is_error())                  // End of memory
      DBUG_RETURN(1);
  }
  simple_group= rollup.state == ROLLUP::STATE_NONE;
  if (group)
  {
    /*
      Update simple_group and group_list as we now have more information, like
      which tables or columns are constant.
    */
    group_list= remove_const(this, group_list, conds,
                             rollup.state == ROLLUP::STATE_NONE,
                             &simple_group);
    if (unlikely(thd->is_error()))
    {
      error= 1;
      DBUG_RETURN(1);
    }
    if (!group_list)
    {
      /* The output has only one row */
      order=0;
      simple_order=1;
      select_distinct= 0;
      group_optimized_away= 1;
    }
  }

  calc_group_buffer(this, group_list);
  send_group_parts= tmp_table_param.group_parts; /* Save org parts */
  if (procedure && procedure->group)
  {
    group_list= procedure->group= remove_const(this, procedure->group, conds,
                                               1, &simple_group);
    if (unlikely(thd->is_error()))
    {
      error= 1;
      DBUG_RETURN(1);
    }
    calc_group_buffer(this, group_list);
  }

  /*
    We can ignore ORDER BY if it's a prefix of the GROUP BY list
    (as MariaDB is by default sorting on GROUP BY) or
    if there is no GROUP BY and aggregate functions are used
    (as the result will only contain one row).
  */
  if (order && (test_if_subpart(group_list, order) ||
                (!group_list && tmp_table_param.sum_func_count)))
    order=0;

  // Can't use sort on head table if using join buffering
  if (full_join || hash_join)
  {
    /*
      The value (TABLE *) 1 in sort_by_table is a special marker here:
      the first non-const table is used in that case.
    */
    TABLE *stable= (sort_by_table == (TABLE *) 1 ?
      join_tab[const_tables].table : sort_by_table);
    /*
      FORCE INDEX FOR ORDER BY can be used to prevent join buffering when
      sorting on the first table.
    */
    if (!stable || (!stable->force_index_order &&
                    !map2table[stable->tablenr]->keep_current_rowid))
    {
      if (group_list)
        simple_group= 0;
      if (order)
        simple_order= 0;
    }
  }

  need_tmp= test_if_need_tmp_table();

  /*
    If window functions are present then we can't have simple_order set to
    TRUE as the window function needs a temp table for computation.
    ORDER BY is computed after the window function computation is done, so
    the sort will be done on the temp table.
  */
  if (select_lex->have_window_funcs())
    simple_order= FALSE;

  /*
    If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
    whose columns are required to be returned in a sorted order, then
    the proper value for no_jbuf_after should be yielded by a call to
    the make_join_orderinfo function.
    Yet the current implementation of FORCE INDEX hints does not
    allow us to do it in a clean manner.

    The condition of the conditional expression below is the constant 1,
    so table_count is always used and make_join_orderinfo() is not called.
  */
  no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);

  // Don't use join buffering when we use MATCH
  select_opts_for_readinfo=
    (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
    (select_lex->ftfunc_list->elements ?  SELECT_NO_JOIN_CACHE : 0);

  if (select_lex->options & OPTION_SCHEMA_TABLE &&
       optimize_schema_tables_reads(this))
    DBUG_RETURN(1);

  if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
    DBUG_RETURN(1);

  /* Perform FULLTEXT search before all regular searches */
  if (!(select_options & SELECT_DESCRIBE))
    if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
      DBUG_RETURN(1);

  /*
    It's necessary to check const part of HAVING cond as
    there is a chance that some cond parts may become
    const items after make_join_statistics(for example
    when Item is a reference to const table field from
    outer join).
    This check is performed only for those conditions
    which do not use aggregate functions. In such case
    temporary table may not be used and const condition
    elements may be lost during further having
    condition transformation in JOIN::exec.
  */
  if (having && const_table_map && !having->with_sum_func())
  {
    having->update_used_tables();
    having= having->remove_eq_conds(thd, &select_lex->having_value, true);
    if (select_lex->having_value == Item::COND_FALSE)
    {
      having= (Item*) Item_false;
      zero_result_cause= "Impossible HAVING noticed after reading const tables";
      error= 0;
      select_lex->mark_const_derived(zero_result_cause);
      goto setup_subq_exit;
    }
  }

  if (optimize_unflattened_subqueries())
    DBUG_RETURN(1);

  /*
    Any result other than -1 from rewrite_to_index_subquery_engine()
    ends the optimization here and is returned to the caller as is.
  */
  int res;
  if ((res= rewrite_to_index_subquery_engine(this)) != -1)
    DBUG_RETURN(res);
  if (setup_subquery_caches())
    DBUG_RETURN(-1);

  /*
    Need to tell handlers that to play it safe, it should fetch all
    columns of the primary key of the tables: this is because MySQL may
    build row pointers for the rows, and for all columns of the primary key
    the read set has not necessarily been set by the server code.
  */
  if (need_tmp || select_distinct || group_list || order)
  {
    for (uint i= 0; i < table_count; i++)
    {
      if (!(table[i]->map & const_table_map))
        table[i]->prepare_for_position();
    }
  }

  DBUG_EXECUTE("info",TEST_join(this););

  if (!only_const_tables())
  {
    /* First non-const table; this declaration hides the outer 'tab' */
    JOIN_TAB *tab= &join_tab[const_tables];

    if (order && !need_tmp)
    {
      /*
        Force using of tmp table if sorting by a SP or UDF function due to
        their expensive and probably non-deterministic nature.
      */
      for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
      {
        Item *item= *tmp_order->item;
        if (item->is_expensive())
        {
          /* Force tmp table without sort */
          need_tmp=1; simple_order=simple_group=0;
          break;
        }
      }
    }

    /*
      Because filesort always does a full table scan or a quick range scan
      we must add the removed reference to the select for the table.
      We only need to do this when we have a simple_order or simple_group
      as in other cases the join is done before the sort.
    */
    if ((order || group_list) &&
        tab->type != JT_ALL &&
        tab->type != JT_FT &&
        tab->type != JT_REF_OR_NULL &&
        ((order && simple_order) || (group_list && simple_group)))
    {
      if (add_ref_to_table_cond(thd,tab)) {
        DBUG_RETURN(1);
      }
    }
    /*
      Investigate whether we may use an ordered index as part of either
      DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
      used for only the first of any of these terms to be executed. This
      is reflected in the order which we check for test_if_skip_sort_order()
      below. However we do not check for DISTINCT here, as it would have
      been transformed to a GROUP BY at this stage if it is a candidate for
      ordered index optimization.
      If a decision was made to use an ordered index, the availability
      of such an access path is stored in 'ordered_index_usage' for later
      use by 'execute' or 'explain'
    */
    DBUG_ASSERT(ordered_index_usage == ordered_index_void);

    if (group_list)   // GROUP BY honoured first
                      // (DISTINCT was rewritten to GROUP BY if skippable)
    {
      /*
        When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
        and thus force sorting on disk unless a group min-max optimization
        is going to be used as it is applied now only for one table queries
        with covering indexes.
      */
      if (!(select_options & SELECT_BIG_RESULT) ||
            (tab->select &&
             tab->select->quick &&
             tab->select->quick->get_type() ==
             QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
      {
        if (simple_group &&              // GROUP BY is possibly skippable
            !select_distinct)            // .. if not preceded by a DISTINCT
        {
          /*
            Calculate a possible 'limit' of table rows for 'GROUP BY':
            A specified 'LIMIT' is relative to the final resultset.
            'need_tmp' implies that there will be more postprocessing
            so the specified 'limit' should not be enforced yet.
          */
          bool fatal_err;
          const ha_rows limit = need_tmp ? HA_POS_ERROR : select_limit;
          if (test_if_skip_sort_order(tab, group_list, limit, false,
                                      &tab->table->keys_in_use_for_group_by,
                                      &fatal_err))
          {
            ordered_index_usage= ordered_index_group_by;
          }
          if (fatal_err)
            DBUG_RETURN(1);
        }

        /*
          If we are going to use semi-join LooseScan, it will depend
          on the selected index scan to be used.  If index is not used
          for the GROUP BY, we risk that sorting is put on the LooseScan
          table.  In order to avoid this, force use of temporary table.
          TODO: Explain the quick_group part of the test below.
         */
        if ((ordered_index_usage != ordered_index_group_by) &&
            ((tmp_table_param.quick_group && !procedure) ||
             (tab->emb_sj_nest &&
              best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN)))
        {
          need_tmp=1;
          simple_order= simple_group= false; // Force tmp table without sort
        }
      }
    }
    else if (order &&                      // ORDER BY wo/ preceding GROUP BY
             (simple_order || skip_sort_order)) // which is possibly skippable
    {
      bool fatal_err;
      if (test_if_skip_sort_order(tab, order, select_limit, false,
                                  &tab->table->keys_in_use_for_order_by,
                                  &fatal_err))
      {
        ordered_index_usage= ordered_index_order_by;
      }
      if (fatal_err)
        DBUG_RETURN(1);
    }
  }

  if (having)
    having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT);
  tmp_having= having;

  if (unlikely(thd->is_error()))
    DBUG_RETURN(TRUE);

  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Notice that currently loose index scan is applicable only for
    single table queries, thus it is sufficient to test only the first
    join_tab element of the plan for its access method.
  */
  if (join_tab->is_using_loose_index_scan())
  {
    tmp_table_param.precomputed_group_by= TRUE;
    if (join_tab->is_using_agg_loose_index_scan())
    {
      need_distinct= FALSE;
      tmp_table_param.precomputed_group_by= FALSE;
    }
  }

  if (make_aggr_tables_info())
    DBUG_RETURN(1);

  init_join_cache_and_keyread();

  if (init_range_rowid_filters())
    DBUG_RETURN(1);

  error= 0;

  /* For EXPLAIN the derived tables still have to be marked, see below */
  if (select_options & SELECT_DESCRIBE)
    goto derived_exit;

  DBUG_RETURN(0);

setup_subq_exit:
  /* Choose an execution strategy for this JOIN. */
  if (!tables_list || !table_count)
  {
    choose_tableless_subquery_plan();

    /* The output has at most one row */
    if (group_list)
    {
      group_list= NULL;
      group_optimized_away= 1;
      rollup.state= ROLLUP::STATE_NONE;
    }
    order= NULL;
    simple_order= TRUE;
    select_distinct= FALSE;

    if (select_lex->have_window_funcs())
    {
      /* Room for exactly one JOIN_TAB is allocated here */
      if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
        DBUG_RETURN(1);
#ifndef DBUG_OFF
      dbug_join_tab_array_size= 1;
#endif
      need_tmp= 1;
    }
    if (make_aggr_tables_info())
      DBUG_RETURN(1);

    /*
      It could be that we've only done optimization stage 1 for
      some of the derived tables, and never did stage 2.
      Do it now, otherwise Explain data structure will not be complete.
    */
    if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
      DBUG_RETURN(1);
  }
  /*
    Even with zero matching rows, subqueries in the HAVING clause may
    need to be evaluated if there are aggregate functions in the query.
  */
  if (optimize_unflattened_subqueries())
    DBUG_RETURN(1);
  error= 0;

derived_exit:

  select_lex->mark_const_derived(zero_result_cause);
  DBUG_RETURN(0);
}

/**
  Add having condition as a where clause condition of the given temp table.

  @param    tab   Table to which having condition is added.

  @returns  false if success, true if error.
+*/ + +bool JOIN::add_having_as_table_cond(JOIN_TAB *tab) +{ + tmp_having->update_used_tables(); + table_map used_tables= tab->table->map | OUTER_REF_TABLE_BIT; + + /* If tmp table is not used then consider conditions of const table also */ + if (!need_tmp) + used_tables|= const_table_map; + + DBUG_ENTER("JOIN::add_having_as_table_cond"); + + Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables, + (table_map) 0, 0, false, false); + if (sort_table_cond) + { + if (!tab->select) + { + if (!(tab->select= new SQL_SELECT)) + DBUG_RETURN(true); + tab->select->head= tab->table; + } + if (!tab->select->cond) + tab->select->cond= sort_table_cond; + else + { + if (!(tab->select->cond= + new (thd->mem_root) Item_cond_and(thd, + tab->select->cond, + sort_table_cond))) + DBUG_RETURN(true); + } + if (tab->pre_idx_push_select_cond) + { + if (sort_table_cond->type() == Item::COND_ITEM) + sort_table_cond= sort_table_cond->copy_andor_structure(thd); + if (!(tab->pre_idx_push_select_cond= + new (thd->mem_root) Item_cond_and(thd, + tab->pre_idx_push_select_cond, + sort_table_cond))) + DBUG_RETURN(true); + } + if (tab->select->cond) + tab->select->cond->fix_fields_if_needed(thd, 0); + if (tab->pre_idx_push_select_cond) + tab->pre_idx_push_select_cond->fix_fields_if_needed(thd, 0); + tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond; + tab->set_select_cond(tab->select->cond, __LINE__); + tab->select_cond->top_level_item(); + DBUG_EXECUTE("where",print_where(tab->select->cond, + "select and having", + QT_ORDINARY);); + + having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0, + ~used_tables, 0, false, false); + if (!having && thd->is_error()) + DBUG_RETURN(true); + DBUG_EXECUTE("where", + print_where(having, "having after sort", QT_ORDINARY);); + } + else if (thd->is_error()) + DBUG_RETURN(true); + + DBUG_RETURN(false); +} + + +bool JOIN::add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *table_fields) +{ + /* + this will not walk into 
semi-join materialization nests but this is ok + because we will never need to save current rowids for those. + */ + for (JOIN_TAB *tab=join_tab; tab < cur; tab++) + { + if (!tab->keep_current_rowid) + continue; + Item *item= new (thd->mem_root) Item_temptable_rowid(tab->table); + item->fix_fields(thd, 0); + table_fields->push_back(item, thd->mem_root); + cur->tmp_table_param->func_count++; + } + return 0; +} + + +/** + Set info for aggregation tables + + @details + This function finalizes execution plan by taking following actions: + .) aggregation temporary tables are created, but not instantiated + (this is done during execution). + JOIN_TABs for aggregation tables are set appropriately + (see JOIN::create_postjoin_aggr_table). + .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for + each required stage of execution. These fields lists are set for + working tables' tabs and for the tab of last table in the join. + .) info for sorting/grouping/dups removal is prepared and saved in + appropriate tabs. Here is an example: + + @returns + false - Ok + true - Error +*/ + +bool JOIN::make_aggr_tables_info() +{ + List<Item> *curr_all_fields= &all_fields; + List<Item> *curr_fields_list= &fields_list; + JOIN_TAB *curr_tab= join_tab + const_tables; + TABLE *exec_tmp_table= NULL; + bool distinct= false; + const bool has_group_by= this->group; + bool keep_row_order= thd->lex->with_rownum && (group_list || order); + bool is_having_added_as_table_cond= false; + DBUG_ENTER("JOIN::make_aggr_tables_info"); + + + sort_and_group_aggr_tab= NULL; + + if (group_optimized_away) + implicit_grouping= true; + + bool implicit_grouping_with_window_funcs= implicit_grouping && + select_lex->have_window_funcs(); + bool implicit_grouping_without_tables= implicit_grouping && + !tables_list; + + /* + Setup last table to provide fields and all_fields lists to the next + node in the plan. 
+ */ + if (join_tab && top_join_tab_count && tables_list) + { + join_tab[top_join_tab_count - 1].fields= &fields_list; + join_tab[top_join_tab_count - 1].all_fields= &all_fields; + } + + /* + All optimization is done. Check if we can use the storage engines + group by handler to evaluate the group by. + Some storage engines, like spider can also do joins, group by and + distinct in the engine, so we do this for all queries, not only + GROUP BY queries. + */ + if (tables_list && top_join_tab_count && !procedure) + { + /* + At the moment we only support push down for queries where + all tables are in the same storage engine + */ + TABLE_LIST *tbl= tables_list; + handlerton *ht= tbl && tbl->table ? tbl->table->file->partition_ht() : 0; + for (tbl= tbl->next_local; ht && tbl; tbl= tbl->next_local) + { + if (!tbl->table || tbl->table->file->partition_ht() != ht) + ht= 0; + } + + if (ht && ht->create_group_by) + { + /* + Check if the storage engine can intercept the query + + JOIN::optimize_stage2() might convert DISTINCT into GROUP BY and then + optimize away GROUP BY (group_list). In such a case, we need to notify + a storage engine supporting a group by handler of the existence of the + original DISTINCT. Thus, we set select_distinct || group_optimized_away + to Query::distinct. + */ + Query query= {&all_fields, select_distinct || group_optimized_away, + tables_list, conds, + group_list, order ? order : group_list, having, + &select_lex->master_unit()->lim}; + group_by_handler *gbh= ht->create_group_by(thd, &query); + + if (gbh) + { + if (!(pushdown_query= new (thd->mem_root) Pushdown_query(select_lex, + gbh))) + DBUG_RETURN(1); + /* + We must store rows in the tmp table if we need to do an ORDER BY + or DISTINCT and the storage handler can't handle it. 
+ */ + need_tmp= query.order_by || query.group_by || query.distinct; + distinct= query.distinct; + keep_row_order= query.order_by || query.group_by; + + order= query.order_by; + + aggr_tables++; + curr_tab= join_tab + exec_join_tab_cnt(); + bzero((void*)curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + curr_tab->join= this; + + if (!(curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param))) + DBUG_RETURN(1); + curr_tab->tmp_table_param->func_count= all_fields.elements; + TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param, + all_fields, + NULL, distinct, + TRUE, select_options, HA_POS_ERROR, + &empty_clex_str, !need_tmp, + keep_row_order); + if (!table) + DBUG_RETURN(1); + + if (!(curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab))) + DBUG_RETURN(1); + curr_tab->aggr->set_write_func(::end_send); + curr_tab->table= table; + /* + Setup reference fields, used by summary functions and group by fields, + to point to the temporary table. + The actual switching to the temporary tables fields for HAVING + and ORDER BY is done in do_select() by calling + set_items_ref_array(items1). + */ + init_items_ref_array(); + items1= ref_ptr_array_slice(2); + //items1= items0 + all_fields.elements; + if (change_to_use_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(1); + + /* Give storage engine access to temporary table */ + gbh->table= table; + pushdown_query->store_data_in_temp_table= need_tmp; + pushdown_query->having= having; + + /* + Group by and having is calculated by the group_by handler. 
+ Reset the group by and having + */ + DBUG_ASSERT(query.group_by == NULL); + group= 0; group_list= 0; + having= tmp_having= 0; + /* + Select distinct is handled by handler or by creating an unique index + over all fields in the temporary table + */ + select_distinct= 0; + order= query.order_by; + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + + fields= curr_fields_list; + + //todo: new: + curr_tab->ref_array= &items1; + curr_tab->all_fields= &tmp_all_fields1; + curr_tab->fields= &tmp_fields_list1; + + DBUG_RETURN(thd->is_error()); + } + } + } + + + /* + The loose index scan access method guarantees that all grouping or + duplicate row elimination (for distinct) is already performed + during data retrieval, and that all MIN/MAX functions are already + computed for each group. Thus all MIN/MAX functions should be + treated as regular functions, and there is no need to perform + grouping in the main execution loop. + Notice that currently loose index scan is applicable only for + single table queries, thus it is sufficient to test only the first + join_tab element of the plan for its access method. + */ + if (join_tab && top_join_tab_count && tables_list && + join_tab->is_using_loose_index_scan()) + tmp_table_param.precomputed_group_by= + !join_tab->is_using_agg_loose_index_scan(); + + group_list_for_estimates= group_list; + /* Create a tmp table if distinct or if the sort is too complicated */ + if (need_tmp) + { + aggr_tables++; + curr_tab= join_tab + exec_join_tab_cnt(); + DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size); + bzero((void*)curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + if (only_const_tables()) + first_select= sub_select_postjoin_aggr; + + /* + Create temporary table on first execution of this join. + (Will be reused if this is a subquery that is executed several times.) 
+ */ + init_items_ref_array(); + + ORDER *tmp_group= (ORDER *) 0; + if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP)) + tmp_group= group_list; + + tmp_table_param.hidden_field_count= + all_fields.elements - fields_list.elements; + + distinct= select_distinct && !group_list && + !select_lex->have_window_funcs(); + keep_row_order= thd->lex->with_rownum && (group_list || order); + bool save_sum_fields= (group_list && simple_group) || + implicit_grouping_with_window_funcs; + if (create_postjoin_aggr_table(curr_tab, + &all_fields, tmp_group, + save_sum_fields, + distinct, keep_row_order)) + DBUG_RETURN(true); + exec_tmp_table= curr_tab->table; + + if (exec_tmp_table->distinct) + optimize_distinct(); + + /* Change sum_fields reference to calculated fields in tmp_table */ + items1= ref_ptr_array_slice(2); + if ((sort_and_group || curr_tab->table->group || + tmp_table_param.precomputed_group_by) && + !implicit_grouping_without_tables) + { + if (change_to_use_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(true); + } + else + { + if (change_refs_to_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(true); + } + curr_all_fields= &tmp_all_fields1; + curr_fields_list= &tmp_fields_list1; + // Need to set them now for correct group_fields setup, reset at the end. + set_items_ref_array(items1); + curr_tab->ref_array= &items1; + curr_tab->all_fields= &tmp_all_fields1; + curr_tab->fields= &tmp_fields_list1; + set_postjoin_aggr_write_func(curr_tab); + + /* + If having is not handled here, it will be checked before the row is sent + to the client. + */ + if (tmp_having && + (sort_and_group || (exec_tmp_table->distinct && !group_list) || + select_lex->have_window_funcs())) + { + /* + If there is no select distinct and there are no window functions + then move the having to table conds of tmp table. 
+ NOTE : We cannot apply having after distinct or window functions + If columns of having are not part of select distinct, + then distinct may remove rows which can satisfy having. + In the case of window functions we *must* make sure to not + store any rows which don't match HAVING within the temp table, + as rows will end up being used during their computation. + */ + if (!select_distinct && !select_lex->have_window_funcs() && + add_having_as_table_cond(curr_tab)) + DBUG_RETURN(true); + is_having_added_as_table_cond= tmp_having != having; + + /* + Having condition which we are not able to add as tmp table conds are + kept as before. And, this will be applied before storing the rows in + tmp table. + */ + curr_tab->having= having; + having= NULL; // Already done + } + + tmp_table_param.func_count= 0; + tmp_table_param.field_count+= tmp_table_param.func_count; + if (sort_and_group || curr_tab->table->group) + { + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + } + + if (exec_tmp_table->group) + { // Already grouped + if (!order && !no_order && !skip_sort_order) + order= group_list; /* order by group */ + group_list= NULL; + } + + /* + If we have different sort & group then we must sort the data by group + and copy it to another tmp table. + + This code is also used if we are using distinct something + we haven't been able to store in the temporary table yet + like SEC_TO_TIME(SUM(...)). + + 3. Also, this is used when + - the query has Window functions, + - the GROUP BY operation is done with OrderedGroupBy algorithm. + In this case, the first temptable will contain pre-GROUP-BY data. Force + the creation of the second temporary table. Post-GROUP-BY dataset will be + written there, and then Window Function processing code will be able to + process it. 
+ */ + if ((group_list && + (!test_if_subpart(group_list, order) || select_distinct)) || + (select_distinct && tmp_table_param.using_outer_summary_function) || + (group_list && !tmp_table_param.quick_group && // (3) + select_lex->have_window_funcs())) // (3) + { /* Must copy to another table */ + DBUG_PRINT("info",("Creating group table")); + + calc_group_buffer(this, group_list); + count_field_types(select_lex, &tmp_table_param, tmp_all_fields1, + select_distinct && !group_list); + tmp_table_param.hidden_field_count= + tmp_all_fields1.elements - tmp_fields_list1.elements; + + curr_tab++; + aggr_tables++; + DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size); + bzero((void*)curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + + /* group data to new table */ + /* + If the access method is loose index scan then all MIN/MAX + functions are precomputed, and should be treated as regular + functions. See extended comment above. + */ + if (join_tab->is_using_loose_index_scan()) + tmp_table_param.precomputed_group_by= TRUE; + + tmp_table_param.hidden_field_count= + curr_all_fields->elements - curr_fields_list->elements; + ORDER *dummy= NULL; //TODO can use table->group here also + + if (create_postjoin_aggr_table(curr_tab, curr_all_fields, dummy, true, + distinct, keep_row_order)) + DBUG_RETURN(true); + + if (group_list) + { + if (!only_const_tables()) // No need to sort a single row + { + if (add_sorting_to_table(curr_tab - 1, group_list)) + DBUG_RETURN(true); + } + + if (make_group_fields(this, this)) + DBUG_RETURN(true); + } + + // Setup sum funcs only when necessary, otherwise we might break info + // for the first table + if (group_list || tmp_table_param.sum_func_count) + { + if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true)) + DBUG_RETURN(true); + if (prepare_sum_aggregators(thd, sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + DBUG_RETURN(true); + group_list= NULL; + if (setup_sum_funcs(thd, sum_funcs)) + DBUG_RETURN(true); + } 
+ // No sum funcs anymore + DBUG_ASSERT(items2.is_null()); + + items2= ref_ptr_array_slice(3); + if (change_to_use_tmp_fields(thd, items2, + tmp_fields_list2, tmp_all_fields2, + fields_list.elements, tmp_all_fields1)) + DBUG_RETURN(true); + + curr_fields_list= &tmp_fields_list2; + curr_all_fields= &tmp_all_fields2; + set_items_ref_array(items2); + curr_tab->ref_array= &items2; + curr_tab->all_fields= &tmp_all_fields2; + curr_tab->fields= &tmp_fields_list2; + set_postjoin_aggr_write_func(curr_tab); + + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + } + if (curr_tab->table->distinct) + select_distinct= false; /* Each row is unique */ + + if (select_distinct && !group_list) + { + if (having) + { + curr_tab->having= having; + having->update_used_tables(); + } + /* + We only need DISTINCT operation if the join is not degenerate. + If it is, we must not request DISTINCT processing, because + remove_duplicates() assumes there is a preceding computation step (and + in the degenerate join, there's none) + */ + if (top_join_tab_count && tables_list) + curr_tab->distinct= true; + + having= NULL; + select_distinct= false; + } + /* Clean tmp_table_param for the next tmp table. */ + tmp_table_param.field_count= tmp_table_param.sum_func_count= + tmp_table_param.func_count= 0; + + tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + first_record= sort_and_group=0; + + if (!group_optimized_away || implicit_grouping_with_window_funcs) + { + group= false; + } + else + { + /* + If grouping has been optimized away, a temporary table is + normally not needed unless we're explicitly requested to create + one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT). + + In this case (grouping was optimized away), temp_table was + created without a grouping expression and JOIN::exec() will not + perform the necessary grouping (by the use of end_send_group() + or end_write_group()) if JOIN::group is set to false. 
+ */ + // the temporary table was explicitly requested + DBUG_ASSERT(select_options & OPTION_BUFFER_RESULT); + // the temporary table does not have a grouping expression + DBUG_ASSERT(!curr_tab->table->group); + } + calc_group_buffer(this, group_list); + count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false); + } + + if (group || + (implicit_grouping && !implicit_grouping_with_window_funcs) || + tmp_table_param.sum_func_count) + { + if (make_group_fields(this, this)) + DBUG_RETURN(true); + + DBUG_ASSERT(items3.is_null()); + + if (items0.is_null()) + init_items_ref_array(); + items3= ref_ptr_array_slice(4); + setup_copy_fields(thd, &tmp_table_param, + items3, tmp_fields_list3, tmp_all_fields3, + curr_fields_list->elements, *curr_all_fields); + + curr_fields_list= &tmp_fields_list3; + curr_all_fields= &tmp_all_fields3; + set_items_ref_array(items3); + if (join_tab) + { + JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1; + // Set grouped fields on the last table + last_tab->ref_array= &items3; + last_tab->all_fields= &tmp_all_fields3; + last_tab->fields= &tmp_fields_list3; + } + if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true)) + DBUG_RETURN(true); + if (prepare_sum_aggregators(thd, sum_funcs, + !join_tab || + !join_tab-> is_using_agg_loose_index_scan())) + DBUG_RETURN(true); + if (unlikely(setup_sum_funcs(thd, sum_funcs) || thd->is_error())) + DBUG_RETURN(true); + } + if (group_list || order) + { + DBUG_PRINT("info",("Sorting for send_result_set_metadata")); + THD_STAGE_INFO(thd, stage_sorting_result); + /* If we have already done the group, add HAVING to sorted table */ + if (tmp_having && !is_having_added_as_table_cond && + !group_list && !sort_and_group) + { + if (add_having_as_table_cond(curr_tab)) + DBUG_RETURN(true); + } + + if (group) + select_limit= HA_POS_ERROR; + else if (!need_tmp) + { + /* + We can abort sorting after thd->select_limit rows if there are no + filter conditions for any tables after the 
sorted one. + Filter conditions come in several forms: + 1. as a condition item attached to the join_tab, or + 2. as a keyuse attached to the join_tab (ref access). + */ + for (uint i= const_tables + 1; i < top_join_tab_count; i++) + { + JOIN_TAB *const tab= join_tab + i; + if (tab->select_cond || // 1 + (tab->keyuse && !tab->first_inner)) // 2 + { + /* We have to sort all rows */ + select_limit= HA_POS_ERROR; + break; + } + } + } + /* + Here we add sorting stage for ORDER BY/GROUP BY clause, if the + optimiser chose FILESORT to be faster than INDEX SCAN or there is + no suitable index present. + OPTION_FOUND_ROWS supersedes LIMIT and is taken into account. + */ + DBUG_PRINT("info",("Sorting for order by/group by")); + ORDER *order_arg= group_list ? group_list : order; + if (top_join_tab_count + aggr_tables > const_tables && + ordered_index_usage != + (group_list ? ordered_index_group_by : ordered_index_order_by) && + curr_tab->type != JT_CONST && + curr_tab->type != JT_EQ_REF) // Don't sort 1 row + { + // Sort either first non-const table or the last tmp table + JOIN_TAB *sort_tab= curr_tab; + + if (add_sorting_to_table(sort_tab, order_arg)) + DBUG_RETURN(true); + /* + filesort_limit: Return only this many rows from filesort(). + We can use select_limit_cnt only if we have no group_by and 1 table. + This allows us to use Bounded_queue for queries like: + "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;" + m_select_limit == HA_POS_ERROR (we need a full table scan) + unit->select_limit_cnt == 1 (we only need one row in the result set) + */ + sort_tab->filesort->limit= + (has_group_by || (join_tab + top_join_tab_count > curr_tab + 1)) ? 
+ select_limit : unit->lim.get_select_limit(); + + if (unit->lim.is_with_ties()) + sort_tab->filesort->limit= HA_POS_ERROR; + } + if (!only_const_tables() && + !join_tab[const_tables].filesort && + !(select_options & SELECT_DESCRIBE)) + { + /* + If no IO cache exists for the first table then we are using an + INDEX SCAN and no filesort. Thus we should not remove the sorted + attribute on the INDEX SCAN. + */ + skip_sort_order= true; + } + } + + /* + Window functions computation step should be attached to the last join_tab + that's doing aggregation. + The last join_tab reads the data from the temp. table. It also may do + - sorting + - duplicate value removal + Both of these operations are done after window function computation step. + */ + curr_tab= join_tab + total_join_tab_cnt(); + if (select_lex->window_funcs.elements) + { + if (!(curr_tab->window_funcs_step= new Window_funcs_computation)) + DBUG_RETURN(true); + if (curr_tab->window_funcs_step->setup(thd, &select_lex->window_funcs, + curr_tab)) + DBUG_RETURN(true); + /* Count that we're using window functions. */ + status_var_increment(thd->status_var.feature_window_functions); + } + if (select_lex->custom_agg_func_used()) + status_var_increment(thd->status_var.feature_custom_aggregate_functions); + + /* + Allocate Cached_items of ORDER BY for FETCH FIRST .. WITH TIES. + The order list might have been modified prior to this, but we are + only interested in the initial order by columns, after all const + elements are removed. 
+ */ + if (unit->lim.is_with_ties()) + { + if (alloc_order_fields(this, order, with_ties_order_count)) + DBUG_RETURN(true); + } + + fields= curr_fields_list; + // Reset before execution + set_items_ref_array(items0); + if (join_tab) + join_tab[exec_join_tab_cnt() + aggr_tables - 1].next_select= + setup_end_select_func(this); + group= has_group_by; + + DBUG_RETURN(false); +} + + + +bool +JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields, + ORDER *table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_order) +{ + DBUG_ENTER("JOIN::create_postjoin_aggr_table"); + THD_STAGE_INFO(thd, stage_creating_tmp_table); + + /* + Pushing LIMIT to the post-join temporary table creation is not applicable + when there is ORDER BY or GROUP BY or there is no GROUP BY, but + there are aggregate functions, because in all these cases we need + all result rows. + + We also can not push limit if the limit is WITH TIES, as we do not know + how many rows we will actually have. This can happen if ORDER BY was + a constant and removed (during remove_const), thus we have an "unlimited" + WITH TIES. + */ + ha_rows table_rows_limit= ((order == NULL || skip_sort_order) && + !table_group && + !select_lex->with_sum_func && + !unit->lim.is_with_ties()) ? 
select_limit + : HA_POS_ERROR; + + if (!(tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param))) + DBUG_RETURN(true); + if (tmp_table_keep_current_rowid) + add_fields_for_current_rowid(tab, table_fields); + tab->tmp_table_param->skip_create_table= true; + TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields, + table_group, distinct, + save_sum_fields, select_options, + table_rows_limit, + &empty_clex_str, true, keep_row_order); + if (!table) + DBUG_RETURN(true); + tmp_table_param.using_outer_summary_function= + tab->tmp_table_param->using_outer_summary_function; + tab->join= this; + DBUG_ASSERT(tab > tab->join->join_tab || !top_join_tab_count || + !tables_list); + tab->table= table; + if (tab > join_tab) + (tab - 1)->next_select= sub_select_postjoin_aggr; + + /* if group or order on first table, sort first */ + if ((group_list && simple_group) || + (implicit_grouping && select_lex->have_window_funcs())) + { + DBUG_PRINT("info",("Sorting for group")); + THD_STAGE_INFO(thd, stage_sorting_for_group); + + if (ordered_index_usage != ordered_index_group_by && + !only_const_tables() && + (join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row + !implicit_grouping && + add_sorting_to_table(join_tab + const_tables, group_list)) + goto err; + + if (alloc_group_fields(this, group_list)) + goto err; + if (make_sum_func_list(all_fields, fields_list, true)) + goto err; + if (prepare_sum_aggregators(thd, sum_funcs, + !(tables_list && + join_tab->is_using_agg_loose_index_scan()))) + goto err; + if (setup_sum_funcs(thd, sum_funcs)) + goto err; + group_list= NULL; + } + else + { + if (prepare_sum_aggregators(thd, sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + goto err; + if (setup_sum_funcs(thd, sum_funcs)) + goto err; + + if (!group_list && !table->distinct && order && simple_order && + tab == join_tab + const_tables) + { + DBUG_PRINT("info",("Sorting for order")); + THD_STAGE_INFO(thd, stage_sorting_for_order); + + if 
(ordered_index_usage != ordered_index_order_by && + !only_const_tables() && + add_sorting_to_table(join_tab + const_tables, order)) + goto err; + order= NULL; + } + } + if (!(tab->aggr= new (thd->mem_root) AGGR_OP(tab))) + goto err; + table->reginfo.join_tab= tab; + DBUG_RETURN(false); + +err: + if (table != NULL) + free_tmp_table(thd, table); + tab->table= NULL; + DBUG_RETURN(true); +} + + +void +JOIN::optimize_distinct() +{ + for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;) + { + if (select_lex->select_list_tables & last_join_tab->table->map || + last_join_tab->use_join_cache) + break; + last_join_tab->shortcut_for_distinct= true; + if (last_join_tab == join_tab) + break; + --last_join_tab; + } + + /* Optimize "select distinct b from t1 order by key_part_1 limit #" */ + if (order && skip_sort_order && !unit->lim.is_with_ties()) + { + /* Should already have been optimized away */ + DBUG_ASSERT(ordered_index_usage == ordered_index_order_by); + if (ordered_index_usage == ordered_index_order_by) + { + order= NULL; + } + } +} + + +/** + @brief Add Filesort object to the given table to sort if with filesort + + @param tab the JOIN_TAB object to attach created Filesort object to + @param order List of expressions to sort the table by + + @note This function moves tab->select, if any, to filesort->select + + @return false on success, true on OOM +*/ + +bool +JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) +{ + tab->filesort= + new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid, + tab->select); + if (!tab->filesort) + return true; + + TABLE *table= tab->table; + if ((tab == join_tab + const_tables) && + table->pos_in_table_list && + table->pos_in_table_list->is_sjm_scan_table()) + { + tab->filesort->set_all_read_bits= TRUE; + tab->filesort->unpack= unpack_to_base_table_fields; + } + + /* + Select was moved to filesort->select to force join_init_read_record to use + sorted result instead of reading table through select. 
+ */ + if (tab->select) + { + tab->select= NULL; + tab->set_select_cond(NULL, __LINE__); + } + tab->read_first_record= join_init_read_record; + return false; +} + + + + +/** + Setup expression caches for subqueries that need them + + @details + The function wraps correlated subquery expressions that return one value + into objects of the class Item_cache_wrapper setting up an expression + cache for each of them. The result values of the subqueries are to be + cached together with the corresponding sets of the parameters - outer + references of the subqueries. + + @retval FALSE OK + @retval TRUE Error +*/ + +bool JOIN::setup_subquery_caches() +{ + DBUG_ENTER("JOIN::setup_subquery_caches"); + + /* + We have to check all this condition together because items created in + one of this clauses can be moved to another one by optimizer + */ + if (select_lex->expr_cache_may_be_used[IN_WHERE] || + select_lex->expr_cache_may_be_used[IN_HAVING] || + select_lex->expr_cache_may_be_used[IN_ON] || + select_lex->expr_cache_may_be_used[NO_MATTER]) + { + JOIN_TAB *tab; + if (conds && + !(conds= conds->transform(thd, &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (tab->select_cond && + !(tab->select_cond= + tab->select_cond->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + if (tab->cache_select && tab->cache_select->cond) + if (!(tab->cache_select->cond= + tab->cache_select-> + cond->transform(thd, &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + + if (having && + !(having= having->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + + if (tmp_having) + { + DBUG_ASSERT(having == NULL); + if (!(tmp_having= + tmp_having->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + } + if 
(select_lex->expr_cache_may_be_used[SELECT_LIST] || + select_lex->expr_cache_may_be_used[IN_GROUP_BY] || + select_lex->expr_cache_may_be_used[NO_MATTER]) + { + List_iterator<Item> li(all_fields); + Item *item; + while ((item= li++)) + { + Item *new_item; + if (!(new_item= + item->transform(thd, &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + if (new_item != item) + { + thd->change_item_tree(li.ref(), new_item); + } + } + for (ORDER *tmp_group= group_list; tmp_group ; tmp_group= tmp_group->next) + { + if (!(*tmp_group->item= + (*tmp_group->item)->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + } + if (select_lex->expr_cache_may_be_used[NO_MATTER]) + { + for (ORDER *ord= order; ord; ord= ord->next) + { + if (!(*ord->item= + (*ord->item)->transform(thd, + &Item::expr_cache_insert_transformer, + NULL))) + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +} + + +/* + Shrink join buffers used for preceding tables to reduce the occupied space + + SYNOPSIS + shrink_join_buffers() + jt table up to which the buffers are to be shrunk + curr_space the size of the space used by the buffers for tables 1..jt + needed_space the size of the space that has to be used by these buffers + + DESCRIPTION + The function makes an attempt to shrink all join buffers used for the + tables starting from the first up to jt to reduce the total size of the + space occupied by the buffers used for tables 1,...,jt from curr_space + to needed_space. + The function assumes that the buffer for the table jt has not been + allocated yet. 
+ + RETURN + FALSE if all buffer have been successfully shrunk + TRUE otherwise +*/ + +bool JOIN::shrink_join_buffers(JOIN_TAB *jt, + ulonglong curr_space, + ulonglong needed_space) +{ + JOIN_TAB *tab; + JOIN_CACHE *cache; + for (tab= first_linear_tab(this, WITHOUT_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab != jt; + tab= next_linear_tab(this, tab, WITHOUT_BUSH_ROOTS)) + { + cache= tab->cache; + if (cache) + { + size_t buff_size; + if (needed_space < cache->get_min_join_buffer_size()) + return TRUE; + if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space)) + { + revise_cache_usage(tab); + return TRUE; + } + buff_size= cache->get_join_buffer_size(); + curr_space-= buff_size; + needed_space-= buff_size; + } + } + + cache= jt->cache; + DBUG_ASSERT(cache); + if (needed_space < cache->get_min_join_buffer_size()) + return TRUE; + cache->set_join_buffer_size((size_t)needed_space); + + return FALSE; +} + + +int +JOIN::reinit() +{ + DBUG_ENTER("JOIN::reinit"); + + first_record= false; + group_sent= false; + cleaned= false; + accepted_rows= 0; + + if (aggr_tables) + { + JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt(); + JOIN_TAB *end_tab= curr_tab + aggr_tables; + for ( ; curr_tab < end_tab; curr_tab++) + { + TABLE *tmp_table= curr_tab->table; + if (!tmp_table->is_created()) + continue; + tmp_table->file->extra(HA_EXTRA_RESET_STATE); + tmp_table->file->ha_delete_all_rows(); + } + } + clear_sj_tmp_tables(this); + if (current_ref_ptrs != items0) + { + set_items_ref_array(items0); + set_group_rpa= false; + } + + /* need to reset ref access state (see join_read_key) */ + if (join_tab) + { + JOIN_TAB *tab; + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + tab->ref.key_err= TRUE; + } + } + + clear_sum_funcs(); + + if (no_rows_in_result_called) + { + /* Reset effect of possible no_rows_in_result() */ + List_iterator_fast<Item> it(fields_list); + Item *item; + no_rows_in_result_called= 0; 
+ while ((item= it++)) + item->restore_to_before_no_rows_in_result(); + } + + if (!(select_options & SELECT_DESCRIBE)) + if (init_ftfuncs(thd, select_lex, MY_TEST(order))) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/** + Prepare join result. + + @details Prepare join result prior to join execution or describing. + Instantiate derived tables and get schema tables result if necessary. + + @return + TRUE An error during derived or schema tables instantiation. + FALSE Ok +*/ + +bool JOIN::prepare_result(List<Item> **columns_list) +{ + DBUG_ENTER("JOIN::prepare_result"); + + error= 0; + /* Create result tables for materialized views. */ + if (!zero_result_cause && + select_lex->handle_derived(thd->lex, DT_CREATE)) + goto err; + + if (result->prepare2(this)) + goto err; + + if ((select_lex->options & OPTION_SCHEMA_TABLE) && + get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC)) + goto err; + + DBUG_RETURN(FALSE); + +err: + error= 1; + DBUG_RETURN(TRUE); +} + + +/** + @retval + 0 ok + 1 error +*/ + + +bool JOIN::save_explain_data(Explain_query *output, bool can_overwrite, + bool need_tmp_table, bool need_order, + bool distinct) +{ + DBUG_ENTER("JOIN::save_explain_data"); + DBUG_PRINT("enter", ("Save explain Select_lex: %u (%p) parent lex: %p stmt_lex: %p present select: %u (%p)", + select_lex->select_number, select_lex, + select_lex->parent_lex, thd->lex->stmt_lex, + (output->get_select(select_lex->select_number) ? + select_lex->select_number : 0), + (output->get_select(select_lex->select_number) ? 
+ output->get_select(select_lex->select_number) + ->select_lex : NULL))); + /* + If there is SELECT in this statement with the same number it must be the + same SELECT + */ + DBUG_ASSERT(select_lex->select_number == FAKE_SELECT_LEX_ID || !output || + !output->get_select(select_lex->select_number) || + output->get_select(select_lex->select_number)->select_lex == + select_lex); + + if (select_lex->select_number != FAKE_SELECT_LEX_ID && + have_query_plan != JOIN::QEP_NOT_PRESENT_YET && + have_query_plan != JOIN::QEP_DELETED && // this happens when there was + // no QEP ever, but then + //cleanup() is called multiple times + output && // for "SET" command in SPs. + (can_overwrite? true: !output->get_select(select_lex->select_number))) + { + const char *message= NULL; + if (!table_count || !tables_list || zero_result_cause) + { + /* It's a degenerate join */ + message= zero_result_cause ? zero_result_cause : "No tables used"; + } + bool rc= save_explain_data_intern(thd->lex->explain, need_tmp_table, + need_order, distinct, message); + DBUG_RETURN(rc); + } + + /* + Can have join_tab==NULL for degenerate cases (e.g. SELECT .. UNION ... SELECT LIMIT 0) + */ + if (select_lex == select_lex->master_unit()->fake_select_lex && join_tab) + { + /* + This is fake_select_lex. 
/**
  Execute the join.

  Wrapper around exec_inner(): the real work happens there, this function
  only surrounds the call with the ANALYZE time tracking macros (fed with
  explain->time_tracker) and with two DBUG_EXECUTE_IF hooks, one before and
  one after the execution.
*/
void JOIN::exec()
{
  /*
    DBUG hook "show_explain_probe_join_exec_start": when the user variable
    show_explain_probe_select_id equals the number of this select, call
    dbug_serve_apcs() before the execution starts.
  */
  DBUG_EXECUTE_IF("show_explain_probe_join_exec_start",
                  if (dbug_user_var_equals_int(thd,
                                               "show_explain_probe_select_id",
                                               select_lex->select_number))
                        dbug_serve_apcs(thd, 1);
                 );
  ANALYZE_START_TRACKING(thd, &explain->time_tracker);
  exec_inner();
  ANALYZE_STOP_TRACKING(thd, &explain->time_tracker);

  /* Same probe as above, evaluated after the execution has finished */
  DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
                  if (dbug_user_var_equals_int(thd,
                                               "show_explain_probe_select_id",
                                               select_lex->select_number))
                        dbug_serve_apcs(thd, 1);
                 );
}
+ */ + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_exec(thd, "join_execution"); + trace_exec.add_select_number(select_lex->select_number); + Json_writer_array trace_steps(thd, "steps"); + + if (!select_lex->outer_select() && // (1) + select_lex != select_lex->master_unit()->fake_select_lex) // (2) + thd->lex->set_limit_rows_examined(); + + if (procedure) + { + procedure_fields_list= fields_list; + if (procedure->change_columns(thd, procedure_fields_list) || + result->prepare(procedure_fields_list, unit)) + { + thd->set_examined_row_count(0); + thd->limit_found_rows= 0; + DBUG_VOID_RETURN; + } + columns_list= &procedure_fields_list; + } + if (result->prepare2(this)) + DBUG_VOID_RETURN; + + if (!tables_list && (table_count || !select_lex->with_sum_func) && + !select_lex->have_window_funcs()) + { // Only test of functions + if (select_options & SELECT_DESCRIBE) + select_describe(this, FALSE, FALSE, FALSE, + (zero_result_cause?zero_result_cause:"No tables used")); + else + { + if (result->send_result_set_metadata(*columns_list, + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + { + DBUG_VOID_RETURN; + } + + /* + We have to test for 'conds' here as the WHERE may not be constant + even if we don't have any tables for prepared statements or if + conds uses something like 'rand()'. + If the HAVING clause is either impossible or always true, then + JOIN::having is set to NULL by optimize_cond. + In this case JOIN::exec must check for JOIN::having_value, in the + same way it checks for JOIN::cond_value. + */ + DBUG_ASSERT(error == 0); + if (cond_value != Item::COND_FALSE && + having_value != Item::COND_FALSE && + (!conds || conds->val_int()) && + (!having || having->val_int())) + { + if (do_send_rows && + (procedure ? (procedure->send_row(procedure_fields_list) || + procedure->end_of_records()): + result->send_data_with_check(fields_list, unit, 0)> 0)) + error= 1; + else + send_records= ((select_options & OPTION_FOUND_ROWS) ? 
1 : + thd->get_sent_row_count()); + } + else + send_records= 0; + if (likely(!error)) + { + join_free(); // Unlock all cursors + error= (int) result->send_eof(); + } + } + /* Single select (without union) always returns 0 or 1 row */ + thd->limit_found_rows= send_records; + thd->set_examined_row_count(0); + DBUG_VOID_RETURN; + } + + /* + Evaluate expensive constant conditions that were not evaluated during + optimization. Do not evaluate them for EXPLAIN statements as these + condtions may be arbitrarily costly, and because the optimize phase + might not have produced a complete executable plan for EXPLAINs. + */ + if (!zero_result_cause && + exec_const_cond && !(select_options & SELECT_DESCRIBE) && + !exec_const_cond->val_int()) + zero_result_cause= "Impossible WHERE noticed after reading const tables"; + + /* + We've called exec_const_cond->val_int(). This may have caused an error. + */ + if (unlikely(thd->is_error())) + { + error= thd->is_error(); + DBUG_VOID_RETURN; + } + + if (zero_result_cause) + { + if (select_lex->have_window_funcs() && send_row_on_empty_set()) + { + /* + The query produces just one row but it has window functions. + + The only way to compute the value of window function(s) is to + run the entire window function computation step (there is no shortcut). + */ + const_tables= table_count; + first_select= sub_select_postjoin_aggr; + } + else + { + (void) return_zero_rows(this, result, &select_lex->leaf_tables, + columns_list, + send_row_on_empty_set(), + select_options, + zero_result_cause, + having ? having : tmp_having, &all_fields); + DBUG_VOID_RETURN; + } + } + + /* + Evaluate all constant expressions with subqueries in the + ORDER/GROUP clauses to make sure that all subqueries return a + single row. The evaluation itself will trigger an error if that is + not the case. 
+ */ + if (exec_const_order_group_cond.elements && + !(select_options & SELECT_DESCRIBE) && + !select_lex->pushdown_select) + { + List_iterator_fast<Item> const_item_it(exec_const_order_group_cond); + Item *cur_const_item; + StringBuffer<MAX_FIELD_WIDTH> tmp; + while ((cur_const_item= const_item_it++)) + { + tmp.set_buffer_if_not_allocated(&my_charset_bin); + cur_const_item->val_str(&tmp); + if (unlikely(thd->is_error())) + { + error= thd->is_error(); + DBUG_VOID_RETURN; + } + } + } + + if ((this->select_lex->options & OPTION_SCHEMA_TABLE) && + get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC)) + DBUG_VOID_RETURN; + + if (select_options & SELECT_DESCRIBE) + { + select_describe(this, need_tmp, + order != 0 && !skip_sort_order, + select_distinct, + !table_count ? "No tables used" : NullS); + DBUG_VOID_RETURN; + } + else if (select_lex->pushdown_select) + { + /* Execute the query pushed into a foreign engine */ + error= select_lex->pushdown_select->execute(); + DBUG_VOID_RETURN; + } + else + { + /* it's a const select, materialize it. */ + select_lex->mark_const_derived(zero_result_cause); + } + + /* + Initialize examined rows here because the values from all join parts + must be accumulated in examined_row_count. Hence every join + iteration must count from zero. + */ + join_examined_rows= 0; + + /* XXX: When can we have here thd->is_error() not zero? */ + if (unlikely(thd->is_error())) + { + error= thd->is_error(); + DBUG_VOID_RETURN; + } + + THD_STAGE_INFO(thd, stage_sending_data); + DBUG_PRINT("info", ("%s", thd->proc_info)); + result->send_result_set_metadata( + procedure ? procedure_fields_list : *fields, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); + + error= result->view_structure_only() ? false : do_select(this, procedure); + /* Accumulate the counts from all join iterations of all join parts. 
*/ + thd->inc_examined_row_count(join_examined_rows); + DBUG_PRINT("counts", ("thd->examined_row_count: %lu", + (ulong) thd->get_examined_row_count())); + + DBUG_VOID_RETURN; +} + + +/** + Clean up join. + + @return + Return error that hold JOIN. +*/ + +int +JOIN::destroy() +{ + DBUG_ENTER("JOIN::destroy"); + + DBUG_PRINT("info", ("select %p (%u) <> JOIN %p", + select_lex, select_lex->select_number, this)); + select_lex->join= 0; + + cond_equal= 0; + having_equal= 0; + + cleanup(1); + + if (join_tab) + { + for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS, + WITH_CONST_TABLES); + tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + if (tab->aggr) + { + free_tmp_table(thd, tab->table); + delete tab->tmp_table_param; + tab->tmp_table_param= NULL; + tab->aggr= NULL; + } + tab->table= NULL; + } + } + + /* Cleanup items referencing temporary table columns */ + cleanup_item_list(tmp_all_fields1); + cleanup_item_list(tmp_all_fields3); + destroy_sj_tmp_tables(this); + delete_dynamic(&keyuse); + if (save_qep) + delete(save_qep); + if (ext_keyuses_for_splitting) + delete(ext_keyuses_for_splitting); + delete procedure; + DBUG_RETURN(error); +} + + +void JOIN::cleanup_item_list(List<Item> &items) const +{ + DBUG_ENTER("JOIN::cleanup_item_list"); + if (!items.is_empty()) + { + List_iterator_fast<Item> it(items); + Item *item; + while ((item= it++)) + item->cleanup(); + } + DBUG_VOID_RETURN; +} + + +/** + @brief + Look for provision of the select_handler interface by a foreign engine + + @param thd The thread handler + + @details + The function checks that this is an upper level select and if so looks + through its tables searching for one whose handlerton owns a + create_select call-back function. If the call of this function returns + a select_handler interface object then the server will push the select + query into this engine. + This is a responsibility of the create_select call-back function to + check whether the engine can execute the query. 
+ + @retval the found select_handler if the search is successful + 0 otherwise +*/ + +select_handler *find_select_handler(THD *thd, + SELECT_LEX* select_lex) +{ + if (select_lex->next_select()) + return 0; + if (select_lex->master_unit()->outer_select()) + return 0; + + TABLE_LIST *tbl= nullptr; + // For SQLCOM_INSERT_SELECT the server takes TABLE_LIST + // from thd->lex->query_tables and skips its first table + // b/c it is the target table for the INSERT..SELECT. + if (thd->lex->sql_command != SQLCOM_INSERT_SELECT) + { + tbl= select_lex->join->tables_list; + } + else if (thd->lex->query_tables && + thd->lex->query_tables->next_global) + { + tbl= thd->lex->query_tables->next_global; + } + else + return 0; + + for (;tbl; tbl= tbl->next_global) + { + if (!tbl->table) + continue; + handlerton *ht= tbl->table->file->partition_ht(); + if (!ht->create_select) + continue; + select_handler *sh= ht->create_select(thd, select_lex); + return sh; + } + return 0; +} + + +/** + An entry point to single-unit select (a select without UNION). + + @param thd thread handler + @param rref_pointer_array a reference to ref_pointer_array of + the top-level select_lex for this query + @param tables list of all tables used in this query. + The tables have been pre-opened. + @param fields list of items in SELECT list of the top-level + select + e.g. SELECT a, b, c FROM t1 will have Item_field + for a, b and c in this list. + @param conds top level item of an expression representing + WHERE clause of the top level select + @param og_num total number of ORDER BY and GROUP BY clauses + arguments + @param order linked list of ORDER BY agruments + @param group linked list of GROUP BY arguments + @param having top level item of HAVING expression + @param proc_param list of PROCEDUREs + @param select_options select options (BIG_RESULT, etc) + @param result an instance of result set handling class. 
+ This object is responsible for send result + set rows to the client or inserting them + into a table. + @param select_lex the only SELECT_LEX of this query + @param unit top-level UNIT of this query + UNIT is an artificial object created by the + parser for every SELECT clause. + e.g. + SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2) + has 2 unions. + + @retval + FALSE success + @retval + TRUE an error +*/ + +bool +mysql_select(THD *thd, TABLE_LIST *tables, List<Item> &fields, COND *conds, + uint og_num, ORDER *order, ORDER *group, Item *having, + ORDER *proc_param, ulonglong select_options, select_result *result, + SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex) +{ + int err= 0; + bool free_join= 1; + DBUG_ENTER("mysql_select"); + + if (!fields.is_empty()) + select_lex->context.resolve_in_select_list= true; + JOIN *join; + if (select_lex->join != 0) + { + join= select_lex->join; + /* + is it single SELECT in derived table, called in derived table + creation + */ + if (select_lex->get_linkage() != DERIVED_TABLE_TYPE || + (select_options & SELECT_DESCRIBE)) + { + if (select_lex->get_linkage() != GLOBAL_OPTIONS_TYPE) + { + /* + Original join tabs might be overwritten at first + subselect execution. So we need to restore them. + */ + Item_subselect *subselect= select_lex->master_unit()->item; + if (subselect && subselect->is_uncacheable() && join->reinit()) + DBUG_RETURN(TRUE); + } + else + { + if ((err= join->prepare(tables, conds, og_num, order, false, group, + having, proc_param, select_lex, unit))) + { + goto err; + } + } + } + free_join= 0; + join->select_options= select_options; + } + else + { + if (thd->lex->describe) + select_options|= SELECT_DESCRIBE; + + /* + When in EXPLAIN, delay deleting the joins so that they are still + available when we're producing EXPLAIN EXTENDED warning text. 
+ */ + if (select_options & SELECT_DESCRIBE) + free_join= 0; + + if (!(join= new (thd->mem_root) JOIN(thd, fields, select_options, result))) + DBUG_RETURN(TRUE); + THD_STAGE_INFO(thd, stage_init); + thd->lex->used_tables=0; + if ((err= join->prepare(tables, conds, og_num, order, false, group, having, + proc_param, select_lex, unit))) + { + goto err; + } + } + + thd->get_stmt_da()->reset_current_row_for_warning(1); + /* Look for a table owned by an engine with the select_handler interface */ + select_lex->pushdown_select= find_select_handler(thd, select_lex); + + if ((err= join->optimize())) + { + goto err; // 1 + } + + if (thd->lex->describe & DESCRIBE_EXTENDED) + { + join->conds_history= join->conds; + join->having_history= (join->having?join->having:join->tmp_having); + } + + if (unlikely(thd->is_error())) + goto err; + + join->exec(); + + if (thd->lex->describe & DESCRIBE_EXTENDED) + { + select_lex->where= join->conds_history; + select_lex->having= join->having_history; + } + +err: + + if (select_lex->pushdown_select) + { + delete select_lex->pushdown_select; + select_lex->pushdown_select= NULL; + } + + if (free_join) + { + THD_STAGE_INFO(thd, stage_end); + err|= (int)(select_lex->cleanup()); + DBUG_RETURN(err || thd->is_error()); + } + DBUG_RETURN(join->error ? join->error: err); +} + + +/** + Approximate how many records are going to be returned by this table in this + select with this key. 
+ + @param thd Thread handle + @param select Select to be examined + @param table The table of interest + @param keys The keys of interest + @param limit Maximum number of rows of interest + @param quick_count Pointer to where we want the estimate written + + @return Status + @retval false Success + @retval true Error + +*/ + +static bool get_quick_record_count(THD *thd, SQL_SELECT *select, + TABLE *table, + const key_map *keys,ha_rows limit, + ha_rows *quick_count) +{ + quick_select_return error; + DBUG_ENTER("get_quick_record_count"); + uchar buff[STACK_BUFF_ALLOC]; + if (unlikely(check_stack_overrun(thd, STACK_MIN_SIZE, buff))) + DBUG_RETURN(false); // Fatal error flag is set + if (select) + { + select->head=table; + table->reginfo.impossible_range=0; + error= select->test_quick_select(thd, *(key_map *)keys, (table_map) 0, + limit, 0, FALSE, + TRUE, /* remove_where_parts*/ + FALSE, TRUE); + + if (error == SQL_SELECT::OK && select->quick) + { + *quick_count= select->quick->records; + DBUG_RETURN(false); + } + if (error == SQL_SELECT::IMPOSSIBLE_RANGE) + { + table->reginfo.impossible_range=1; + *quick_count= 0; + DBUG_RETURN(false); + } + if (unlikely(error == SQL_SELECT::ERROR)) + DBUG_RETURN(true); + + DBUG_PRINT("warning",("Couldn't use record count on const keypart")); + } + *quick_count= HA_POS_ERROR; + DBUG_RETURN(false); /* This shouldn't happen */ +} + +/* + This structure is used to collect info on potentially sargable + predicates in order to check whether they become sargable after + reading const tables. + We form a bitmap of indexes that can be used for sargable predicates. + Only such indexes are involved in range analysis. +*/ +struct SARGABLE_PARAM +{ + Field *field; /* field against which to check sargability */ + Item **arg_value; /* values of potential keys for lookups */ + uint num_values; /* number of values in the above array */ +}; + + +/* + Mark all tables inside a join nest as constant. 
+ + @detail This is called when there is a local "Impossible WHERE" inside + a multi-table LEFT JOIN. +*/ + +void mark_join_nest_as_const(JOIN *join, + TABLE_LIST *join_nest, + table_map *found_const_table_map, + uint *const_count) +{ + List_iterator<TABLE_LIST> it(join_nest->nested_join->join_list); + TABLE_LIST *tbl; + Json_writer_object emb_obj(join->thd); + Json_writer_object trace_obj(join->thd, "mark_join_nest_as_const"); + Json_writer_array trace_array(join->thd, "members"); + + while ((tbl= it++)) + { + if (tbl->nested_join) + { + mark_join_nest_as_const(join, tbl, found_const_table_map, const_count); + continue; + } + JOIN_TAB *tab= tbl->table->reginfo.join_tab; + + if (!(join->const_table_map & tab->table->map)) + { + tab->type= JT_CONST; + tab->info= ET_IMPOSSIBLE_ON_CONDITION; + tab->table->const_table= 1; + + join->const_table_map|= tab->table->map; + *found_const_table_map|= tab->table->map; + set_position(join,(*const_count)++,tab,(KEYUSE*) 0); + mark_as_null_row(tab->table); // All fields are NULL + + trace_array.add_table_name(tab->table); + } + } +} + + +/* + @brief Get the condition that can be used to do range analysis/partition + pruning/etc + + @detail + Figure out which condition we can use: + - For INNER JOIN, we use the WHERE, + - "t1 LEFT JOIN t2 ON ..." uses t2's ON expression + - "t1 LEFT JOIN (...) ON ..." uses the join nest's ON expression. +*/ + +static Item **get_sargable_cond(JOIN *join, TABLE *table) +{ + Item **retval; + if (table->pos_in_table_list->on_expr) + { + /* + This is an inner table from a single-table LEFT JOIN, "t1 LEFT JOIN + t2 ON cond". Use the condition cond. + */ + retval= &table->pos_in_table_list->on_expr; + } + else if (table->pos_in_table_list->embedding && + !table->pos_in_table_list->embedding->sj_on_expr) + { + /* + This is the inner side of a multi-table outer join. Use the + appropriate ON expression. 
+ */ + retval= &(table->pos_in_table_list->embedding->on_expr); + } + else + { + /* The table is not inner wrt some LEFT JOIN. Use the WHERE clause */ + retval= &join->conds; + } + return retval; +} + + +/** + Calculate the best possible join and initialize the join structure. + + @retval + 0 ok + @retval + 1 Fatal error +*/ + +static bool +make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, + DYNAMIC_ARRAY *keyuse_array) +{ + int error= 0; + TABLE *UNINIT_VAR(table); /* inited in all loops */ + uint i,table_count,const_count,key; + uint sort_space; + table_map found_const_table_map, all_table_map; + key_map const_ref, eq_part; + bool has_expensive_keyparts; + TABLE **table_vector; + JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector; + KEYUSE *keyuse,*start_keyuse; + table_map outer_join=0; + table_map no_rows_const_tables= 0; + SARGABLE_PARAM *sargables= 0; + List_iterator<TABLE_LIST> ti(tables_list); + TABLE_LIST *tables; + THD *thd= join->thd; + DBUG_ENTER("make_join_statistics"); + + table_count=join->table_count; + + /* + best_extension_by_limited_search need sort space for 2POSITIION + objects per remaining table, which gives us + 2*(T + T-1 + T-2 + T-3...1 POSITIONS) = 2*(T+1)/2*T = (T*T+T) + */ + join->sort_space= sort_space= (table_count*table_count + table_count); + + /* + best_positions is ok to allocate with alloc() as we copy things to it with + memcpy() + */ + + if (!multi_alloc_root(join->thd->mem_root, + &stat, sizeof(JOIN_TAB)*(table_count), + &stat_ref, sizeof(JOIN_TAB*)* MAX_TABLES, + &stat_vector, sizeof(JOIN_TAB*)* (table_count +1), + &table_vector, sizeof(TABLE*)*(table_count*2), + &join->positions, sizeof(POSITION)*(table_count + 1), + &join->sort_positions, sizeof(POSITION)*(sort_space), + &join->best_positions, + sizeof(POSITION)*(table_count + 1), + NullS)) + DBUG_RETURN(1); + + /* The following should be optimized to only clear critical things */ + bzero((void*)stat, sizeof(JOIN_TAB)* table_count); + 
join->top_join_tab_count= table_count; + + /* Initialize POSITION objects */ + for (i=0 ; i <= table_count ; i++) + (void) new ((char*) (join->positions + i)) POSITION; + for (i=0 ; i < sort_space ; i++) + (void) new ((char*) (join->sort_positions + i)) POSITION; + + join->best_ref= stat_vector; + + stat_end=stat+table_count; + found_const_table_map= all_table_map=0; + const_count=0; + + for (s= stat, i= 0; (tables= ti++); s++, i++) + { + TABLE_LIST *embedding= tables->embedding; + stat_vector[i]=s; + table_vector[i]=s->table=table=tables->table; + s->tab_list= tables; + table->pos_in_table_list= tables; + error= tables->fetch_number_of_rows(); + set_statistics_for_table(join->thd, table); + bitmap_clear_all(&table->cond_set); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + const bool all_partitions_pruned_away= table->all_partitions_pruned_away; +#else + const bool all_partitions_pruned_away= FALSE; +#endif + + DBUG_EXECUTE_IF("bug11747970_raise_error", + { join->thd->set_killed(KILL_QUERY_HARD); }); + if (unlikely(error)) + { + table->file->print_error(error, MYF(0)); + goto error; + } + table->opt_range_keys.clear_all(); + table->intersect_keys.clear_all(); + table->reginfo.join_tab=s; + table->reginfo.not_exists_optimize=0; + bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); + all_table_map|= table->map; + s->preread_init_done= FALSE; + s->join=join; + + s->dependent= tables->dep_tables; + if (tables->schema_table) + table->file->stats.records= table->used_stat_records= 2; + table->opt_range_condition_rows= table->stat_records(); + + s->on_expr_ref= &tables->on_expr; + if (*s->on_expr_ref) + { + /* s is the only inner table of an outer join */ + if (!table->is_filled_at_execution() && + ((!table->file->stats.records && + (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) || + all_partitions_pruned_away) && !embedding) + { // Empty table + s->dependent= 0; // Ignore LEFT JOIN depend. 
+ no_rows_const_tables |= table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + continue; + } + outer_join|= table->map; + s->embedding_map= 0; + for (;embedding; embedding= embedding->embedding) + s->embedding_map|= embedding->nested_join->nj_map; + continue; + } + if (embedding) + { + /* s belongs to a nested join, maybe to several embedded joins */ + s->embedding_map= 0; + bool inside_an_outer_join= FALSE; + do + { + /* + If this is a semi-join nest, skip it, and proceed upwards. Maybe + we're in some outer join nest + */ + if (embedding->sj_on_expr) + { + embedding= embedding->embedding; + continue; + } + inside_an_outer_join= TRUE; + NESTED_JOIN *nested_join= embedding->nested_join; + s->embedding_map|=nested_join->nj_map; + s->dependent|= embedding->dep_tables; + embedding= embedding->embedding; + outer_join|= nested_join->used_tables; + } + while (embedding); + if (inside_an_outer_join) + continue; + } + if (!table->is_filled_at_execution() && + (table->s->system || + (table->file->stats.records <= 1 && + (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) || + all_partitions_pruned_away) && + !s->dependent && + !table->fulltext_searched && !join->no_const_tables) + { + set_position(join,const_count++,s,(KEYUSE*) 0); + no_rows_const_tables |= table->map; + } + + /* SJ-Materialization handling: */ + if (table->pos_in_table_list->jtbm_subselect && + table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab) + { + set_position(join,const_count++,s,(KEYUSE*) 0); + no_rows_const_tables |= table->map; + } + } + + stat_vector[i]=0; + join->outer_join=outer_join; + + if (join->outer_join) + { + /* + Build transitive closure for relation 'to be dependent on'. + This will speed up the plan search for many cases with outer joins, + as well as allow us to catch illegal cross references/ + Warshall's algorithm is used to build the transitive closure. 
+ As we use bitmaps to represent the relation the complexity + of the algorithm is O((number of tables)^2). + + The classic form of the Warshall's algorithm would look like: + for (i= 0; i < table_count; i++) + { + for (j= 0; j < table_count; j++) + { + for (k= 0; k < table_count; k++) + { + if (bitmap_is_set(stat[j].dependent, i) && + bitmap_is_set(stat[i].dependent, k)) + bitmap_set_bit(stat[j].dependent, k); + } + } + } + */ + + for (s= stat ; s < stat_end ; s++) + { + table= s->table; + for (JOIN_TAB *t= stat ; t < stat_end ; t++) + { + if (t->dependent & table->map) + t->dependent |= table->reginfo.join_tab->dependent; + } + if (outer_join & s->table->map) + s->table->maybe_null= 1; + } + /* Catch illegal cross references for outer joins */ + for (i= 0, s= stat ; i < table_count ; i++, s++) + { + if (s->dependent & s->table->map) + { + join->table_count=0; // Don't use join->table + my_message(ER_WRONG_OUTER_JOIN, + ER_THD(join->thd, ER_WRONG_OUTER_JOIN), MYF(0)); + goto error; + } + s->key_dependent= s->dependent; + } + } + + { + for (JOIN_TAB *s= stat ; s < stat_end ; s++) + { + TABLE_LIST *tl= s->table->pos_in_table_list; + if (tl->embedding && tl->embedding->sj_subq_pred) + { + s->embedded_dependent= tl->embedding->original_subq_pred_used_tables; + } + } + } + + if (thd->trace_started()) + trace_table_dependencies(thd, stat, join->table_count); + + if (join->conds || outer_join) + { + if (update_ref_and_keys(thd, keyuse_array, stat, join->table_count, + join->conds, ~outer_join, join->select_lex, &sargables)) + goto error; + /* + Keyparts without prefixes may be useful if this JOIN is a subquery, and + if the subquery may be executed via the IN-EXISTS strategy. 
+ */ + bool skip_unprefixed_keyparts= + !(join->is_in_subquery() && + join->unit->item->get_IN_subquery()->test_strategy(SUBS_IN_TO_EXISTS)); + + if (keyuse_array->elements && + sort_and_filter_keyuse(join, keyuse_array, + skip_unprefixed_keyparts)) + goto error; + DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array);); + if (thd->trace_started()) + print_keyuse_array_for_trace(thd, keyuse_array); + } + + join->const_table_map= no_rows_const_tables; + join->const_tables= const_count; + eliminate_tables(join); + join->const_table_map &= ~no_rows_const_tables; + const_count= join->const_tables; + found_const_table_map= join->const_table_map; + + /* Read tables with 0 or 1 rows (system tables) */ + for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count; + p_pos < p_end ; + p_pos++) + { + s= p_pos->table; + if (! (s->table->map & join->eliminated_tables)) + { + int tmp; + s->type=JT_SYSTEM; + join->const_table_map|=s->table->map; + if ((tmp=join_read_const_table(join->thd, s, p_pos))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + { + found_const_table_map|= s->table->map; + s->table->pos_in_table_list->optimized_away= TRUE; + } + } + } + + /* loop until no more const tables are found */ + int ref_changed; + do + { + ref_changed = 0; + more_const_tables_found: + + /* + We only have to loop from stat_vector + const_count as + set_position() will move all const_tables first in stat_vector + */ + + for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) + { + table=s->table; + + if (table->is_filled_at_execution()) + continue; + + /* + If equi-join condition by a key is null rejecting and after a + substitution of a const table the key value happens to be null + then we can state that there are no matches for this equi-join. 
+ */ + if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map && + !(table->map & join->eliminated_tables)) + { + /* + When performing an outer join operation if there are no matching rows + for the single row of the outer table all the inner tables are to be + null complemented and thus considered as constant tables. + Here we apply this consideration to the case of outer join operations + with a single inner table only because the case with nested tables + would require a more thorough analysis. + TODO. Apply single row substitution to null complemented inner tables + for nested outer join operations. + */ + while (keyuse->table == table) + { + if (!keyuse->is_for_hash_join() && + !(keyuse->val->used_tables() & ~join->const_table_map) && + keyuse->val->is_null() && keyuse->null_rejecting) + { + s->type= JT_CONST; + s->table->const_table= 1; + mark_as_null_row(table); + found_const_table_map|= table->map; + join->const_table_map|= table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + goto more_const_tables_found; + } + keyuse++; + } + } + + if (s->dependent) // If dependent on some table + { + // All dep. 
must be constants + if (s->dependent & ~(found_const_table_map)) + continue; + if (table->file->stats.records <= 1L && + (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && + !table->pos_in_table_list->embedding && + !((outer_join & table->map) && + (*s->on_expr_ref)->is_expensive())) + { // system table + int tmp= 0; + s->type=JT_SYSTEM; + join->const_table_map|=table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + if ((tmp= join_read_const_table(join->thd, s, join->positions+const_count-1))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + found_const_table_map|= table->map; + continue; + } + } + /* check if table can be read by key or table only uses const refs */ + if ((keyuse=s->keyuse)) + { + s->type= JT_REF; + while (keyuse->table == table) + { + if (keyuse->is_for_hash_join()) + { + keyuse++; + continue; + } + start_keyuse=keyuse; + key=keyuse->key; + s->keys.set_bit(key); // TODO: remove this ? + + const_ref.clear_all(); + eq_part.clear_all(); + has_expensive_keyparts= false; + do + { + if (keyuse->val->type() != Item::NULL_ITEM && + !keyuse->optimize && + keyuse->keypart != FT_KEYPART) + { + if (!((~found_const_table_map) & keyuse->used_tables)) + { + const_ref.set_bit(keyuse->keypart); + if (keyuse->val->is_expensive()) + has_expensive_keyparts= true; + } + eq_part.set_bit(keyuse->keypart); + } + keyuse++; + } while (keyuse->table == table && keyuse->key == key); + + TABLE_LIST *embedding= table->pos_in_table_list->embedding; + /* + TODO (low priority): currently we ignore the const tables that + are within a semi-join nest which is within an outer join nest. + The effect of this is that we don't do const substitution for + such tables. 
+ */ + KEY *keyinfo= table->key_info + key; + uint key_parts= table->actual_n_key_parts(keyinfo); + if (eq_part.is_prefix(key_parts) && + !table->fulltext_searched && + (!embedding || (embedding->sj_on_expr && !embedding->embedding))) + { + key_map base_part, base_const_ref, base_eq_part; + base_part.set_prefix(keyinfo->user_defined_key_parts); + base_const_ref= const_ref; + base_const_ref.intersect(base_part); + base_eq_part= eq_part; + base_eq_part.intersect(base_part); + if (table->actual_key_flags(keyinfo) & HA_NOSAME) + { + + if (base_const_ref == base_eq_part && + !has_expensive_keyparts && + !((outer_join & table->map) && + (*s->on_expr_ref)->is_expensive())) + { // Found everything for ref. + int tmp; + ref_changed = 1; + s->type= JT_CONST; + join->const_table_map|=table->map; + set_position(join,const_count++,s,start_keyuse); + /* create_ref_for_key will set s->table->const_table */ + if (create_ref_for_key(join, s, start_keyuse, FALSE, + found_const_table_map)) + goto error; + if ((tmp=join_read_const_table(join->thd, s, + join->positions+const_count-1))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + found_const_table_map|= table->map; + break; + } + } + else if (base_const_ref == base_eq_part) + s->const_keys.set_bit(key); + } + } + } + } + } while (ref_changed); + + join->sort_by_table= get_sort_by_table(join->order, join->group_list, + join->select_lex->leaf_tables, + join->const_table_map); + /* + Update info on indexes that can be used for search lookups as + reading const tables may has added new sargable predicates. 
+ */ + if (const_count && sargables) + { + for( ; sargables->field ; sargables++) + { + Field *field= sargables->field; + JOIN_TAB *join_tab= field->table->reginfo.join_tab; + key_map possible_keys= field->key_start; + possible_keys.intersect(field->table->keys_in_use_for_query); + bool is_const= 1; + for (uint j=0; j < sargables->num_values; j++) + is_const&= sargables->arg_value[j]->const_item(); + if (is_const) + join_tab[0].const_keys.merge(possible_keys); + } + } + + join->impossible_where= false; + if (join->conds && const_count) + { + Item* &conds= join->conds; + COND_EQUAL *orig_cond_equal = join->cond_equal; + + conds->update_used_tables(); + conds= conds->remove_eq_conds(join->thd, &join->cond_value, true); + if (conds && conds->type() == Item::COND_ITEM && + ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC) + join->cond_equal= &((Item_cond_and*) conds)->m_cond_equal; + join->select_lex->where= conds; + if (join->cond_value == Item::COND_FALSE) + { + join->impossible_where= true; + conds= (Item*) Item_false; + } + + join->cond_equal= NULL; + if (conds) + { + if (conds->type() == Item::COND_ITEM && + ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC) + join->cond_equal= (&((Item_cond_and *) conds)->m_cond_equal); + else if (conds->type() == Item::FUNC_ITEM && + ((Item_func*) conds)->functype() == Item_func::MULT_EQUAL_FUNC) + { + if (!join->cond_equal) + join->cond_equal= new COND_EQUAL; + join->cond_equal->current_level.empty(); + join->cond_equal->current_level.push_back((Item_equal*) conds, + join->thd->mem_root); + } + } + + if (orig_cond_equal != join->cond_equal) + { + /* + If join->cond_equal has changed all references to it from COND_EQUAL + objects associated with ON expressions must be updated. 
+ */ + for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) + { + if (*s->on_expr_ref && s->cond_equal && + s->cond_equal->upper_levels == orig_cond_equal) + s->cond_equal->upper_levels= join->cond_equal; + } + } + } + + join->join_tab= stat; + join->make_notnull_conds_for_range_scans(); + + /* Calc how many (possible) matched records in each table */ + + /* + Todo: add a function so that we can add these Json_writer_objects + easily. + Another way would be to enclose them in a scope {}; + */ + { + Json_writer_object rows_estimation_wrapper(thd); + Json_writer_array rows_estimation(thd, "rows_estimation"); + + for (s=stat ; s < stat_end ; s++) + { + s->startup_cost= 0; + if (s->type == JT_SYSTEM || s->type == JT_CONST) + { + + Json_writer_object table_records(thd); + /* Only one matching row */ + s->found_records= s->records= 1; + s->read_time=1.0; + s->worst_seeks=1.0; + table_records.add_table_name(s) + .add("rows", s->found_records) + .add("cost", s->read_time) + .add("table_type", s->type == JT_CONST ? + "const" : + "system"); + continue; + } + /* Approximate found rows and time to read them */ + if (s->table->is_filled_at_execution()) + { + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records; + s->table->opt_range_condition_rows=s->records; + } + else + s->scan_time(); + + if (s->table->is_splittable()) + s->add_keyuses_for_splitting(); + + /* + Set a max range of how many seeks we can expect when using keys + This is can't be to high as otherwise we are likely to use + table scan. + */ + s->worst_seeks= MY_MIN((double) s->found_records / 10, + (double) s->read_time*3); + if (s->worst_seeks < 2.0) // Fix for small tables + s->worst_seeks=2.0; + + /* + Add to stat->const_keys those indexes for which all group fields or + all select distinct fields participate in one index. 
+ */ + add_group_and_distinct_keys(join, s); + + s->table->cond_selectivity= 1.0; + + /* + Perform range analysis if there are keys it could use (1). + Don't do range analysis for materialized subqueries (2). + Don't do range analysis for materialized derived tables/views (3) + */ + if ((!s->const_keys.is_clear_all() || + !bitmap_is_clear_all(&s->table->cond_set)) && // (1) + !s->table->is_filled_at_execution() && // (2) + !(s->table->pos_in_table_list->derived && // (3) + s->table->pos_in_table_list->is_materialized_derived())) // (3) + { + bool impossible_range= FALSE; + ha_rows records= HA_POS_ERROR; + SQL_SELECT *select= 0; + Item **sargable_cond= NULL; + if (!s->const_keys.is_clear_all()) + { + sargable_cond= get_sargable_cond(join, s->table); + bool is_sargable_cond_of_where= sargable_cond == &join->conds; + + select= make_select(s->table, found_const_table_map, + found_const_table_map, + *sargable_cond, + (SORT_INFO*) 0, 1, &error); + if (!select) + goto error; + if (get_quick_record_count(join->thd, select, s->table, + &s->const_keys, join->row_limit, &records)) + { + /* There was an error in test_quick_select */ + delete select; + goto error; + } + /* + Range analyzer might have modified the condition. Put it the new + condition to where we got it from. 
+ */ + *sargable_cond= select->cond; + + if (is_sargable_cond_of_where && + join->conds && join->conds->type() == Item::COND_ITEM && + ((Item_cond*) (join->conds))->functype() == + Item_func::COND_AND_FUNC) + join->cond_equal= &((Item_cond_and*) (join->conds))->m_cond_equal; + + s->quick=select->quick; + select->quick=0; + s->needed_reg=select->needed_reg; + impossible_range= records == 0 && s->table->reginfo.impossible_range; + if (join->thd->lex->sql_command == SQLCOM_SELECT && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER)) + s->table->init_cost_info_for_usable_range_rowid_filters(join->thd); + } + if (!impossible_range) + { + if (!sargable_cond) + sargable_cond= get_sargable_cond(join, s->table); + if (join->thd->variables.optimizer_use_condition_selectivity > 1) + calculate_cond_selectivity_for_table(join->thd, s->table, + sargable_cond); + if (s->table->reginfo.impossible_range) + { + impossible_range= TRUE; + records= 0; + } + } + if (impossible_range) + { + /* + Impossible WHERE or ON expression + In case of ON, we mark that the we match one empty NULL row. + In case of WHERE, don't set found_const_table_map to get the + caller to abort with a zero row result. + */ + TABLE_LIST *emb= s->table->pos_in_table_list->embedding; + if (emb && !emb->sj_on_expr && !*s->on_expr_ref) + { + /* Mark all tables in a multi-table join nest as const */ + mark_join_nest_as_const(join, emb, &found_const_table_map, + &const_count); + } + else + { + join->const_table_map|= s->table->map; + set_position(join,const_count++,s,(KEYUSE*) 0); + s->type= JT_CONST; + s->table->const_table= 1; + if (*s->on_expr_ref) + { + /* Generate empty row */ + s->info= ET_IMPOSSIBLE_ON_CONDITION; + found_const_table_map|= s->table->map; + mark_as_null_row(s->table); // All fields are NULL + } + } + } + if (records != HA_POS_ERROR) + { + s->found_records=records; + s->read_time= s->quick ? 
s->quick->read_time : 0.0; + } + if (select) + delete select; + else + { + if (thd->trace_started()) + add_table_scan_values_to_trace(thd, s); + } + } + else + { + if (thd->trace_started()) + add_table_scan_values_to_trace(thd, s); + } + } + } + + if (pull_out_semijoin_tables(join)) + DBUG_RETURN(TRUE); + + join->join_tab=stat; + join->top_join_tab_count= table_count; + join->map2table=stat_ref; + join->table= table_vector; + join->const_tables=const_count; + join->found_const_table_map=found_const_table_map; + + if (join->const_tables != join->table_count) + optimize_keyuse(join, keyuse_array); + + DBUG_ASSERT(!join->conds || !join->cond_equal || + !join->cond_equal->current_level.elements || + (join->conds->type() == Item::COND_ITEM && + ((Item_cond*) (join->conds))->functype() == + Item_func::COND_AND_FUNC && + join->cond_equal == + &((Item_cond_and *) (join->conds))->m_cond_equal) || + (join->conds->type() == Item::FUNC_ITEM && + ((Item_func*) (join->conds))->functype() == + Item_func::MULT_EQUAL_FUNC && + join->cond_equal->current_level.elements == 1 && + join->cond_equal->current_level.head() == join->conds)); + + if (optimize_semijoin_nests(join, all_table_map)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + + { + double records= 1; + SELECT_LEX_UNIT *unit= join->select_lex->master_unit(); + + /* Find an optimal join order of the non-constant tables. */ + if (join->const_tables != join->table_count) + { + if (choose_plan(join, all_table_map & ~join->const_table_map)) + goto error; + +#ifdef HAVE_valgrind + // JOIN::positions holds the current query plan. 
We've already + // made the plan choice, so we should only use JOIN::best_positions + for (uint k=join->const_tables; k < join->table_count; k++) + MEM_UNDEFINED(&join->positions[k], sizeof(join->positions[k])); +#endif + } + else + { + memcpy((uchar*) join->best_positions,(uchar*) join->positions, + sizeof(POSITION)*join->const_tables); + join->join_record_count= 1.0; + join->best_read=1.0; + } + + if (!(join->select_options & SELECT_DESCRIBE) && + unit->derived && unit->derived->is_materialized_derived()) + { + /* + Calculate estimated number of rows for materialized derived + table/view. + */ + for (i= 0; i < join->table_count ; i++) + if (double rr= join->best_positions[i].records_read) + records= COST_MULT(records, rr); + ha_rows rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records; + set_if_smaller(rows, unit->lim.get_select_limit()); + join->select_lex->increase_derived_records(rows); + } + } + + if (join->choose_subquery_plan(all_table_map & ~join->const_table_map)) + goto error; + + DEBUG_SYNC(join->thd, "inside_make_join_statistics"); + + DBUG_RETURN(0); + +error: + /* + Need to clean up join_tab from TABLEs in case of error. + They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab + may not be assigned yet by this function (which is building join_tab). + Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke. 
+ */ + { + TABLE_LIST *tmp_table; + List_iterator<TABLE_LIST> ti2(tables_list); + while ((tmp_table= ti2++)) + tmp_table->table->reginfo.join_tab= NULL; + } + DBUG_RETURN (1); +} + + +/***************************************************************************** + Check with keys are used and with tables references with tables + Updates in stat: + keys Bitmap of all used keys + const_keys Bitmap of all keys with may be used with quick_select + keyuse Pointer to possible keys +*****************************************************************************/ + + +/** + Merge new key definitions to old ones, remove those not used in both. + + This is called for OR between different levels. + + That is, the function operates on an array of KEY_FIELD elements which has + two parts: + + $LEFT_PART $RIGHT_PART + +-----------------------+-----------------------+ + start new_fields end + + $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two + parts of the OR condition. Our task is to produce an array of KEY_FIELD + elements that would correspond to "$LEFT_PART OR $RIGHT_PART". + + The rules for combining elements are as follows: + + (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)= + + = AND_ij (keyfieldA_i OR keyfieldB_j) + + We discard all (keyfieldA_i OR keyfieldB_j) that refer to different + fields. For those referring to the same field, the logic is as follows: + + t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different + we can't produce a single equality, + so produce nothing) + + t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1 + + t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set + KEY_OPTIMIZE_REF_OR_NULL flag + + The last one is for ref_or_null access. 
We have handling for this special + because it's needed for evaluating IN subqueries that are internally + transformed into + + @code + EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL) + @endcode + + See add_key_fields() for discussion of what is and_level. + + KEY_FIELD::null_rejecting is processed as follows: @n + result has null_rejecting=true if it is set for both ORed references. + for example: + - (t2.key = t1.field OR t2.key = t1.field) -> null_rejecting=true + - (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false + + @todo + The result of this is that we're missing some 'ref' accesses. + OptimizerTeam: Fix this +*/ + +static KEY_FIELD * +merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end, + uint and_level) +{ + if (start == new_fields) + return start; // Impossible or + if (new_fields == end) + return start; // No new fields, skip all + + KEY_FIELD *first_free=new_fields; + + /* Mark all found fields in old array */ + for (; new_fields != end ; new_fields++) + { + for (KEY_FIELD *old=start ; old != first_free ; old++) + { + if (old->field == new_fields->field) + { + /* + NOTE: below const_item() call really works as "!used_tables()", i.e. + it can return FALSE where it is feasible to make it return TRUE. + + The cause is as follows: Some of the tables are already known to be + const tables (the detection code is in make_join_statistics(), + above the update_ref_and_keys() call), but we didn't propagate + information about this: TABLE::const_table is not set to TRUE, and + Item::update_used_tables() hasn't been called for each item. + The result of this is that we're missing some 'ref' accesses. + TODO: OptimizerTeam: Fix this + */ + if (!new_fields->val->const_item()) + { + /* + If the value matches, we can use the key reference. 
+ If not, we keep it until we have examined all new values + */ + if (old->val->eq(new_fields->val, old->field->binary())) + { + old->level= and_level; + old->optimize= ((old->optimize & new_fields->optimize & + KEY_OPTIMIZE_EXISTS) | + ((old->optimize | new_fields->optimize) & + KEY_OPTIMIZE_REF_OR_NULL)); + old->null_rejecting= (old->null_rejecting && + new_fields->null_rejecting); + } + } + else if (old->eq_func && new_fields->eq_func && + old->val->eq_by_collation(new_fields->val, + old->field->binary(), + old->field->charset())) + + { + old->level= and_level; + old->optimize= ((old->optimize & new_fields->optimize & + KEY_OPTIMIZE_EXISTS) | + ((old->optimize | new_fields->optimize) & + KEY_OPTIMIZE_REF_OR_NULL)); + old->null_rejecting= (old->null_rejecting && + new_fields->null_rejecting); + } + else if (old->eq_func && new_fields->eq_func && + ((old->val->can_eval_in_optimize() && old->val->is_null()) || + (!new_fields->val->is_expensive() && + new_fields->val->is_null()))) + { + /* field = expression OR field IS NULL */ + old->level= and_level; + if (old->field->maybe_null()) + { + old->optimize= KEY_OPTIMIZE_REF_OR_NULL; + /* The referred expression can be NULL: */ + old->null_rejecting= 0; + } + /* + Remember the NOT NULL value unless the value does not depend + on other tables. + */ + if (!old->val->used_tables() && !old->val->is_expensive() && + old->val->is_null()) + old->val= new_fields->val; + } + else + { + /* + We are comparing two different const. 
In this case we can't + use a key-lookup on this so it's better to remove the value + and let the range optimzier handle it + */ + if (old == --first_free) // If last item + break; + *old= *first_free; // Remove old value + old--; // Retry this value + } + } + } + } + /* Remove all not used items */ + for (KEY_FIELD *old=start ; old != first_free ;) + { + if (old->level != and_level) + { // Not used in all levels + if (old == --first_free) + break; + *old= *first_free; // Remove old value + continue; + } + old++; + } + return first_free; +} + + +/* + Given a field, return its index in semi-join's select list, or UINT_MAX + + DESCRIPTION + Given a field, we find its table; then see if the table is within a + semi-join nest and if the field was in select list of the subselect. + If it was, we return field's index in the select list. The value is used + by LooseScan strategy. +*/ + +static uint get_semi_join_select_list_index(Field *field) +{ + uint res= UINT_MAX; + TABLE_LIST *emb_sj_nest; + if ((emb_sj_nest= field->table->pos_in_table_list->embedding) && + emb_sj_nest->sj_on_expr) + { + Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred; + st_select_lex *subq_lex= subq_pred->unit->first_select(); + uint ncols= subq_pred->left_exp()->cols(); + if (ncols == 1) + { + Item *sel_item= subq_lex->ref_pointer_array[0]; + if (sel_item->type() == Item::FIELD_ITEM && + ((Item_field*)sel_item)->field->eq(field)) + { + res= 0; + } + } + else + { + for (uint i= 0; i < ncols; i++) + { + Item *sel_item= subq_lex->ref_pointer_array[i]; + if (sel_item->type() == Item::FIELD_ITEM && + ((Item_field*)sel_item)->field->eq(field)) + { + res= i; + break; + } + } + } + } + return res; +} + + +/** + Add a possible key to array of possible keys if it's usable as a key + + @param key_fields Pointer to add key, if usable + @param and_level And level, to be stored in KEY_FIELD + @param cond Condition predicate + @param field Field used in comparision + @param eq_func True if we used =, <=> 
or IS NULL + @param value Value used for comparison with field + @param num_values Number of values[] that we are comparing against + @param usable_tables Tables which can be used for key optimization + @param sargables IN/OUT Array of found sargable candidates + @param row_col_no if = n that > 0 then field is compared only + against the n-th component of row values + + @note + If we are doing a NOT NULL comparison on a NOT NULL field in a outer join + table, we store this to be able to do not exists optimization later. + + @returns + *key_fields is incremented if we stored a key in the array +*/ + +static void +add_key_field(JOIN *join, + KEY_FIELD **key_fields,uint and_level, Item_bool_func *cond, + Field *field, bool eq_func, Item **value, uint num_values, + table_map usable_tables, SARGABLE_PARAM **sargables, + uint row_col_no= 0) +{ + uint optimize= 0; + if (eq_func && + ((join->is_allowed_hash_join_access() && + field->hash_join_is_possible() && + !(field->table->pos_in_table_list->is_materialized_derived() && + field->table->is_created())) || + (field->table->pos_in_table_list->is_materialized_derived() && + !field->table->is_created() && !(field->flags & BLOB_FLAG)))) + { + optimize= KEY_OPTIMIZE_EQ; + } + else if (!(field->flags & PART_KEY_FLAG)) + { + // Don't remove column IS NULL on a LEFT JOIN table + if (eq_func && (*value)->type() == Item::NULL_ITEM && + field->table->maybe_null && !field->null_ptr) + { + optimize= KEY_OPTIMIZE_EXISTS; + DBUG_ASSERT(num_values == 1); + } + } + if (optimize != KEY_OPTIMIZE_EXISTS) + { + table_map used_tables=0; + bool optimizable=0; + for (uint i=0; i<num_values; i++) + { + Item *curr_val; + if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM) + { + Item_row *value_tuple= (Item_row *) (value[i]->real_item()); + curr_val= value_tuple->element_index(row_col_no - 1); + } + else + curr_val= value[i]; + table_map value_used_tables= curr_val->used_tables(); + used_tables|= value_used_tables; + if 
(!(value_used_tables & (field->table->map | RAND_TABLE_BIT))) + optimizable=1; + } + if (!optimizable) + return; + if (!(usable_tables & field->table->map)) + { + if (!eq_func || (*value)->type() != Item::NULL_ITEM || + !field->table->maybe_null || field->null_ptr) + return; // Can't use left join optimize + optimize= KEY_OPTIMIZE_EXISTS; + } + else + { + JOIN_TAB *stat=field->table->reginfo.join_tab; + key_map possible_keys=field->get_possible_keys(); + possible_keys.intersect(field->table->keys_in_use_for_query); + stat[0].keys.merge(possible_keys); // Add possible keys + + /* + Save the following cases: + Field op constant + Field LIKE constant where constant doesn't start with a wildcard + Field = field2 where field2 is in a different table + Field op formula + Field IS NULL + Field IS NOT NULL + Field BETWEEN ... + Field IN ... + */ + if (field->flags & PART_KEY_FLAG) + { + stat[0].key_dependent|= used_tables; + if (field->key_start.bits_set()) + stat[0].key_start_dependent= 1; + } + + bool is_const=1; + for (uint i=0; i<num_values; i++) + { + Item *curr_val; + if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM) + { + Item_row *value_tuple= (Item_row *) (value[i]->real_item()); + curr_val= value_tuple->element_index(row_col_no - 1); + } + else + curr_val= value[i]; + if (!(is_const&= curr_val->const_item())) + break; + } + if (is_const) + { + stat[0].const_keys.merge(possible_keys); + bitmap_set_bit(&field->table->cond_set, field->field_index); + } + else if (!eq_func) + { + /* + Save info to be able check whether this predicate can be + considered as sargable for range analysis after reading const tables. + We do not save info about equalities as update_const_equal_items + will take care of updating info on keys from sargable equalities. 
+ */ + (*sargables)--; + (*sargables)->field= field; + (*sargables)->arg_value= value; + (*sargables)->num_values= num_values; + } + if (!eq_func) // eq_func is NEVER true when num_values > 1 + return; + } + } + /* + For the moment eq_func is always true. This slot is reserved for future + extensions where we want to remembers other things than just eq comparisons + */ + DBUG_ASSERT(eq_func); + /* Store possible eq field */ + (*key_fields)->field= field; + (*key_fields)->eq_func= eq_func; + (*key_fields)->val= *value; + (*key_fields)->cond= cond; + (*key_fields)->level= and_level; + (*key_fields)->optimize= optimize; + /* + If the condition we are analyzing is NULL-rejecting and at least + one side of the equalities is NULLable, mark the KEY_FIELD object as + null-rejecting. This property is used by: + - add_not_null_conds() to add "column IS NOT NULL" conditions + - best_access_path() to produce better estimates for NULL-able unique keys. + */ + { + if ((cond->functype() == Item_func::EQ_FUNC || + cond->functype() == Item_func::MULT_EQUAL_FUNC) && + ((*value)->maybe_null() || field->real_maybe_null())) + (*key_fields)->null_rejecting= true; + else + (*key_fields)->null_rejecting= false; + } + (*key_fields)->cond_guard= NULL; + + (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field); + (*key_fields)++; +} + +/** + Add possible keys to array of possible keys originated from a simple + predicate. 
+ + @param key_fields Pointer to add key, if usable + @param and_level And level, to be stored in KEY_FIELD + @param cond Condition predicate + @param field_item Field item used for comparison + @param eq_func True if we used =, <=> or IS NULL + @param value Value used for comparison with field_item + @param num_values Number of values[] that we are comparing against + @param usable_tables Tables which can be used for key optimization + @param sargables IN/OUT Array of found sargable candidates + @param row_col_no if = n that > 0 then field is compared only + against the n-th component of row values + + @note + If field items f1 and f2 belong to the same multiple equality and + a key is added for f1, the the same key is added for f2. + + @returns + *key_fields is incremented if we stored a key in the array +*/ + +static void +add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level, + Item_bool_func *cond, Item *field_item, + bool eq_func, Item **val, + uint num_values, table_map usable_tables, + SARGABLE_PARAM **sargables, uint row_col_no= 0) +{ + Field *field= ((Item_field *) (field_item->real_item()))->field; + add_key_field(join, key_fields, and_level, cond, field, + eq_func, val, num_values, usable_tables, sargables, + row_col_no); + Item_equal *item_equal= field_item->get_item_equal(); + if (item_equal) + { + /* + Add to the set of possible key values every substitution of + the field for an equal field included into item_equal + */ + Item_equal_fields_iterator it(*item_equal); + while (it++) + { + Field *equal_field= it.get_curr_field(); + if (!field->eq(equal_field)) + { + add_key_field(join, key_fields, and_level, cond, equal_field, + eq_func, val, num_values, usable_tables, + sargables, row_col_no); + } + } + } +} + + +/** + Check if an expression is a non-outer field. + + Checks if an expression is a field and belongs to the current select. 
@param field   Item expression to check

@return boolean
@retval TRUE   the expression is a local field
@retval FALSE  it's something else
*/

static bool
is_local_field (Item *field)
{
  /*
    Three conditions must all hold: the underlying item is a field, the
    expression does not use the outer-reference pseudo table, and the
    field item has no "depended from" select recorded.
  */
  return field->real_item()->type() == Item::FIELD_ITEM
    && !(field->used_tables() & OUTER_REF_TABLE_BIT)
    && !((Item_field *)field->real_item())->get_depended_from();
}


/*
  In this and other functions, and_level is a number that is ever-growing
  and is different for the contents of every AND or OR clause. For example,
  when processing clause

     (a AND b AND c) OR (x AND y)

  we'll have
   * KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
   * KEY_FIELD elements for (x AND y) are assigned and_level=2
   * OR operation is performed, and whatever elements are left after it are
     assigned and_level=3.

  The primary reason for having and_level attribute is the OR operation which
  uses and_level to mark KEY_FIELDs that should get into the result of the OR
  operation
*/


/*
  AND: collect the KEY_FIELDs of every argument, then stamp all elements
  that were appended by this call with the current *and_level.
*/

void
Item_cond_and::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                              uint *and_level, table_map usable_tables,
                              SARGABLE_PARAM **sargables)
{
  List_iterator_fast<Item> li(*argument_list());
  KEY_FIELD *org_key_fields= *key_fields;     // first element added here

  Item *item;
  while ((item=li++))
    item->add_key_fields(join, key_fields, and_level, usable_tables,
                         sargables);
  for (; org_key_fields != *key_fields ; org_key_fields++)
    org_key_fields->level= *and_level;
}


/*
  Generic Item_cond handler (the and_level comment above describes this
  merge as the OR case): the first argument is processed on its own level,
  then each further argument is processed on a fresh level and merged into
  the accumulated result by merge_key_fields(), which receives yet another
  new level.

  The first argument is fetched without a NULL check, i.e. the argument
  list is expected to be non-empty.
*/

void
Item_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                          uint *and_level, table_map usable_tables,
                          SARGABLE_PARAM **sargables)
{
  List_iterator_fast<Item> li(*argument_list());
  KEY_FIELD *org_key_fields= *key_fields;

  (*and_level)++;
  (li++)->add_key_fields(join, key_fields, and_level, usable_tables,
                         sargables);
  Item *item;
  while ((item=li++))
  {
    KEY_FIELD *start_key_fields= *key_fields; // start of this argument's part
    (*and_level)++;
    item->add_key_fields(join, key_fields, and_level, usable_tables,
                         sargables);
    *key_fields= merge_key_fields(org_key_fields,start_key_fields,
                                  *key_fields, ++(*and_level));
  }
}


void
Item_func_trig_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                                    uint *and_level, table_map usable_tables,
                                    SARGABLE_PARAM **sargables)
{
  /*
    Subquery optimization: Conditions that are pushed down into subqueries
    are wrapped into Item_func_trig_cond. We process the wrapped condition
    but need to set cond_guard for KEYUSE elements generated from it.

    The wrapped condition is only processed for a join without GROUP BY and
    ORDER BY whose unit belongs to an IN subquery and is not a unit
    operation; in every other case nothing is added.
  */
  if (!join->group_list && !join->order &&
      join->unit->item &&
      join->unit->item->substype() == Item_subselect::IN_SUBS &&
      !join->unit->is_unit_op())
  {
    KEY_FIELD *save= *key_fields;
    args[0]->add_key_fields(join, key_fields, and_level, usable_tables,
                            sargables);
    // Indicate that this ref access candidate is for subquery lookup:
    for (; save != *key_fields; save++)
      save->cond_guard= get_trig_var();
  }
}


void
Item_func_between::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                                  uint *and_level, table_map usable_tables,
                                  SARGABLE_PARAM **sargables)
{
  /*
    Build list of possible keys for 'a BETWEEN low AND high'.
    It is handled similar to the equivalent condition
    'a >= low AND a <= high':
  */
  Item_field *field_item;
  bool equal_func= false;
  uint num_values= 2;

  /*
    Comparison mode passed to Item::eq() below: taken from the field when
    args[0] is a field, otherwise binary comparison is assumed.
  */
  bool binary_cmp= (args[0]->real_item()->type() == Item::FIELD_ITEM)
        ? ((Item_field*) args[0]->real_item())->field->binary()
        : true;
  /*
    Additional optimization: If 'low = high':
    Handle as if the condition was "t.key = low".
  */
  if (!negated && args[1]->eq(args[2], binary_cmp))
  {
    equal_func= true;
    num_values= 1;
  }

  /*
    Append keys for 'field <cmp> value[]' if the
    condition is of the form:
      '<field> BETWEEN value[1] AND value[2]'
  */
  if (is_local_field(args[0]))
  {
    field_item= (Item_field *) (args[0]->real_item());
    add_key_equal_fields(join, key_fields, *and_level, this,
                         field_item, equal_func, &args[1],
                         num_values, usable_tables, sargables);
  }
  /*
    Append keys for 'value[0] <cmp> field' if the
    condition is of the form:
      'value[0] BETWEEN field1 AND field2'

    When 'low = high' was detected above, num_values is 1 and only
    args[1] is examined here.
  */
  for (uint i= 1; i <= num_values; i++)
  {
    if (is_local_field(args[i]))
    {
      field_item= (Item_field *) (args[i]->real_item());
      add_key_equal_fields(join, key_fields, *and_level, this,
                           field_item, equal_func, args,
                           1, usable_tables, sargables);
    }
  }
}


/*
  IN: handles 'field IN (v1, v2, ...)' and, when the left side is a row,
  '(c1, c2, ...) IN (...)' where each local field component ci is added
  with row_col_no= i + 1. Nothing is added when the predicate uses outer
  references.
*/

void
Item_func_in::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                             uint *and_level, table_map usable_tables,
                             SARGABLE_PARAM **sargables)
{
  if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
  {
    /*
      NOTE(review): presumably a single-value IN has been rewritten
      before this point, hence arg_count != 2 - confirm.
    */
    DBUG_ASSERT(arg_count != 2);
    add_key_equal_fields(join, key_fields, *and_level, this,
                         (Item_field*) (args[0]->real_item()), false,
                         args + 1, arg_count - 1, usable_tables, sargables);
  }
  else if (key_item()->type() == Item::ROW_ITEM &&
           !(used_tables() & OUTER_REF_TABLE_BIT))
  {
    Item_row *key_row= (Item_row *) key_item();
    Item **key_col= key_row->addr(0);
    uint row_cols= key_row->cols();
    for (uint i= 0; i < row_cols; i++, key_col++)
    {
      if (is_local_field(*key_col))
      {
        Item_field *field_item= (Item_field *)((*key_col)->real_item());
        add_key_equal_fields(join, key_fields, *and_level, this,
                             field_item, false, args + 1, arg_count - 1,
                             usable_tables, sargables, i + 1);
      }
    }
  }

}


/*
  '<>': adds a non-equality candidate only for 'field <> non_field' (in
  either argument order); 'field <> field' adds nothing.
*/

void
Item_func_ne::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                             uint *and_level, table_map usable_tables,
                             SARGABLE_PARAM **sargables)
{
  if (!(used_tables() & OUTER_REF_TABLE_BIT))
  {
    /*
      QQ: perhaps test for !is_local_field(args[1]) is not really needed here.
      Other comparison functions, e.g. Item_func_le, Item_func_gt, etc,
      do not have this test. See Item_bool_func2::add_key_fields_optimize_op().
      Check with the optimizer team.
    */
    if (is_local_field(args[0]) && !is_local_field(args[1]))
      add_key_equal_fields(join, key_fields, *and_level, this,
                           (Item_field*) (args[0]->real_item()), false,
                           &args[1], 1, usable_tables, sargables);
    /*
      QQ: perhaps test for !is_local_field(args[0]) is not really needed here.
    */
    if (is_local_field(args[1]) && !is_local_field(args[0]))
      add_key_equal_fields(join, key_fields, *and_level, this,
                           (Item_field*) (args[1]->real_item()), false,
                           &args[0], 1, usable_tables, sargables);
  }
}


void
Item_func_like::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                               uint *and_level, table_map usable_tables,
                               SARGABLE_PARAM **sargables)
{
  if (is_local_field(args[0]) && with_sargable_pattern())
  {
    /*
      SELECT * FROM t1 WHERE field LIKE const_pattern
      const_pattern starts with a non-wildcard character
    */
    add_key_equal_fields(join, key_fields, *and_level, this,
                         (Item_field*) args[0]->real_item(), false,
                         args + 1, 1, usable_tables, sargables);
  }
}


/*
  Shared helper for two-argument comparisons: each argument that is a local
  field is added with the other argument as its comparison value.
  equal_func is passed through as the eq_func flag.
*/

void
Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields,
                                            uint *and_level,
                                            table_map usable_tables,
                                            SARGABLE_PARAM **sargables,
                                            bool equal_func)
{
  /* If item is of type 'field op field/constant' add it to key_fields */
  if (is_local_field(args[0]))
  {
    add_key_equal_fields(join, key_fields, *and_level, this,
                         (Item_field*) args[0]->real_item(), equal_func,
                         args + 1, 1, usable_tables, sargables);
  }
  if (is_local_field(args[1]))
  {
    add_key_equal_fields(join, key_fields, *and_level, this,
                         (Item_field*) args[1]->real_item(), equal_func,
                         args, 1, usable_tables, sargables);
  }
}


void
Item_func_null_predicate::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                                         uint *and_level,
                                         table_map usable_tables,
                                         SARGABLE_PARAM **sargables)
{
  /* column_name IS [NOT] NULL */
  if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
  {
    /* The comparison value is a NULL item allocated on the THD mem_root */
    Item *tmp= new (join->thd->mem_root) Item_null(join->thd);
    if (unlikely(!tmp))                       // Should never be true
      return;
    /* eq_func is true only for IS NULL, not for IS NOT NULL */
    add_key_equal_fields(join, key_fields, *and_level, this,
                         (Item_field*) args[0]->real_item(),
                         functype() == Item_func::ISNULL_FUNC,
                         &tmp, 1, usable_tables, sargables);
  }
}


/*
  Multiple equality: with a constant member every field is added as
  'field = const'; without one, every ordered pair of different fields is
  added as 'field1 = field2'.
*/

void
Item_equal::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
                           uint *and_level, table_map usable_tables,
                           SARGABLE_PARAM **sargables)
{
  Item *const_item2= get_const();
  Item_equal_fields_iterator it(*this);
  if (const_item2)
  {

    /*
      For each field field1 from item_equal consider the equality
      field1=const_item as a condition allowing an index access of the table
      with field1 by the keys value of field1.
    */
    while (it++)
    {
      Field *equal_field= it.get_curr_field();
      add_key_field(join, key_fields, *and_level, this, equal_field,
                    TRUE, &const_item2, 1, usable_tables, sargables);
    }
  }
  else
  {
    /*
      Consider all pairs of different fields included into item_equal.
      For each of them (field1, field2) consider the equality
      field1=field2 as a condition allowing an index access of the table
      with field1 by the keys value of field2.
    */
    Item_equal_fields_iterator fi(*this);
    while (fi++)
    {
      Field *field= fi.get_curr_field();
      Item *item;
      while ((item= it++))
      {
        Field *equal_field= it.get_curr_field();
        if (!field->eq(equal_field))
        {
          add_key_field(join, key_fields, *and_level, this, field,
                        TRUE, &item, 1, usable_tables,
                        sargables);
        }
      }
      it.rewind();                            // restart inner scan for next field
    }
  }
}


/*
  Return the number of consecutive set bits in 'bits', counted from bit 0
  (i.e. the index of the lowest clear bit).
*/

static uint
max_part_bit(key_part_map bits)
{
  uint found;
  for (found=0; bits & 1 ; found++,bits>>=1) ;
  return found;
}


/**
  Add a new keyuse to the specified array of KEYUSE objects

  @param[in,out]  keyuse_array  array of keyuses to be extended
  @param[in]      key_field     info on the key use occurrence
  @param[in]      key           key number for the keyuse to be added
  @param[in]      part          key part for the keyuse to be added

  @note
  The function builds a new KEYUSE object for a key use utilizing the info
  on the left and right parts of the given key use extracted from the
  structure key_field, the key number and key part for this key use.
  The built object is added to the dynamic array keyuse_array.
+ + @retval 0 the built object is successfully added + @retval 1 otherwise +*/ + +static bool +add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field, + uint key, uint part) +{ + KEYUSE keyuse; + Field *field= key_field->field; + + keyuse.table= field->table; + keyuse.val= key_field->val; + keyuse.key= key; + if (!is_hash_join_key_no(key)) + { + keyuse.keypart=part; + keyuse.keypart_map= (key_part_map) 1 << part; + } + else + { + keyuse.keypart= field->field_index; + keyuse.keypart_map= (key_part_map) 0; + } + keyuse.used_tables= key_field->val->used_tables(); + keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; + keyuse.ref_table_rows= 0; + keyuse.null_rejecting= key_field->null_rejecting; + keyuse.cond_guard= key_field->cond_guard; + keyuse.sj_pred_no= key_field->sj_pred_no; + keyuse.validity_ref= 0; + return (insert_dynamic(keyuse_array,(uchar*) &keyuse)); +} + + +/* + Add all keys with uses 'field' for some keypart + If field->and_level != and_level then only mark key_part as const_part + + RETURN + 0 - OK + 1 - Out of memory. +*/ + +static LEX_CSTRING equal_str= { STRING_WITH_LEN("=") }; + +static bool +add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field) +{ + Field *field=key_field->field; + TABLE *form= field->table; + THD *thd= form->in_use; + + if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS)) + { + for (uint key=0 ; key < form->s->keys ; key++) + { + if (!(form->keys_in_use_for_query.is_set(key))) + continue; + if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL)) + continue; // ToDo: ft-keys in non-ft queries. 
SerG + + KEY *keyinfo= form->key_info+key; + uint key_parts= form->actual_n_key_parts(keyinfo); + for (uint part=0 ; part < key_parts ; part++) + { + if (field->eq(form->key_info[key].key_part[part].field)) + { + Data_type_compatibility compat= + field->can_optimize_keypart_ref(key_field->cond, key_field->val); + if (compat == Data_type_compatibility::OK) + { + if (add_keyuse(keyuse_array, key_field, key, part)) + return TRUE; + } + else if (thd->give_notes_for_unusable_keys()) + { + field->raise_note_cannot_use_key_part(thd, key, part, + equal_str, + key_field->val, + compat); + } + } + } + } + if (field->hash_join_is_possible() && + (key_field->optimize & KEY_OPTIMIZE_EQ) && + key_field->val->used_tables()) + { + if (field->can_optimize_hash_join(key_field->cond, key_field->val) != + Data_type_compatibility::OK) + return false; + if (form->is_splittable()) + form->add_splitting_info_for_key_field(key_field); + /* + If a key use is extracted from an equi-join predicate then it is + added not only as a key use for every index whose component can + be evalusted utilizing this key use, but also as a key use for + hash join. Such key uses are marked with a special key number. 
+ */ + if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0)) + return TRUE; + } + } + return FALSE; +} + +static bool +add_ft_keys(DYNAMIC_ARRAY *keyuse_array, + JOIN_TAB *stat,COND *cond,table_map usable_tables) +{ + Item_func_match *cond_func=NULL; + + if (!cond) + return FALSE; + + if (cond->type() == Item::FUNC_ITEM) + { + Item_func *func=(Item_func *)cond; + Item_func::Functype functype= func->functype(); + if (functype == Item_func::FT_FUNC) + cond_func=(Item_func_match *)cond; + else if (func->argument_count() == 2) + { + Item *arg0=(Item *)(func->arguments()[0]), + *arg1=(Item *)(func->arguments()[1]); + if (arg1->const_item() && arg1->cols() == 1 && + arg0->type() == Item::FUNC_ITEM && + ((Item_func *) arg0)->functype() == Item_func::FT_FUNC && + ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) || + (functype == Item_func::GT_FUNC && arg1->val_real() >=0))) + cond_func= (Item_func_match *) arg0; + else if (arg0->const_item() && arg0->cols() == 1 && + arg1->type() == Item::FUNC_ITEM && + ((Item_func *) arg1)->functype() == Item_func::FT_FUNC && + ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) || + (functype == Item_func::LT_FUNC && arg0->val_real() >=0))) + cond_func= (Item_func_match *) arg1; + } + } + else if (cond->type() == Item::COND_ITEM) + { + List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list()); + + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + Item *item; + while ((item=li++)) + { + if (add_ft_keys(keyuse_array,stat,item,usable_tables)) + return TRUE; + } + } + } + + if (!cond_func || cond_func->key == NO_SUCH_KEY || + !(usable_tables & cond_func->table->map)) + return FALSE; + + KEYUSE keyuse; + keyuse.table= cond_func->table; + keyuse.val = cond_func; + keyuse.key = cond_func->key; + keyuse.keypart= FT_KEYPART; + keyuse.used_tables=cond_func->key_item()->used_tables(); + keyuse.optimize= 0; + keyuse.ref_table_rows= 0; + keyuse.keypart_map= 0; + keyuse.sj_pred_no= UINT_MAX; + 
keyuse.validity_ref= 0; + keyuse.null_rejecting= FALSE; + return insert_dynamic(keyuse_array,(uchar*) &keyuse); +} + + +static int +sort_keyuse(KEYUSE *a,KEYUSE *b) +{ + int res; + if (a->table->tablenr != b->table->tablenr) + return (int) (a->table->tablenr - b->table->tablenr); + if (a->key != b->key) + return (int) (a->key - b->key); + if (a->key == MAX_KEY && b->key == MAX_KEY && + a->used_tables != b->used_tables) + return (int) ((ulong) a->used_tables - (ulong) b->used_tables); + if (a->keypart != b->keypart) + return (int) (a->keypart - b->keypart); + // Place const values before other ones + if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) - + MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT)))) + return res; + /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */ + return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) - + (b->optimize & KEY_OPTIMIZE_REF_OR_NULL)); +} + + +/* + Add to KEY_FIELD array all 'ref' access candidates within nested join. + + This function populates KEY_FIELD array with entries generated from the + ON condition of the given nested join, and does the same for nested joins + contained within this nested join. + + @param[in] nested_join_table Nested join pseudo-table to process + @param[in,out] end End of the key field array + @param[in,out] and_level And-level + @param[in,out] sargables Array of found sargable candidates + + + @note + We can add accesses to the tables that are direct children of this nested + join (1), and are not inner tables w.r.t their neighbours (2). + + Example for #1 (outer brackets pair denotes nested join this function is + invoked for): + @code + ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond + @endcode + Example for #2: + @code + ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond + @endcode + In examples 1-2 for condition cond, we can add 'ref' access candidates to + t1 only. + Example #3: + @code + ... 
LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond + @endcode + Here we can add 'ref' access candidates for t1 and t2, but not for t3. +*/ + +static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table, + KEY_FIELD **end, uint *and_level, + SARGABLE_PARAM **sargables) +{ + List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list); + List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list); + bool have_another = FALSE; + table_map tables= 0; + TABLE_LIST *table; + DBUG_ASSERT(nested_join_table->nested_join); + + while ((table= li++) || (have_another && (li=li2, have_another=FALSE, + (table= li++)))) + { + if (table->nested_join) + { + if (!table->on_expr) + { + /* It's a semi-join nest. Walk into it as if it wasn't a nest */ + have_another= TRUE; + li2= li; + li= List_iterator<TABLE_LIST>(table->nested_join->join_list); + } + else + add_key_fields_for_nj(join, table, end, and_level, sargables); + } + else + if (!table->on_expr) + tables |= table->table->map; + } + if (nested_join_table->on_expr) + nested_join_table->on_expr->add_key_fields(join, end, and_level, tables, + sargables); +} + + +void count_cond_for_nj(SELECT_LEX *sel, TABLE_LIST *nested_join_table) +{ + List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list); + List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list); + bool have_another = FALSE; + TABLE_LIST *table; + + while ((table= li++) || (have_another && (li=li2, have_another=FALSE, + (table= li++)))) + if (table->nested_join) + { + if (!table->on_expr) + { + /* It's a semi-join nest. Walk into it as if it wasn't a nest */ + have_another= TRUE; + li2= li; + li= List_iterator<TABLE_LIST>(table->nested_join->join_list); + } + else + count_cond_for_nj(sel, table); + } + if (nested_join_table->on_expr) + nested_join_table->on_expr->walk(&Item::count_sargable_conds, 0, sel); + +} + +/** + Update keyuse array with all possible keys we can use to fetch rows. 
@param       thd
@param[out]  keyuse         Put here ordered array of KEYUSE structures
@param       join_tab       Array in tablenr_order
@param       tables         Number of tables in join
@param       cond           WHERE condition (note that the function analyzes
                            join_tab[i]->on_expr too)
@param       normal_tables  Tables not inner w.r.t some outer join (ones
                            for which we can make ref access based on the
                            WHERE clause)
@param       select_lex     current SELECT
@param[out]  sargables      Array of found sargable candidates

@retval
  0  OK
@retval
  1  Out of memory.
*/

static bool
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
                    uint tables, COND *cond, table_map normal_tables,
                    SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
{
  uint and_level,i;
  KEY_FIELD *key_fields, *end, *field;
  uint sz;
  uint m= MY_MAX(select_lex->max_equal_elems,1);
  DBUG_ENTER("update_ref_and_keys");
  DBUG_PRINT("enter", ("normal_tables: %llx", normal_tables));

  /*
    Pass 1: count the sargable conditions in WHERE, in the ON expressions
    of the join tables and in the ON expressions of nested joins. The
    counters are used below to size the work buffer.
  */
  SELECT_LEX *sel=thd->lex->current_select;
  sel->cond_count= 0;
  sel->between_count= 0;
  if (cond)
    cond->walk(&Item::count_sargable_conds, 0, sel);
  for (i=0 ; i < tables ; i++)
  {
    if (*join_tab[i].on_expr_ref)
      (*join_tab[i].on_expr_ref)->walk(&Item::count_sargable_conds, 0, sel);
  }
  {
    List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
    TABLE_LIST *table;
    while ((table= li++))
    {
      if (table->nested_join)
        count_cond_for_nj(sel, table);
    }
  }

  /*
    We use the same piece of memory to store both KEY_FIELD
    and SARGABLE_PARAM structures.
    KEY_FIELD values are placed at the beginning of this memory
    while SARGABLE_PARAM values are put at the end.
    All predicates that are used to fill arrays of KEY_FIELD
    and SARGABLE_PARAM structures have at most 2 arguments
    except BETWEEN predicates that have 3 arguments and
    IN predicates.
    Thus any predicate, if it's not BETWEEN/IN, can be used
    directly to fill at most 2 array elements, either of KEY_FIELD
    or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
    can be filled as this predicate is considered as
    sargable with respect to each of its arguments.
    An IN predicate can require at most 1 element as currently
    it is considered as sargable only for its first argument.
    Multiple equality can add elements that are filled after
    substitution of field arguments by equal fields. There
    can be not more than select_lex->max_equal_elems such
    substitutions.
  */
  sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
    ((sel->cond_count*2 + sel->between_count)*m+1);
  if (!(key_fields=(KEY_FIELD*) thd->alloc(sz)))
    DBUG_RETURN(TRUE); /* purecov: inspected */
  and_level= 0;
  field= end= key_fields;
  /* Point *sargables at the last SARGABLE_PARAM slot of the buffer */
  *sargables= (SARGABLE_PARAM *) key_fields +
                (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
  /* set a barrier for the array of SARGABLE_PARAM */
  (*sargables)[0].field= 0;

  if (my_init_dynamic_array2(thd->mem_root->psi_key, keyuse, sizeof(KEYUSE),
                             thd->alloc(sizeof(KEYUSE) * 20), 20, 64,
                             MYF(MY_THREAD_SPECIFIC)))
    DBUG_RETURN(TRUE);

  /* Pass 2: collect KEY_FIELDs from the WHERE condition ... */
  if (cond)
  {
    KEY_FIELD *saved_field= field;
    cond->add_key_fields(join_tab->join, &end, &and_level, normal_tables,
                         sargables);
    for (; field != end ; field++)
    {

      /* Mark that we can optimize LEFT JOIN */
      if (field->val->type() == Item::NULL_ITEM &&
          !field->field->real_maybe_null())
        field->field->table->reginfo.not_exists_optimize=1;
    }
    /* Rewind: the final loop below must process these elements too */
    field= saved_field;
  }
  /* ... from the ON expression of every join table ... */
  for (i=0 ; i < tables ; i++)
  {
    /*
      Block the creation of keys for inner tables of outer joins.
      Here only the outer joins that can not be converted to
      inner joins are left and all nests that can be eliminated
      are flattened.
      In the future when we introduce conditional accesses
      for inner tables in outer joins these keys will be taken
      into account as well.
    */
    if (*join_tab[i].on_expr_ref)
      (*join_tab[i].on_expr_ref)->add_key_fields(join_tab->join, &end,
                                                 &and_level,
                                                 join_tab[i].table->map,
                                                 sargables);
  }

  /* Process ON conditions for the nested joins */
  {
    List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
    TABLE_LIST *table;
    while ((table= li++))
    {
      if (table->nested_join)
        add_key_fields_for_nj(join_tab->join, table, &end, &and_level,
                              sargables);
    }
  }

  /* fill keyuse with found key parts */
  for ( ; field != end ; field++)
  {
    if (add_key_part(keyuse,field))
      DBUG_RETURN(TRUE);
  }

  /* Fulltext functions are looked up in the WHERE condition only */
  if (select_lex->ftfunc_list->elements)
  {
    if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
      DBUG_RETURN(TRUE);
  }

  DBUG_RETURN(FALSE);
}

/*
  Check if key could be used with eq_ref

  The assumption is that all previous key parts were used.

  If 'use' refers to the last key part of a key that has HA_NOSAME or
  HA_EXT_NOSAME set, the table is added to join->eq_ref_tables.
*/

static void remember_if_eq_ref_key(JOIN *join, KEYUSE *use)
{
  DBUG_ASSERT(use->keypart != FT_KEYPART && use->key != MAX_KEY);
  TABLE *table= use->table;
  KEY *key= table->key_info+use->key;
  ulong key_flags= table->actual_key_flags(key);

  /*
    Check if possible eq_ref key
    This may include keys that do not have HA_NULL_PART_KEY
    set, but this is ok as best_access_path will resolve this.
  */
  if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)))
  {
    uint key_parts= table->actual_n_key_parts(key);
    if (use->keypart+1 == key_parts)
      join->eq_ref_tables|= table->map;
  }
}


/**
  Sort the array of possible keys and remove the following key parts:
  - ref if there is a keypart which is a ref and a const.
    (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
    then we skip the key part corresponding to b=t2.d)
  - keyparts without previous keyparts
    (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
    used in the query, we drop the partial key parts from consideration).
  Special treatment for ft-keys.
Update join->eq_ref_tables with a bitmap of all tables that can possibly
have an EQ_REF key.

@param join                      Join whose eq_ref_tables is recomputed
@param keyuse                    Array of KEYUSE; sorted and compacted in place
@param skip_unprefixed_keyparts  If true, key parts without the preceding
                                 key parts are removed from the array

@retval FALSE  OK
@retval TRUE   insert_dynamic() failed while adding the end marker
*/

bool sort_and_filter_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse,
                            bool skip_unprefixed_keyparts)
{
  THD *thd= join->thd;
  KEYUSE key_end, *prev, *save_pos, *use;
  uint found_eq_constant, i;
  bool found_unprefixed_key_part= 0;

  join->eq_ref_tables= 0;
  DBUG_ASSERT(keyuse->elements);

  my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
           (qsort_cmp) sort_keyuse);

  /* Append a zero-filled element that serves as the end marker */
  bzero((char*) &key_end, sizeof(key_end));    /* Add for easy testing */
  if (insert_dynamic(keyuse, (uchar*) &key_end))
    return TRUE;

  if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
    generate_derived_keys(keyuse);

  /*
    'use' scans the array while 'save_pos' is the position where the next
    kept element is stored; a 'continue' below skips the copy at the end
    of the loop body and thereby drops the element.
  */
  use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
  prev= &key_end;
  found_eq_constant= 0;
  /* Loop over all elements except the last 'key_end' */
  for (i=0 ; i < keyuse->elements-1 ; i++,use++)
  {
    if (!use->is_for_hash_join())
    {
      if (!(use->used_tables & ~OUTER_REF_TABLE_BIT) &&
          use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
        use->table->const_key_parts[use->key]|= use->keypart_map;
      if (use->keypart != FT_KEYPART)
      {
        if (use->key == prev->key && use->table == prev->table)
        {
          /* Same key part again while a constant was already found: drop */
          if (prev->keypart == use->keypart && found_eq_constant)
            continue;
          if (prev->keypart+1 < use->keypart)
          {
            found_unprefixed_key_part= 1;
            if (skip_unprefixed_keyparts)
              continue;                           /* remove */
          }
        }
        else
        {
          /* Key changed, check if previous key was a primary/unique key lookup */
          if (prev != &key_end && !found_unprefixed_key_part)
            remember_if_eq_ref_key(join, prev);
          found_unprefixed_key_part= 0;
          if (use->keypart != 0)
          {
            found_unprefixed_key_part= 1;
            if (skip_unprefixed_keyparts)
              continue; /* remove - first found key part must be 0 */
          }
        }
      }
      else                                        /* FT_KEY_PART */
      {
        if (prev != &key_end && !found_unprefixed_key_part)
          remember_if_eq_ref_key(join, prev);
        found_unprefixed_key_part= 1;             // This key cannot be EQ_REF
      }
      prev= use;
      found_eq_constant= !use->used_tables;
      use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
    }
    else
    {
      /* Hash join key use: closes the previous key, never becomes 'prev' */
      if (prev != &key_end && !found_unprefixed_key_part)
        remember_if_eq_ref_key(join, prev);
      prev= &key_end;
    }
    /*
      Old gcc used a memcpy(), which is undefined if save_pos==use:
      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
      This also disables a valgrind warning, so better to have the test.
    */
    if (save_pos != use)
      *save_pos= *use;
    /* Save ptr to first use */
    if (!use->table->reginfo.join_tab->keyuse)
      use->table->reginfo.join_tab->keyuse= save_pos;
    save_pos++;
  }
  /* The last key seen has not been checked for EQ_REF yet */
  if (prev != &key_end && !found_unprefixed_key_part)
    remember_if_eq_ref_key(join, prev);
  /* Store the end marker after the kept elements and shrink the array */
  i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
  (void) set_dynamic(keyuse,(uchar*) &key_end,i);
  keyuse->elements= i;

  return FALSE;
}


/**
  Update some values in keyuse for faster choose_plan() loop.

  Sets ref_table_rows of every element in keyuse_array.
*/

void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
{
  KEYUSE *end,*keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);

  for (end= keyuse+ keyuse_array->elements ; keyuse < end ; keyuse++)
  {
    table_map map;
    /*
      If we find a ref, assume this table matches a proportional
      part of this table.
      For example 100 records matching a table with 5000 records
      gives 5000/100 = 50 records per key
      Constant tables are ignored.
      To avoid bad matches, we don't make ref_table_rows less than 100.
    */
    keyuse->ref_table_rows= ~(ha_rows) 0;       // If no ref
    /* 'map' is used_tables without const tables and the outer-ref bit */
    if (keyuse->used_tables &
        (map= (keyuse->used_tables & ~join->const_table_map &
               ~OUTER_REF_TABLE_BIT)))
    {
      uint n_tables= my_count_bits(map);
      if (n_tables == 1)                        // Only one table
      {
        DBUG_ASSERT(!(map & PSEUDO_TABLE_BITS)); // Must be a real table
        Table_map_iterator it(map);
        int tablenr= it.next_bit();
        DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
        TABLE *tmp_table=join->table[tablenr];
        if (tmp_table)                          // already created
          keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100);
      }
    }
    /*
      Outer reference (external field) is constant for single executing
      of subquery
    */
    if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
      keyuse->ref_table_rows= 1;
  }
}

/**
  Check for the presence of AGGFN(DISTINCT a) queries that may be subject
  to loose index scan.

  Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
  (QUICK_GROUP_MIN_MAX_SELECT).
  Optionally (if out_args is supplied) will push the arguments of
  AGGFN(DISTINCT) to the list

  Check for every COUNT(DISTINCT), AVG(DISTINCT) or
  SUM(DISTINCT). These can be resolved by Loose Index Scan as long
  as all the aggregate distinct functions refer to the same
  fields. Thus:

  SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
  SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT a)   ... => can use LIS
  SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a)   ... => cannot use LIS
  SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT b)   ... => cannot use LIS
  etc.

  @param      join       the join to check
  @param[out] out_args   Collect the arguments of the aggregate functions
                         to a list. We don't worry about duplicates as
                         these will be sorted out later in
                         get_best_group_min_max.

  @return                does the query qualify for indexed AGGFN(DISTINCT)
    @retval   true       it does
    @retval   false      AGGFN(DISTINCT) must apply distinct in it.
*/

bool
is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
{
  Item_sum **sum_item_ptr;
  bool result= false;

  if (join->table_count != 1 ||                    /* reference more than 1 table */
      join->select_distinct ||                     /* or a DISTINCT */
      join->select_lex->olap == ROLLUP_TYPE)       /* Check (B3) for ROLLUP */
    return false;

  /* Field set of the first AGGFN(DISTINCT); later ones must match it */
  Bitmap<MAX_FIELDS> first_aggdistinct_fields;
  bool first_aggdistinct_fields_initialized= false;
  for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
  {
    Item_sum *sum_item= *sum_item_ptr;
    Item *expr;
    /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
    switch (sum_item->sum_func())
    {
      case Item_sum::MIN_FUNC:
      case Item_sum::MAX_FUNC:
        continue;                                  /* skipped, not a blocker */
      case Item_sum::COUNT_DISTINCT_FUNC:
        break;
      case Item_sum::AVG_DISTINCT_FUNC:
      case Item_sum::SUM_DISTINCT_FUNC:
        if (sum_item->get_arg_count() == 1)
          break;
        /* fall through */
      default: return false;
    }
    /*
      We arrive here for every COUNT(DISTINCT),AVG(DISTINCT) or SUM(DISTINCT).
      Collect the arguments of the aggregate functions to a list.
      We don't worry about duplicates as these will be sorted out later in
      get_best_group_min_max
    */
    Bitmap<MAX_FIELDS> cur_aggdistinct_fields;
    cur_aggdistinct_fields.clear_all();
    for (uint i= 0; i < sum_item->get_arg_count(); i++)
    {
      expr= sum_item->get_arg(i);
      /* The AGGFN(DISTINCT) arg is not an attribute? */
      if (expr->real_item()->type() != Item::FIELD_ITEM)
        return false;

      Item_field* item= static_cast<Item_field*>(expr->real_item());
      if (out_args)
        out_args->push_back(item, join->thd->mem_root);

      cur_aggdistinct_fields.set_bit(item->field->field_index);
      result= true;
    }
    /*
      If there are multiple aggregate functions, make sure that they all
      refer to exactly the same set of columns.
    */
    if (!first_aggdistinct_fields_initialized)
    {
      first_aggdistinct_fields= cur_aggdistinct_fields;
      first_aggdistinct_fields_initialized=true;
    }
    else if (first_aggdistinct_fields != cur_aggdistinct_fields)
      return false;
  }

  return result;
}


/**
  Discover the indexes that can be used for GROUP BY or DISTINCT queries.

  If the query has a GROUP BY clause, find all indexes that contain all
  GROUP BY fields, and add those indexes to join_tab->const_keys.

  If the query has a DISTINCT clause, find all indexes that contain all
  SELECT fields, and add those indexes to join_tab->const_keys.
  This allows later on such queries to be processed by a
  QUICK_GROUP_MIN_MAX_SELECT.

  @param join
  @param join_tab

  @return
    None
*/

static void
add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
{
  List<Item_field> indexed_fields;
  List_iterator<Item_field> indexed_fields_it(indexed_fields);
  ORDER      *cur_group;
  Item_field *cur_item;
  key_map possible_keys(0);

  if (join->group_list)
  { /* Collect all query fields referenced in the GROUP clause. */
    for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
      (*cur_group->item)->walk(&Item::collect_item_field_processor, 0,
                               &indexed_fields);
  }
  else if (join->select_distinct)
  { /* Collect all query fields referenced in the SELECT clause. */
    List<Item> &select_items= join->fields_list;
    List_iterator<Item> select_items_it(select_items);
    Item *item;
    while ((item= select_items_it++))
      item->walk(&Item::collect_item_field_processor, 0, &indexed_fields);
  }
  else if (!join->tmp_table_param.sum_func_count ||
           !is_indexed_agg_distinct(join, &indexed_fields))
  {
    /*
      There were no GROUP BY fields and also either no aggregate
      functions or not all aggregate functions were used with the
      same DISTINCT (or MIN() / MAX() that works similarly).
      Nothing to do there.
    */
    return;
  }

  if (indexed_fields.elements == 0)
  {
    /* There were no fields collected, so no index can be chosen */
    return;
  }

  /* Intersect the keys of all group fields. */
  cur_item= indexed_fields_it++;
  possible_keys.merge(cur_item->field->part_of_key);
  while ((cur_item= indexed_fields_it++))
  {
    possible_keys.intersect(cur_item->field->part_of_key);
  }

  if (!possible_keys.is_clear_all())
    join_tab->const_keys.merge(possible_keys);
}


/*****************************************************************************
  Go through all combinations of not marked tables and find the one
  which uses least records
*****************************************************************************/

/**
  Save const tables first as used tables.

  Fills join->positions[idx] with the values used for a const table and
  moves 'table' to slot idx of join->best_ref, shifting the entries that
  were between idx and its previous slot one position up.

  @param join   Join being optimized
  @param idx    Position to fill in join->positions and join->best_ref
  @param table  The const table
  @param key    Key use stored in the position
*/

void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
{
  join->positions[idx].table= table;
  join->positions[idx].key=key;
  join->positions[idx].records_read=1.0;        /* This is a const table */
  join->positions[idx].cond_selectivity= 1.0;
  join->positions[idx].ref_depend_map= 0;
  join->positions[idx].partial_join_cardinality= 1;

//  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
  join->positions[idx].sj_strategy= SJ_OPT_NONE;
  join->positions[idx].use_join_buffer= FALSE;
  join->positions[idx].range_rowid_filter_info= 0;

  /* Move the const table as down as possible in best_ref */
  JOIN_TAB **pos=join->best_ref+idx+1;
  JOIN_TAB *next=join->best_ref[idx];
  /*
    Shift entries up by one until 'table' itself is picked up in 'next'.
    The loop has no bound of its own, so 'table' has to be present in
    best_ref at index idx or later.
  */
  for (;next != table ; pos++)
  {
    JOIN_TAB *tmp=pos[0];
    pos[0]=next;
    next=tmp;
  }
  join->best_ref[idx]=table;
  join->positions[idx].spl_plan= 0;
  join->positions[idx].spl_pd_boundary= 0;
}


/*
  Estimate how many records we will get if we read just this table and apply
  a part of WHERE that can be checked for it.

  @param s                     Current JOIN_TAB
  @param use_cond_selectivity  Value of optimizer_use_condition_selectivity.
                               If > 1 then use table->cond_selectivity.
+ @param force_estiamte Set to 1 if we should not call + use_found_constraint. To be deleted in 11.0 + @return 0.0 No matching rows + @return >= 1.0 Number of expected matching rows + + @detail + Estimate how many records we will get if we + - read the given table with its "independent" access method (either quick + select or full table/index scan), + - apply the part of WHERE that refers only to this table. + + @see also + table_cond_selectivity() produces selectivity of condition that is checked + after joining rows from this table to rows from preceding tables. +*/ + +static double apply_selectivity_for_table(JOIN_TAB *s, + uint use_cond_selectivity, + bool *force_estimate) +{ + ha_rows records; + double dbl_records; + + if (use_cond_selectivity > 1) + { + TABLE *table= s->table; + double sel= table->cond_selectivity; + double table_records= rows2double(s->records); + dbl_records= table_records * sel; + *force_estimate= 1; // Don't call use_found_constraint() + return dbl_records; + } + + records = s->found_records; + + /* + If applicable, get a more accurate estimate: opt_range_condition_rows is + asserted not to exceed found_records, so when the two differ it is the + smaller number and replaces found_records. + + Note that *force_estimate is only ever set to 1 in this function and is + never reset, so the caller has to initialize it. + */ + DBUG_ASSERT(s->table->opt_range_condition_rows <= s->found_records); + if (s->table->opt_range_condition_rows != s->found_records) + { + *force_estimate= 1; // Don't call use_found_constraint() + records= s->table->opt_range_condition_rows; + } + + dbl_records= (double)records; + return dbl_records; +} + +/* + Take into account that the table's WHERE clause has conditions on earlier + tables that can reduce the number of accepted rows. + + @param records Number of original rows (after selectivity) + + @return records reduced by a quarter (records - records/4) + + If there is a filtering condition on the table (i.e. ref analyzer found + at least one "table.keyXpartY= exprZ", where exprZ refers only to tables + preceding this table in the join order we're now considering), then + assume that 25% of the rows will be filtered out by this condition. + + This heuristic is supposed to force tables used in exprZ to be before + this table in join order. 
+*/ + +inline double use_found_constraint(double records) +{ + records-= records/4; + return records; +} + + +/* + Calculate the cost of reading a set of rows through an index + + Logically this is identical to the code in multi_range_read_info_const() + except that the function also takes into account io_blocks and multiple + ranges. + + One main difference between the functions is that + multi_range_read_info_const() adds a very small cost per range + (IDX_LOOKUP_COST) and also MULTI_RANGE_READ_SETUP_COST, to ensure that + 'ref' is preferred slightly over ranges. + + @param thd Supplies variables.max_seeks_for_key, used to cap 'records' + @param table Table that owns the index + @param key Index number + @param records Number of rows expected to be read through the index + @param worst_seeks Cap on the number of row reads charged when the index is + neither clustering nor covering + + @return + - clustering key: file->read_time() for 'records' rows + - covering key: file->keyread_time() for 'records' rows + - otherwise: file->keyread_time() for 'records' rows plus + file->read_time() for MY_MIN(records, worst_seeks) rows +*/ + +double cost_for_index_read(const THD *thd, const TABLE *table, uint key, + ha_rows records, ha_rows worst_seeks) +{ + DBUG_ENTER("cost_for_index_read"); + double cost; + handler *file= table->file; + + set_if_smaller(records, (ha_rows) thd->variables.max_seeks_for_key); + if (file->is_clustering_key(key)) + cost= file->read_time(key, 1, records); + else + if (table->covering_keys.is_set(key)) + cost= file->keyread_time(key, 1, records); + else + cost= ((file->keyread_time(key, 0, records) + + file->read_time(key, 1, MY_MIN(records, worst_seeks)))); + + DBUG_PRINT("statistics", ("cost: %.3f", cost)); + DBUG_RETURN(cost); +} + + +/* + Adjust cost from table->quick_costs calculated by + multi_range_read_info_const() to be comparable with cost_for_index_read() + + This function is needed because best_access_path() doesn't add + TIME_FOR_COMPARE to its costs until very late. + Preferably we should fix so that all costs are comparable. + (All compared costs should include TIME_FOR_COMPARE for all found + rows). + + @param quick_cost Cost to adjust + @param records Number of rows whose compare cost is subtracted + + @return + quick_cost - MULTI_RANGE_READ_SETUP_COST - records/TIME_FOR_COMPARE. + The result is asserted to be positive. 
+*/ + +double adjust_quick_cost(double quick_cost, ha_rows records) +{ + double cost= (quick_cost - MULTI_RANGE_READ_SETUP_COST - + rows2double(records)/TIME_FOR_COMPARE); + DBUG_ASSERT(cost > 0.0); + return cost; +} + + +/* + @brief + Compute the fanout of hash join operation using EITS data + + @param join Join being optimized; supplies thd and + sjm_lookup_tables + @param s Table to be joined + @param remaining_tables Tables not included into the partial plan yet; + KEYUSEs that refer to any of them are skipped + @param rnd_records Number of rows of 's' left after applying its + local conditions; upper bound of the result + @param hj_start_key First hash join KEYUSE of 's' + @param stats_found OUT Set to 1 when statistics of at least one usable + column were read. Never reset here, so the caller + has to initialize it. + + @return + The smaller of rnd_records and the lowest avg_frequency among the + columns of the usable KEYUSEs. +*/ + +double hash_join_fanout(JOIN *join, JOIN_TAB *s, table_map remaining_tables, + double rnd_records, KEYUSE *hj_start_key, + bool *stats_found) +{ + THD *thd= join->thd; + /* + Before doing the hash join, we will scan the table and apply the local part + of the WHERE condition. This will produce rnd_records. + + The EITS statistics describe the entire table. Calling + + table->field[N]->read_stats->get_avg_frequency() + + produces average #rows in the table with some value. + + What happens if we filter out rows so that rnd_records rows are left? + Something between the two outcomes: + A. filtering removes a fraction of rows for each value: + avg_frequency=avg_frequency * condition_selectivity + + B. filtering removes entire groups of rows with the same value, but + the remaining groups remain of the same size. + + We make the pessimistic assumption and assume B. + We also handle an edge case: if rnd_records is less than avg_frequency, + assume we'll get rnd_records rows with the same value, and return + rnd_records as the fanout estimate. + */ + double min_freq= rnd_records; + + Json_writer_object trace_obj(thd, "hash_join_cardinality"); + /* + There can be multiple KEYUSE referring to same or different columns + + KEYUSE(tbl.col1 = ...) + KEYUSE(tbl.col1 = ...) + KEYUSE(tbl.col2 = ...) + + Hash join code can use multiple columns: (col1, col2) for joining. + We need n_distinct({col1, col2}). + + EITS only has statistics on individual columns: n_distinct(col1), + n_distinct(col2). + + Our current solution is to be very conservative and use selectivity + of one column with the lowest avg_frequency. 
+ + In the future, we should use an approach that cautiously takes into account + multiple KEYUSEs: either multiply by number of equalities or by sqrt + of the second most selective equality. + */ + Json_writer_array trace_arr(thd, "hash_join_columns"); + for (KEYUSE *keyuse= hj_start_key; + keyuse->table == s->table && is_hash_join_key_no(keyuse->key); + keyuse++) + { /* Skip KEYUSEs that refer to tables in remaining_tables, that have a validity_ref pointing to a false value, or that access_from_tables_is_allowed() rejects. */ + if (!(remaining_tables & keyuse->used_tables) && + (!keyuse->validity_ref || *keyuse->validity_ref) && + s->access_from_tables_is_allowed(keyuse->used_tables, + join->sjm_lookup_tables)) + { + Field *field= s->table->field[keyuse->keypart]; + if (is_eits_usable(field)) + { + double freq= field->read_stats->get_avg_frequency(); + + Json_writer_object trace_field(thd); + trace_field.add("field",field->field_name.str). + add("avg_frequency", freq); + if (freq < min_freq) + min_freq= freq; + *stats_found= 1; + } + } + } + trace_arr.end(); + trace_obj.add("rows", min_freq); + return min_freq; +} + + +/** + Find the best access path for an extension of a partial execution + plan and add this path to the plan. + + The function finds the best access path to table 's' from the passed + partial plan where an access path is the general term for any means to + access the data in 's'. An access path may use either an index or a scan, + whichever is cheaper. The input partial plan is passed via the array + 'join->positions' of length 'idx'. The chosen access method for 's' and its + cost are stored in 'join->positions[idx]'. 
+ + @param join pointer to the structure providing all context info + for the query + @param s the table to be joined by the function + @param thd thread for the connection that submitted the query + @param remaining_tables set of tables not included into the partial plan yet + @param idx the length of the partial plan + @param disable_jbuf TRUE<=> Don't use join buffering + @param record_count estimate for the number of records returned by the + partial plan + @param pos OUT Table access plan + @param loose_scan_pos OUT Table plan that uses loosescan, or set cost to + DBL_MAX if not possible. + + @return + None +*/ + +void +best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + const POSITION *join_positions, + uint idx, + bool disable_jbuf, + double record_count, + POSITION *pos, + POSITION *loose_scan_pos) +{ + THD *thd= join->thd; + uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; + KEYUSE *best_key= 0; + uint best_max_key_part= 0; + my_bool found_constraint= 0; + double best= DBL_MAX; + double best_time= DBL_MAX; + double records= DBL_MAX; + ha_rows records_for_key= 0; + table_map best_ref_depends_map= 0; + /* + key_dependent is 0 if all key parts could be used or if there was an + EQ_REF table found (which uses all key parts). In other words, we cannot + find a better key for the table even if remaining_tables is reduced. + Otherwise it's a bitmap of tables that could improve key usage. 
+ */ + table_map key_dependent= 0; + Range_rowid_filter_cost_info *best_filter= 0; + double tmp; + double keyread_tmp= 0; + ha_rows rec; + bool best_uses_jbuf= FALSE; + MY_BITMAP *eq_join_set= &s->table->eq_join_set; + KEYUSE *hj_start_key= 0; + SplM_plan_info *spl_plan= 0; + table_map spl_pd_boundary= 0; + Range_rowid_filter_cost_info *filter= 0; + const char* cause= NULL; + enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN; + + disable_jbuf= disable_jbuf || idx == join->const_tables; + + Loose_scan_opt loose_scan_opt; + DBUG_ENTER("best_access_path"); + + Json_writer_object trace_wrapper(thd, "best_access_path"); + + trace_wrapper.add_table_name(s); + + bitmap_clear_all(eq_join_set); + + loose_scan_opt.init(join, s, remaining_tables); + + if (s->table->is_splittable()) + spl_plan= s->choose_best_splitting(idx, + remaining_tables, + join_positions, + &spl_pd_boundary); + + Json_writer_array trace_paths(thd, "considered_access_paths"); + if (s->keyuse) + { /* Use key if possible */ + KEYUSE *keyuse; + KEYUSE *start_key=0; + TABLE *table= s->table; + double best_records= DBL_MAX; + uint max_key_part=0; + + /* Test how we can use keys */ + rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key + for (keyuse=s->keyuse ; keyuse->table == table ;) + { + KEY *keyinfo; + ulong key_flags; + uint key_parts; + key_part_map found_part= 0; + key_part_map notnull_part=0; // key parts which won't have NULL in lookup tuple. 
+ table_map found_ref= 0; + uint key= keyuse->key; + filter= 0; + bool ft_key= (keyuse->keypart == FT_KEYPART); + /* Bitmap of keyparts where the ref access is over 'keypart=const': */ + key_part_map const_part= 0; + /* The or-null keypart in ref-or-null access: */ + key_part_map ref_or_null_part= 0; + key_part_map all_parts= 0; + + if (is_hash_join_key_no(key)) + { + /* + Hash join as any join employing join buffer can be used to join + only those tables that are joined after the first non const table + */ + if (!(remaining_tables & keyuse->used_tables) && + idx > join->const_tables) + { + if (!hj_start_key) + hj_start_key= keyuse; + bitmap_set_bit(eq_join_set, keyuse->keypart); + } + keyuse++; + continue; + } + + keyinfo= table->key_info+key; + key_parts= table->actual_n_key_parts(keyinfo); + key_flags= table->actual_key_flags(keyinfo); + + /* Calculate how many key segments of the current key we can use */ + start_key= keyuse; + + loose_scan_opt.next_ref_key(); + DBUG_PRINT("info", ("Considering ref access on key %s", + keyuse->table->key_info[keyuse->key].name.str)); + + do /* For each keypart */ + { + uint keypart= keyuse->keypart; + table_map best_part_found_ref= 0, key_parts_dependent= 0; + double best_prev_record_reads= DBL_MAX; + + do /* For each way to access the keypart */ + { + /* + if 1. expression doesn't refer to forward tables + 2. 
we won't get two ref-or-null's + */ + all_parts|= keyuse->keypart_map; + if (!(remaining_tables & keyuse->used_tables) && + (!keyuse->validity_ref || *keyuse->validity_ref) && + s->access_from_tables_is_allowed(keyuse->used_tables, + join->sjm_lookup_tables) && + !(ref_or_null_part && (keyuse->optimize & + KEY_OPTIMIZE_REF_OR_NULL))) + { + found_part|= keyuse->keypart_map; + key_parts_dependent= 0; + if (!(keyuse->used_tables & ~join->const_table_map)) + const_part|= keyuse->keypart_map; + + if (!keyuse->val->maybe_null() || keyuse->null_rejecting) + notnull_part|=keyuse->keypart_map; + + double tmp2= prev_record_reads(join_positions, idx, + (found_ref | keyuse->used_tables)); + if (tmp2 < best_prev_record_reads) + { + best_part_found_ref= keyuse->used_tables & ~join->const_table_map; + best_prev_record_reads= tmp2; + } + if (rec > keyuse->ref_table_rows) + rec= keyuse->ref_table_rows; + /* + If there is one 'key_column IS NULL' expression, we can + use this ref_or_null optimisation of this field + */ + if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) + ref_or_null_part |= keyuse->keypart_map; + } + else if (!(found_part & keyuse->keypart_map)) + key_parts_dependent|= keyuse->used_tables; + + loose_scan_opt.add_keyuse(remaining_tables, keyuse); + keyuse++; + } while (keyuse->table == table && keyuse->key == key && + keyuse->keypart == keypart); + /* If we found a usable key, remember the dependent tables */ + if (all_parts & 1) + key_dependent|= key_parts_dependent; + found_ref|= best_part_found_ref; + } while (keyuse->table == table && keyuse->key == key); + + /* + Assume that that each key matches a proportional part of table. 
+ */ + if (!found_part && !ft_key && !loose_scan_opt.have_a_case()) + continue; // Nothing usable found + + if (rec < MATCHING_ROWS_IN_OTHER_TABLE) + rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables + + Json_writer_object trace_access_idx(thd); + /* + full text keys require special treatment + */ + if (ft_key) + { + /* + Really, there should be records=0.0 (yes!) + but 1.0 would be probably safer + */ + tmp= prev_record_reads(join_positions, idx, found_ref); + records= 1.0; + type= JT_FT; + trace_access_idx.add("access_type", join_type_str[type]) + .add("full-text index", keyinfo->name); + } + else + { + found_constraint= MY_TEST(found_part); + loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part); + + /* Check if we found full key */ + const key_part_map all_key_parts= PREV_BITS(uint, key_parts); + if (found_part == all_key_parts && !ref_or_null_part) + { /* use eq key */ + max_key_part= (uint) ~0; + /* + If the index is a unique index (1), and + - all its columns are not null (2), or + - equalities we are using reject NULLs (3) + then the estimate is rows=1. 
+ */ + if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)) && // (1) + (!(key_flags & HA_NULL_PART_KEY) || // (2) + all_key_parts == notnull_part)) // (3) + { + /* Check that eq_ref_tables are correctly updated */ + DBUG_ASSERT(join->eq_ref_tables & table->map); + /* TODO: Adjust cost for covering and clustering key */ + type= JT_EQ_REF; + trace_access_idx.add("access_type", join_type_str[type]) + .add("index", keyinfo->name); + + if (!found_ref && table->opt_range_keys.is_set(key)) + tmp= adjust_quick_cost(table->opt_range[key].cost, 1); + else + tmp= table->file->avg_io_cost(); + tmp*= prev_record_reads(join_positions, idx, + found_ref); + records=1.0; + } + else + { + type= JT_REF; + trace_access_idx.add("access_type", join_type_str[type]) + .add("index", keyinfo->name); + if (!found_ref) + { /* We found a const key */ + /* + ReuseRangeEstimateForRef-1: + We get here if we've found a ref(const) (c_i are constants): + "(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond] + + If range optimizer was able to construct a "range" + access on this index, then its condition "quick_cond" was + eqivalent to ref_const_cond (*), and we can re-use E(#rows) + from the range optimizer. + + Proof of (*): By properties of range and ref optimizers + quick_cond will be equal or tighther than ref_const_cond. + ref_const_cond already covers "smallest" possible interval - + a singlepoint interval over all keyparts. Therefore, + quick_cond is equivalent to ref_const_cond (if it was an + empty interval we wouldn't have got here). + */ + if (table->opt_range_keys.is_set(key)) + { + records= (double) table->opt_range[key].rows; + trace_access_idx.add("used_range_estimates", true); + tmp= adjust_quick_cost(table->opt_range[key].cost, + table->opt_range[key].rows); + goto got_cost; + } + else + { + /* quick_range couldn't use key! 
*/ + records= (double) s->records/rec; + trace_access_idx.add("used_range_estimates", false) + .add("reason", "not available"); + } + } + else + { + if (!(records= keyinfo->actual_rec_per_key(key_parts-1))) + { /* Prefer longer keys */ + trace_access_idx.add("rec_per_key_stats_missing", true); + records= + ((double) s->records / (double) rec * + (1.0 + + ((double) (table->s->max_key_length-keyinfo->key_length) / + (double) table->s->max_key_length))); + if (records < 2.0) + records=2.0; /* Can't be as good as a unique */ + } + /* + ReuseRangeEstimateForRef-2: We get here if we could not reuse + E(#rows) from range optimizer. Make another try: + + If range optimizer produced E(#rows) for a prefix of the ref + access we're considering, and that E(#rows) is lower then our + current estimate, make an adjustment. The criteria of when we + can make an adjustment is a special case of the criteria used + in ReuseRangeEstimateForRef-3. + */ + if (table->opt_range_keys.is_set(key) && + (const_part & + (((key_part_map)1 << table->opt_range[key].key_parts)-1)) == + (((key_part_map)1 << table->opt_range[key].key_parts)-1) && + table->opt_range[key].ranges == 1 && + records > (double) table->opt_range[key].rows) + { + records= (double) table->opt_range[key].rows; + trace_access_idx.add("used_range_estimates", "clipped down"); + } + else + { + trace_access_idx.add("used_range_estimates", false); + if (table->opt_range_keys.is_set(key)) + { + trace_access_idx.add("reason", "not better than ref estimates"); + } + else + { + trace_access_idx.add("reason", "not available"); + } + } + } + /* Limit the number of matched rows */ + tmp= cost_for_index_read(thd, table, key, (ha_rows) records, + (ha_rows) s->worst_seeks); + records_for_key= (ha_rows) records; + set_if_smaller(records_for_key, thd->variables.max_seeks_for_key); + keyread_tmp= table->file->keyread_time(key, 1, records_for_key); + got_cost: + tmp= COST_MULT(tmp, record_count); + keyread_tmp= COST_MULT(keyread_tmp, 
record_count); + } + } + else + { + type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF; + trace_access_idx.add("access_type", join_type_str[type]) + .add("index", keyinfo->name); + /* + Use as much key-parts as possible and a uniq key is better + than a not unique key + Set tmp to (previous record count) * (records / combination) + */ + if ((found_part & 1) && + (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || + found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) + { + max_key_part= max_part_bit(found_part); + /* + ReuseRangeEstimateForRef-3: + We're now considering a ref[or_null] access via + (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR + (same-as-above but with one cond replaced + with "t.keypart_i IS NULL")] (**) + + Try re-using E(#rows) from "range" optimizer: + We can do so if "range" optimizer used the same intervals as + in (**). The intervals used by range optimizer may be not + available at this point (as "range" access might have chosen to + create quick select over another index), so we can't compare + them to (**). We'll make indirect judgements instead. + The sufficient conditions for re-use are: + (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if + this is not satisfied we have no way to know which ranges + will be actually scanned by 'ref' until we execute the + join) + (C2) max #key parts in 'range' access == K == max_key_part (this + is apparently a necessary requirement) + + We also have a property that "range optimizer produces equal or + tighter set of scan intervals than ref(const) optimizer". Each + of the intervals in (**) are "tightest possible" intervals when + one limits itself to using keyparts 1..K (which we do in #2). + From here it follows that range access used either one, or + both of the (I1) and (I2) intervals: + + (t.keypart1=c1 AND ... 
AND t.keypartK=eK) (I1) + (same-as-above but with one cond replaced + with "t.keypart_i IS NULL") (I2) + + The remaining part is to exclude the situation where range + optimizer used one interval while we're considering + ref-or-null and looking for estimate for two intervals. This + is done by last limitation: + + (C3) "range optimizer used (have ref_or_null?2:1) intervals" + */ + if (table->opt_range_keys.is_set(key) && !found_ref && //(C1) + table->opt_range[key].key_parts == max_key_part && //(C2) + table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3) + { + records= (double) table->opt_range[key].rows; + tmp= adjust_quick_cost(table->opt_range[key].cost, + table->opt_range[key].rows); + trace_access_idx.add("used_range_estimates", true); + goto got_cost2; + } + else + { + /* Check if we have statistic about the distribution */ + if ((records= keyinfo->actual_rec_per_key(max_key_part-1))) + { + /* + Fix for the case where the index statistics is too + optimistic: If + (1) We're considering ref(const) and there is quick select + on the same index, + (2) and that quick select uses more keyparts (i.e. it will + scan equal/smaller interval then this ref(const)) + (3) and E(#rows) for quick select is higher then our + estimate, + Then + We'll use E(#rows) from quick select. + + Q: Why do we choose to use 'ref'? Won't quick select be + cheaper in some cases ? + TODO: figure this out and adjust the plan choice if needed. + */ + if (table->opt_range_keys.is_set(key)) + { + if (table->opt_range[key].key_parts >= max_key_part) // (2) + { + double rows= (double) table->opt_range[key].rows; + if (!found_ref && // (1) + records < rows) // (3) + { + trace_access_idx.add("used_range_estimates", "clipped up"); + records= rows; + } + } + } + } + else + { + trace_access_idx.add("rec_per_key_stats_missing", true); + /* + Assume that the first key part matches 1% of the file + and that the whole key matches 10 (duplicates) or 1 + (unique) records. 
+ Assume also that more key matches proportionally more + records + This gives the formula: + records = (x * (b-a) + a*c-b)/(c-1) + + b = records matched by whole key + a = records matched by first key part (1% of all records?) + c = number of key parts in key + x = used key parts (1 <= x <= c) + */ + double rec_per_key; + if (!(rec_per_key=(double) + keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1])) + rec_per_key=(double) s->records/rec+1; + + if (!s->records) + records= 0; + else if (rec_per_key/(double) s->records >= 0.01) + records= rec_per_key; + else + { + double a=s->records*0.01; + if (keyinfo->user_defined_key_parts > 1) + records= (max_key_part * (rec_per_key - a) + + a*keyinfo->user_defined_key_parts - rec_per_key)/ + (keyinfo->user_defined_key_parts-1); + else + records= a; + set_if_bigger(records, 1.0); + } + } + + if (ref_or_null_part) + { + /* We need to do two key searches to find row */ + records *= 2.0; + } + + /* + ReuseRangeEstimateForRef-4: We get here if we could not reuse + E(#rows) from range optimizer. Make another try: + + If range optimizer produced E(#rows) for a prefix of the ref + access we're considering, and that E(#rows) is lower then our + current estimate, make the adjustment. + + The decision whether we can re-use the estimate from the range + optimizer is the same as in ReuseRangeEstimateForRef-3, + applied to first table->quick_key_parts[key] key parts. 
+ */ + if (table->opt_range_keys.is_set(key) && + table->opt_range[key].key_parts <= max_key_part && + const_part & + ((key_part_map)1 << table->opt_range[key].key_parts) && + table->opt_range[key].ranges == (1 + + MY_TEST(ref_or_null_part & + const_part)) && + records > (double) table->opt_range[key].rows) + { + trace_access_idx.add("used_range_estimates", true); + records= (double) table->opt_range[key].rows; + } + } + + /* Limit the number of matched rows */ + tmp= cost_for_index_read(thd, table, key, (ha_rows) records, + (ha_rows) s->worst_seeks); + records_for_key= (ha_rows) records; + set_if_smaller(records_for_key, thd->variables.max_seeks_for_key); + keyread_tmp= table->file->keyread_time(key, 1, records_for_key); + got_cost2: + tmp= COST_MULT(tmp, record_count); + keyread_tmp= COST_MULT(keyread_tmp, record_count); + } + else + { + if (!(found_part & 1)) + cause= "no predicate for first keypart"; + tmp= best_time; // Do nothing + } + } + + tmp= COST_ADD(tmp, s->startup_cost); + loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp, + found_ref); + } /* not ft_key */ + + if (records < DBL_MAX && + (found_part & 1) && // start_key->key can be used for index access + (table->file->index_flags(start_key->key,0,1) & + HA_DO_RANGE_FILTER_PUSHDOWN)) + { + double rows= record_count * records; + /* + If we use filter F with selectivity s the the cost of fetching data + by key using this filter will be + cost_of_fetching_1_row * rows * s + + cost_of_fetching_1_key_tuple * rows * (1 - s) + + cost_of_1_lookup_into_filter * rows + Without using any filter the cost would be just + cost_of_fetching_1_row * rows + + So the gain in access cost per row will be + cost_of_fetching_1_row * (1 - s) - + cost_of_fetching_1_key_tuple * (1 - s) - + cost_of_1_lookup_into_filter + = + (cost_of_fetching_1_row - cost_of_fetching_1_key_tuple) * (1 - s) + - cost_of_1_lookup_into_filter + + Here we have: + cost_of_fetching_1_row = tmp/rows + cost_of_fetching_1_key_tuple = 
keyread_tmp/rows + + Here's a more detailed explanation that uses the formulas behind + the function the call filter->get_adjusted_gain(). The function + takes as a parameter the number of probes/look-ups into the filter + that is equal to the number of fetched key entries that is equal to + the number of row fetches when no filter is used (assuming no + index condition pushdown is employed for the used key access). + Let this number be N. Then the total gain from using the filter is + N*a_adj - b where b is the cost of building the filter and + a_adj is calcilated as follows: + a - (1-access_cost_factor)*(1-s) = + (1+1_cond_eval_cost)*(1-s)-1_probe_cost - (1-access_cost_factor)*(1-s) + = (1-s)*(1_cond_eval_cost+access_cost_factor) - 1_probe_cost. + Here ((1-s)*(1_cond_eval_cost) * N is the gain from checking less + conditions pushed into the table, 1_probe_cost*N is the cost of the + probes and (1*s) * access_cost_factor * N must be the gain from + accessing less rows. + It does not matter how we calculate the cost of N full row fetches + cost_of_fetching_N_rows or + how we calculate the cost of fetching N key entries + cost_of_fetching_N_key_entries + the gain from less row fetches will be + (cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) * (1-s) + and this should be equal to (1*s) * access_cost_factor * N. + Thus access_cost_factor must be calculated as + (cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) / N. + + For safety we clip cost_of_fetching_N_key_entries by the value + of cost_of_fetching_N_row though formally it's not necessary. + */ + /* + For eq_ref access we assume that the cost of fetching N key entries + is equal to the half of fetching N rows + */ + double key_access_cost= + type == JT_EQ_REF ? 
0.5 * tmp : MY_MIN(tmp, keyread_tmp); + double access_cost_factor= MY_MIN((tmp - key_access_cost) / rows, 1.0); + + if (!(records < s->worst_seeks && + records <= thd->variables.max_seeks_for_key)) + { + // Don't use rowid filter + trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping"); + filter= NULL; + } + else + { + filter= + table->best_range_rowid_filter_for_partial_join(start_key->key, + rows, + access_cost_factor); + } + if (filter) + { + tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows); + DBUG_ASSERT(tmp >= 0); + trace_access_idx.add("rowid_filter_key", + table->key_info[filter->key_no].name); + } + } + trace_access_idx.add("rows", records).add("cost", tmp); + + if (tmp + 0.0001 < best_time - records/TIME_FOR_COMPARE) + { + trace_access_idx.add("chosen", true); + best_time= COST_ADD(tmp, records/TIME_FOR_COMPARE); + best= tmp; + best_records= records; + best_key= start_key; + best_max_key_part= max_key_part; + best_ref_depends_map= found_ref; + best_filter= filter; + best_type= type; + } + else + { + trace_access_idx.add("chosen", false) + .add("cause", cause ? cause : "cost"); + } + cause= nullptr; + } /* for each key */ + records= best_records; + } + else + { + /* + No usable keys found. However, there may still be an option to use + "Range checked for each record" when all depending tables has + been read. s->key_dependent tells us which tables these could be and + s->key_start_dependent tells us if a first key part was used. + s->key_dependent may include more tables than could be used, + but this is ok as not having any usable keys is a rare thing and + the performance penalty for extra table bits is that + best_extension_by_limited_search() would not be able to prune tables + earlier. 
+ Example query: + SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1 + */ + if (s->key_start_dependent) + key_dependent= s->key_dependent; + /* Add dependencey for sub queries */ + key_dependent|= s->embedded_dependent; + } + /* Check that s->key_dependent contains all used_tables found in s->keyuse */ + key_dependent&= ~PSEUDO_TABLE_BITS; + DBUG_ASSERT((key_dependent & (s->key_dependent | s->embedded_dependent)) == + key_dependent); + + /* + If there is no key to access the table, but there is an equi-join + predicate connecting the table with the privious tables then we + consider the possibility of using hash join. + We need also to check that: + (1) s is inner table of semi-join -> join cache is allowed for semijoins + (2) s is inner table of outer join -> join cache is allowed for outer joins + */ + if (idx > join->const_tables && best_key == 0 && + (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && + join->max_allowed_join_cache_level > 2 && + !bitmap_is_clear_all(eq_join_set) && !disable_jbuf && + (!s->emb_sj_nest || + join->allowed_semijoin_with_cache) && // (1) + (!(s->table->map & join->outer_join) || + join->allowed_outer_join_with_cache)) // (2) + { + double fanout; + double join_sel; + bool stats_found= 0, force_estimate= 0; + Json_writer_object trace_access_hash(thd); + trace_access_hash.add("type", "hash"); + trace_access_hash.add("index", "hj-key"); + /* Estimate the cost of the hash join access to the table */ + double rnd_records= apply_selectivity_for_table(s, use_cond_selectivity, + &force_estimate); + + DBUG_ASSERT(hj_start_key); + if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY)) + { + /* + Starting from this point, rnd_records should not be used anymore. + Use "fanout" for an estimate of # matching records. 
+ */ + fanout= hash_join_fanout(join, s, remaining_tables, rnd_records, + hj_start_key, &stats_found); + join_sel= 1.0; // Don't do the "10% heuristic" + if (stats_found) + goto fanout_computed; + } + + /* + No OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY or no field statistics + found. + + Take into account if there is non constant constraints used with + earlier tables in the where expression. + If yes, this will set fanout to rnd_records/4. + We estimate that there will be HASH_FANOUT (10%) + hash matches / row. + */ + if (found_constraint && !force_estimate) + rnd_records= use_found_constraint(rnd_records); + fanout= rnd_records; + join_sel= 0.1; + + fanout_computed: + tmp= s->quick ? s->quick->read_time : s->scan_time(); + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); + + /* We read the table as many times as join buffer becomes full. */ + + double refills= (1.0 + floor((double) cache_record_length(join,idx) * + record_count / + (double) thd->variables.join_buff_size)); + tmp= COST_MULT(tmp, refills); + + // Add cost of reading/writing the join buffer + if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY)) + { + /* Set it to be 1/10th of TIME_FOR_COMPARE */ + double row_copy_cost= 1.0 / (10*TIME_FOR_COMPARE); + double join_buffer_operations= + COST_ADD( + COST_MULT(record_count, row_copy_cost), + COST_MULT(record_count, fanout * (idx - join->const_tables)) + ); + double jbuf_use_cost= row_copy_cost * join_buffer_operations; + trace_access_hash.add("jbuf_use_cost", jbuf_use_cost); + tmp= COST_ADD(tmp, jbuf_use_cost); + } + + double where_cost= COST_MULT((fanout*join_sel) / TIME_FOR_COMPARE, + record_count); + trace_access_hash.add("extra_cond_check_cost", where_cost); + + best_time= COST_ADD(tmp, where_cost); + + best= tmp; + records= fanout; + best_key= hj_start_key; + best_ref_depends_map= 0; + best_uses_jbuf= TRUE; + best_filter= 0; + best_type= JT_HASH; + trace_access_hash.add("rnd_records", rnd_records); 
+ trace_access_hash.add("records", records); + trace_access_hash.add("cost", best); + trace_access_hash.add("chosen", true); + } + + /* + Don't test table scan if it can't be better. + Prefer key lookup if we would use the same key for scanning. + + Don't do a table scan on InnoDB tables, if we can read the used + parts of the row from any of the used index. + This is because table scans uses index and we would not win + anything by using a table scan. + + A word for word translation of the below if-statement in sergefp's + understanding: we check if we should use table scan if: + (1) The found 'ref' access produces more records than a table scan + (or index scan, or quick select), or 'ref' is more expensive than + any of them. + (2) This doesn't hold: the best way to perform table scan is to to perform + 'range' access using index IDX, and the best way to perform 'ref' + access is to use the same index IDX, with the same or more key parts. + (note: it is not clear how this rule is/should be extended to + index_merge quick selects). Also if we have a hash join we prefer that + over a table scan. This heuristic doesn't apply if the quick select + uses the group-by min-max optimization. + (3) See above note about InnoDB. + (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access + path, but there is no quick select) + If the condition in the above brackets holds, then the only possible + "table scan" access method is ALL/index (there is no quick select). + Since we have a 'ref' access path, and FORCE INDEX instructs us to + choose it over ALL/index, there is no need to consider a full table + scan. + (5) Non-flattenable semi-joins: don't consider doing a scan of temporary + table if we had an option to make lookups into it. In real-world cases, + lookups are cheaper than full scans, but when the table is small, they + can be [considered to be] more expensive, which causes lookups not to + be used for cases with small datasets, which is annoying. 
+ */ + Json_writer_object trace_access_scan(thd); + if ((records >= s->found_records || best > s->read_time) && // (1) + !(best_key && best_key->key == MAX_KEY) && // (2) + !(s->quick && + s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) + best_key && s->quick->index == best_key->key && // (2) + s->table->opt_range_keys.is_set(best_key->key) && // (2) + best_max_key_part >= s->table->opt_range[best_key->key].key_parts) &&// (2) + !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) + ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) + !(s->table->force_index && best_key && !s->quick) && // (4) + !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5) + { // Check full join + bool force_estimate= 0; + double rnd_records= apply_selectivity_for_table(s, + use_cond_selectivity, + &force_estimate); + rnd_records= ((found_constraint && !force_estimate) ? + use_found_constraint(rnd_records) : + rnd_records); + /* + Range optimizer never proposes a RANGE if it isn't better + than FULL: so if RANGE is present, it's always preferred to FULL. + Here we estimate its cost. + */ + + filter= 0; + if (s->quick) + { + /* + For each record we: + - read record range through 'quick' + - skip rows which does not satisfy WHERE constraints + TODO: + We take into account possible use of join cache for ALL/index + access (see first else-branch below), but we don't take it into + account here for range/index_merge access. Find out why this is so. 
+ */ + double cmp_time= (s->found_records - rnd_records) / TIME_FOR_COMPARE; + tmp= COST_MULT(record_count, + COST_ADD(s->quick->read_time, cmp_time)); + + if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + { + double rows= record_count * s->found_records; + uint key_no= s->quick->index; + + /* See the comment concerning using rowid filter for with ref access */ + double row_access_cost= s->quick->read_time * record_count; + double key_access_cost= + MY_MIN(row_access_cost, + s->table->opt_range[key_no].index_only_cost * record_count); + double access_cost_factor= MY_MIN((row_access_cost - key_access_cost) / + rows, 1.0); + filter= + s->table->best_range_rowid_filter_for_partial_join(key_no, rows, + access_cost_factor); + if (filter) + { + tmp-= filter->get_adjusted_gain(rows); + DBUG_ASSERT(tmp >= 0); + } + + type= JT_RANGE; + } + else + { + type= JT_INDEX_MERGE; + best_filter= 0; + } + loose_scan_opt.check_range_access(join, idx, s->quick); + } + else + { + /* Estimate cost of reading table. 
*/ + if (s->table->force_index && !best_key) // index scan + { + type= JT_NEXT; + tmp= s->table->file->read_time(s->ref.key, 1, s->records); + } + else // table scan + { + tmp= s->scan_time(); + type= JT_ALL; + } + + if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache + { + /* + For each record we have to: + - read the whole table record + - skip rows which does not satisfy join condition + */ + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_MULT(record_count, COST_ADD(tmp,cmp_time)); + } + else + { + double refills= (1.0 + floor((double) cache_record_length(join,idx) * + (record_count / + (double) thd->variables.join_buff_size))); + tmp= COST_MULT(tmp, refills); + /* + We don't make full cartesian product between rows in the scanned + table and existing records because we skip all rows from the + scanned table, which does not satisfy join condition when + we read the table (see flush_cached_records for details). Here we + take into account cost to read and skip these records. + */ + double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE; + tmp= COST_ADD(tmp, cmp_time); + } + } + + trace_access_scan.add("access_type", type == JT_ALL ? + "scan" : + join_type_str[type]); + /* Splitting technique cannot be used with join cache */ + if (s->table->is_splittable()) + tmp+= s->table->get_materialization_cost(); + else + tmp+= s->startup_cost; + + /* + We estimate the cost of evaluating WHERE clause for found records + as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus + tmp give us total cost of using TABLE SCAN + */ + + const double best_filter_cmp_gain= best_filter + ? best_filter->get_cmp_gain(record_count * records) + : 0; + trace_access_scan.add("resulting_rows", rnd_records); + trace_access_scan.add("cost", tmp); + + if (best == DBL_MAX || + COST_ADD(tmp, record_count/TIME_FOR_COMPARE*rnd_records) < + (best_key->is_for_hash_join() ? 
best_time : + COST_ADD(best - best_filter_cmp_gain, + record_count/TIME_FOR_COMPARE*records))) + { + /* + If the table has a range (s->quick is set) make_join_select() + will ensure that this will be used + */ + best= tmp; + records= rnd_records; + best_key= 0; + best_filter= 0; + if (s->quick && s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + best_filter= filter; + /* range/index_merge/ALL/index access method are "independent", so: */ + best_ref_depends_map= 0; + best_uses_jbuf= MY_TEST(!disable_jbuf && + (join->allowed_outer_join_with_cache || + !(s->table->map & join->outer_join))); + spl_plan= 0; + best_type= type; + } + trace_access_scan.add("chosen", best_key == NULL); + } + else + { + trace_access_scan.add("type", "scan"); + trace_access_scan.add("chosen", false); + trace_access_scan.add("cause", "cost"); + } + + /* Update the cost information for the current partial plan */ + pos->records_read= records; + pos->read_time= best; + pos->key= best_key; + pos->type= best_type; + pos->table= s; + pos->ref_depend_map= best_ref_depends_map; + pos->loosescan_picker.loosescan_key= MAX_KEY; + pos->use_join_buffer= best_uses_jbuf; + pos->spl_plan= spl_plan; + pos->spl_pd_boundary= !spl_plan ? 0 : spl_pd_boundary; + pos->range_rowid_filter_info= best_filter; + pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 : + key_dependent & remaining_tables); + + loose_scan_opt.save_to_position(s, loose_scan_pos); + + if (!best_key && + idx == join->const_tables && + s->table == join->sort_by_table && + join->unit->lim.get_select_limit() >= records) + { + trace_access_scan.add("use_tmp_table", true); + join->sort_by_table= (TABLE*) 1; // Must use temporary table + } + trace_access_scan.end(); + trace_paths.end(); + + if (unlikely(thd->trace_started())) + print_best_access_for_table(thd, pos, best_type); + + DBUG_VOID_RETURN; +} + + +/* + Find JOIN_TAB's embedding (i.e, parent) subquery. 
+ - For merged semi-joins, tables inside the semi-join nest have their + semi-join nest as parent. We intentionally ignore results of table + pullout action here. + - For non-merged semi-joins (JTBM tabs), the embedding subquery is the + JTBM join tab itself. +*/ + +static TABLE_LIST* get_emb_subq(JOIN_TAB *tab) +{ + TABLE_LIST *tlist= tab->table->pos_in_table_list; + if (tlist->jtbm_subselect) + return tlist; + TABLE_LIST *embedding= tlist->embedding; + if (!embedding || !embedding->sj_subq_pred) + return NULL; + return embedding; +} + + +/* + Choose initial table order that "helps" semi-join optimizations. + + The idea is that we should start with the order that is the same as the one + we would have had if we had semijoin=off: + - Top-level tables go first + - subquery tables are grouped together by the subquery they are in, + - subquery tables are attached where the subquery predicate would have been + attached if we had semi-join off. + + This function relies on join_tab_cmp()/join_tab_cmp_straight() to produce + certain pre-liminary ordering, see compare_embedding_subqueries() for its + description. 
+*/ + +static void choose_initial_table_order(JOIN *join) +{ + TABLE_LIST *emb_subq; + JOIN_TAB **tab= join->best_ref + join->const_tables; + JOIN_TAB **tabs_end= tab + join->table_count - join->const_tables; + DBUG_ENTER("choose_initial_table_order"); + /* Find where the top-level JOIN_TABs end and subquery JOIN_TABs start */ + for (; tab != tabs_end; tab++) + { + if ((emb_subq= get_emb_subq(*tab))) + break; + } + uint n_subquery_tabs= (uint)(tabs_end - tab); + + if (!n_subquery_tabs) + DBUG_VOID_RETURN; + + /* Copy the subquery JOIN_TABs to a separate array */ + JOIN_TAB *subquery_tabs[MAX_TABLES]; + memcpy(subquery_tabs, tab, sizeof(JOIN_TAB*) * n_subquery_tabs); + + JOIN_TAB **last_top_level_tab= tab; + JOIN_TAB **subq_tab= subquery_tabs; + JOIN_TAB **subq_tabs_end= subquery_tabs + n_subquery_tabs; + TABLE_LIST *cur_subq_nest= NULL; + for (; subq_tab < subq_tabs_end; subq_tab++) + { + if (get_emb_subq(*subq_tab)!= cur_subq_nest) + { + /* + Reached the part of subquery_tabs that covers tables in some subquery. 
+ */ + cur_subq_nest= get_emb_subq(*subq_tab); + + /* Determine how many tables the subquery has */ + JOIN_TAB **last_tab_for_subq; + for (last_tab_for_subq= subq_tab; + last_tab_for_subq < subq_tabs_end && + get_emb_subq(*last_tab_for_subq) == cur_subq_nest; + last_tab_for_subq++) {} + uint n_subquery_tables= (uint)(last_tab_for_subq - subq_tab); + + /* + Walk the original array and find where this subquery would have been + attached to + */ + table_map need_tables= cur_subq_nest->original_subq_pred_used_tables; + need_tables &= ~(join->const_table_map | PSEUDO_TABLE_BITS); + for (JOIN_TAB **top_level_tab= join->best_ref + join->const_tables; + top_level_tab < last_top_level_tab; + //top_level_tab < join->best_ref + join->table_count; + top_level_tab++) + { + need_tables &= ~(*top_level_tab)->table->map; + /* Check if this is the place where subquery should be attached */ + if (!need_tables) + { + /* Move away the top-level tables that are after top_level_tab */ + size_t top_tail_len= last_top_level_tab - top_level_tab - 1; + memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1, + sizeof(JOIN_TAB*)*top_tail_len); + last_top_level_tab += n_subquery_tables; + memcpy(top_level_tab + 1, subq_tab, sizeof(JOIN_TAB*)*n_subquery_tables); + break; + } + } + DBUG_ASSERT(!need_tables); + subq_tab += n_subquery_tables - 1; + } + } + DBUG_VOID_RETURN; +} + + +/** + Selects and invokes a search strategy for an optimal query plan. + + The function checks user-configurable parameters that control the search + strategy for an optimal plan, selects the search method and then invokes + it. Each specific optimization procedure stores the final optimal plan in + the array 'join->best_positions', and the cost of the plan in + 'join->best_read'. 
+ + @param join pointer to the structure providing all context info for + the query + @param join_tables set of the tables in the query + + @retval + FALSE ok + @retval + TRUE Fatal error +*/ + +bool +choose_plan(JOIN *join, table_map join_tables) +{ + uint search_depth= join->thd->variables.optimizer_search_depth; + uint use_cond_selectivity= + join->thd->variables.optimizer_use_condition_selectivity; + bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN); + THD *thd= join->thd; + DBUG_ENTER("choose_plan"); + + join->cur_embedding_map= 0; + join->extra_heuristic_pruning= false; + join->prune_level= join->thd->variables.optimizer_prune_level; + + reset_nj_counters(join, join->join_list); + qsort2_cmp jtab_sort_func; + + if (join->emb_sjm_nest) + { + /* We're optimizing semi-join materialization nest, so put the + tables from this semi-join as first + */ + jtab_sort_func= join_tab_cmp_embedded_first; + } + else + { + /* + if (SELECT_STRAIGHT_JOIN option is set) + reorder tables so dependent tables come after tables they depend + on, otherwise keep tables in the order they were specified in the query + else + Apply heuristic: pre-sort all access plans with respect to the number of + records accessed. + */ + jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp; + } + + /* + psergey-todo: if we're not optimizing an SJM nest, + - sort that outer tables are first, and each sjm nest follows + - then, put each [sjm_table1, ... sjm_tableN] sub-array right where + WHERE clause pushdown would have put it. + */ + my_qsort2(join->best_ref + join->const_tables, + join->table_count - join->const_tables, sizeof(JOIN_TAB*), + jtab_sort_func, (void*)join->emb_sjm_nest); + + Json_writer_object wrapper(thd); + Json_writer_array trace_plan(thd,"considered_execution_plans"); + + if (!join->emb_sjm_nest) + { + choose_initial_table_order(join); + } + /* + Note: constant tables are already in the join prefix. 
We don't + put them into the cur_sj_inner_tables, though. + */ + join->cur_sj_inner_tables= 0; + + if (straight_join) + { + optimize_straight_join(join, join_tables); + } + else + { + DBUG_ASSERT(search_depth <= MAX_TABLES + 1); + if (search_depth == 0) + /* Automatically determine a reasonable value for 'search_depth' */ + search_depth= determine_search_depth(join); + + if (join->prune_level >= 1 && + search_depth >= thd->variables.optimizer_extra_pruning_depth) + { + join->extra_heuristic_pruning= true; + } + + if (greedy_search(join, join_tables, search_depth, use_cond_selectivity)) + DBUG_RETURN(TRUE); + } + + /* + Store the cost of this query into a user variable + Don't update last_query_cost for statements that are not "flat joins" : + i.e. they have subqueries, unions or call stored procedures. + TODO: calculate a correct cost for a query with subqueries and UNIONs. + */ + if (join->thd->lex->is_single_level_stmt()) + join->thd->status_var.last_query_cost= join->best_read; + DBUG_RETURN(FALSE); +} + + +/* + Compare two join tabs based on the subqueries they are from. 
+ - top-level join tabs go first + - then subqueries are ordered by their select_id (we're using this + criteria because we need a cross-platform, deterministic ordering) + + @return + 0 - equal + -1 - jt1 < jt2 + 1 - jt1 > jt2 +*/ + +static int compare_embedding_subqueries(JOIN_TAB *jt1, JOIN_TAB *jt2) +{ + /* Determine if the first table is originally from a subquery */ + TABLE_LIST *tbl1= jt1->table->pos_in_table_list; + uint tbl1_select_no; + if (tbl1->jtbm_subselect) + { + tbl1_select_no= + tbl1->jtbm_subselect->unit->first_select()->select_number; + } + else if (tbl1->embedding && tbl1->embedding->sj_subq_pred) + { + tbl1_select_no= + tbl1->embedding->sj_subq_pred->unit->first_select()->select_number; + } + else + tbl1_select_no= 1; /* Top-level */ + + /* Same for the second table */ + TABLE_LIST *tbl2= jt2->table->pos_in_table_list; + uint tbl2_select_no; + if (tbl2->jtbm_subselect) + { + tbl2_select_no= + tbl2->jtbm_subselect->unit->first_select()->select_number; + } + else if (tbl2->embedding && tbl2->embedding->sj_subq_pred) + { + tbl2_select_no= + tbl2->embedding->sj_subq_pred->unit->first_select()->select_number; + } + else + tbl2_select_no= 1; /* Top-level */ + + /* + Put top-level tables in front. Tables from within subqueries must follow, + grouped by their owner subquery. We don't care about the order that + subquery groups are in, because choose_initial_table_order() will re-order + the groups. + */ + if (tbl1_select_no != tbl2_select_no) + return tbl1_select_no > tbl2_select_no ? 1 : -1; + return 0; +} + + +/** + Compare two JOIN_TAB objects based on the number of accessed records. + + @param ptr1 pointer to first JOIN_TAB object + @param ptr2 pointer to second JOIN_TAB object + + NOTES + The order relation implemented by join_tab_cmp() is not transitive, + i.e. it is possible to choose such a, b and c that (a < b) && (b < c) + but (c < a). 
This implies that result of a sort using the relation + implemented by join_tab_cmp() depends on the order in which + elements are compared, i.e. the result is implementation-specific. + Example: + a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0 + b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838 + c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0 + + As for subqueries, this function must produce order that can be fed to + choose_initial_table_order(). + + @retval + 1 if first is bigger + @retval + -1 if second is bigger + @retval + 0 if equal +*/ + +static int +join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2) +{ + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + int cmp; + + if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0) + return cmp; + /* + After that do ordering according to numbers of + records in the table. + */ + if (jt1->found_records > jt2->found_records) + return 1; + if (jt1->found_records < jt2->found_records) + return -1; + return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0); +} + + +/** + Same as join_tab_cmp, but for use with SELECT_STRAIGHT_JOIN. +*/ + +static int +join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2) +{ + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + + /* + We don't do subquery flattening if the parent or child select has + STRAIGHT_JOIN modifier. It is complicated to implement and the semantics + is hardly useful. + */ + DBUG_ASSERT(!jt1->emb_sj_nest); + DBUG_ASSERT(!jt2->emb_sj_nest); + + int cmp; + if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0) + return cmp; + + /* + We have to check dependency with straight_join as we don't reorder + later as we do for other plans in best_extension_by_limited_search(). + */ + if (jt1->dependent & jt2->table->map) + return 1; + if (jt2->dependent & jt1->table->map) + return -1; + + return jt1 > jt2 ? 1 : (jt1 < jt2 ? 
-1 : 0); +} + + +/* + Same as join_tab_cmp but tables from within the given semi-join nest go + first. Used when the optimizing semi-join materialization nests. +*/ + +static int +join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2) +{ + const TABLE_LIST *emb_nest= (TABLE_LIST*) emb; + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + + if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest) + return -1; + if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest) + return 1; + + if (jt1->found_records > jt2->found_records) + return 1; + if (jt1->found_records < jt2->found_records) + return -1; + + return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0); +} + + +/** + Heuristic procedure to automatically guess a reasonable degree of + exhaustiveness for the greedy search procedure. + + The procedure estimates the optimization time and selects a search depth + big enough to result in a near-optimal QEP, that doesn't take too long to + find. If the number of tables in the query exceeds some constant, then + search_depth is set to this constant. + + @param join pointer to the structure providing all context info for + the query + + @note + This is an extremely simplistic implementation that serves as a stub for a + more advanced analysis of the join. Ideally the search depth should be + determined by learning from previous query optimizations, because it will + depend on the CPU power (and other factors). + + @todo + this value should be determined dynamically, based on statistics: + uint max_tables_for_exhaustive_opt= 7; + + @todo + this value could be determined by some mapping of the form: + depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE] + + @return + A positive integer that specifies the search depth (and thus the + exhaustiveness) of the depth-first search algorithm used by + 'greedy_search'. 
+*/ + +static uint +determine_search_depth(JOIN *join) +{ + uint table_count= join->table_count - join->const_tables; + uint search_depth; + /* TODO: this value should be determined dynamically, based on statistics: */ + uint max_tables_for_exhaustive_opt= 7; + + if (table_count <= max_tables_for_exhaustive_opt) + search_depth= table_count+1; // use exhaustive for small number of tables + else + /* + TODO: this value could be determined by some mapping of the form: + depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE] + */ + search_depth= max_tables_for_exhaustive_opt; // use greedy search + + return search_depth; +} + + +/** + Select the best ways to access the tables in a query without reordering them. + + Find the best access paths for each query table and compute their costs + according to their order in the array 'join->best_ref' (thus without + reordering the join tables). The function calls sequentially + 'best_access_path' for each table in the query to select the best table + access method. The final optimal plan is stored in the array + 'join->best_positions', and the corresponding cost in 'join->best_read'. + + @param join pointer to the structure providing all context info + for the query + @param remaining_tables set of the tables in the query + + @note + This function can be applied to: + - queries with STRAIGHT_JOIN + - internally to compute the cost of an arbitrary QEP + @par + Thus 'optimize_straight_join' can be used at any stage of the query + optimization process to finalize a QEP as it is. 
+*/ + +static void +optimize_straight_join(JOIN *join, table_map remaining_tables) +{ + JOIN_TAB *s; + uint idx= join->const_tables; + bool disable_jbuf= join->thd->variables.join_cache_level == 0; + double record_count= 1.0; + double read_time= 0.0; + uint use_cond_selectivity= + join->thd->variables.optimizer_use_condition_selectivity; + POSITION loose_scan_pos; + THD *thd= join->thd; + + for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++) + { + POSITION *position= join->positions + idx; + Json_writer_object trace_one_table(thd); + if (unlikely(thd->trace_started())) + trace_plan_prefix(join, idx, remaining_tables); + /* Find the best access method from 's' to the current partial plan */ + best_access_path(join, s, remaining_tables, join->positions, idx, + disable_jbuf, record_count, + position, &loose_scan_pos); + + /* Compute the cost of the new plan extended with 's' */ + record_count= COST_MULT(record_count, position->records_read); + const double filter_cmp_gain= position->range_rowid_filter_info + ? 
position->range_rowid_filter_info->get_cmp_gain(record_count) + : 0; + read_time= COST_ADD(read_time, + COST_ADD(position->read_time - + filter_cmp_gain, + record_count / + TIME_FOR_COMPARE)); + optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time, + &loose_scan_pos); + + remaining_tables&= ~(s->table->map); + double pushdown_cond_selectivity= 1.0; + if (use_cond_selectivity > 1) + pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, + remaining_tables); + position->cond_selectivity= pushdown_cond_selectivity; + double partial_join_cardinality= record_count * + pushdown_cond_selectivity; + join->positions[idx].partial_join_cardinality= partial_join_cardinality; + ++idx; + } + + if (join->sort_by_table && + join->sort_by_table != join->positions[join->const_tables].table->table) + read_time+= record_count; // We have to make a temp table + memcpy((uchar*) join->best_positions, (uchar*) join->positions, + sizeof(POSITION)*idx); + join->join_record_count= record_count; + join->best_read= read_time - COST_EPS; +} + + +/** + Find a good, possibly optimal, query execution plan (QEP) by a greedy search. + + The search procedure uses a hybrid greedy/exhaustive search with controlled + exhaustiveness. The search is performed in N = card(remaining_tables) + steps. Each step evaluates how promising is each of the unoptimized tables, + selects the most promising table, and extends the current partial QEP with + that table. Currenly the most 'promising' table is the one with least + expensive extension.\ + + There are two extreme cases: + -# When (card(remaining_tables) < search_depth), the estimate finds the + best complete continuation of the partial QEP. This continuation can be + used directly as a result of the search. + -# When (search_depth == 1) the 'best_extension_by_limited_search' + consideres the extension of the current QEP with each of the remaining + unoptimized tables. + + All other cases are in-between these two extremes. 
Thus the parameter + 'search_depth' controlls the exhaustiveness of the search. The higher the + value, the longer the optimization time and possibly the better the + resulting plan. The lower the value, the fewer alternative plans are + estimated, but the more likely to get a bad QEP. + + All intermediate and final results of the procedure are stored in 'join': + - join->positions : modified for every partial QEP that is explored + - join->best_positions: modified for the current best complete QEP + - join->best_read : modified for the current best complete QEP + - join->best_ref : might be partially reordered + + The final optimal plan is stored in 'join->best_positions', and its + corresponding cost in 'join->best_read'. + + @note + The following pseudocode describes the algorithm of 'greedy_search': + + @code + procedure greedy_search + input: remaining_tables + output: pplan; + { + pplan = <>; + do { + (t, a) = best_extension(pplan, remaining_tables); + pplan = concat(pplan, (t, a)); + remaining_tables = remaining_tables - t; + } while (remaining_tables != {}) + return pplan; + } + + @endcode + where 'best_extension' is a placeholder for a procedure that selects the + most "promising" of all tables in 'remaining_tables'. + Currently this estimate is performed by calling + 'best_extension_by_limited_search' to evaluate all extensions of the + current QEP of size 'search_depth', thus the complexity of 'greedy_search' + mainly depends on that of 'best_extension_by_limited_search'. + + @par + If 'best_extension()' == 'best_extension_by_limited_search()', then the + worst-case complexity of this algorithm is <= + O(N*N^search_depth/search_depth). When serch_depth >= N, then the + complexity of greedy_search is O(N!). + + @par + In the future, 'greedy_search' might be extended to support other + implementations of 'best_extension', e.g. some simpler quadratic procedure. 
+ + @param join pointer to the structure providing all context info + for the query + @param remaining_tables set of tables not included into the partial plan yet + @param search_depth controlls the exhaustiveness of the search + @param use_cond_selectivity specifies how the selectivity of the conditions + pushed to a table should be taken into account + + @retval + FALSE ok + @retval + TRUE Fatal error +*/ + +static bool +greedy_search(JOIN *join, + table_map remaining_tables, + uint search_depth, + uint use_cond_selectivity) +{ + double record_count= 1.0; + double read_time= 0.0; + uint idx= join->const_tables; // index into 'join->best_ref' + uint best_idx; + uint size_remain; // cardinality of remaining_tables + table_map usable_tables, eq_ref_tables; + POSITION best_pos; + JOIN_TAB *best_table; // the next plan node to be added to the curr QEP + // ==join->tables or # tables in the sj-mat nest we're optimizing + uint n_tables __attribute__((unused)); + DBUG_ENTER("greedy_search"); + + /* number of tables that remain to be optimized */ + usable_tables= (join->emb_sjm_nest ? + (join->emb_sjm_nest->sj_inner_tables & + ~join->const_table_map & remaining_tables): + remaining_tables); + n_tables= size_remain= my_count_bits(usable_tables); + + join->next_sort_position= join->sort_positions; + do { + /* + Find the extension of the current QEP with the lowest cost + We are using remaining_table instead of usable tables here as + in case of an emb_sjm_nest, we want to be able to check if + an embedded table is depending on an outer table. + */ + join->best_read= DBL_MAX; + if ((int) best_extension_by_limited_search(join, remaining_tables, idx, + record_count, + read_time, search_depth, + use_cond_selectivity, + &eq_ref_tables) < + (int) SEARCH_OK) + DBUG_RETURN(TRUE); + /* + 'best_read < DBL_MAX' means that optimizer managed to find + some plan and updated 'best_positions' array accordingly. 
+ */ + DBUG_ASSERT(join->best_read < DBL_MAX); + + if (size_remain <= search_depth) + { + /* + 'join->best_positions' contains a complete optimal extension of the + current partial QEP. + */ + DBUG_EXECUTE("opt", print_plan(join, n_tables, + record_count, read_time, read_time, + "optimal");); + DBUG_RETURN(FALSE); + } + + /* select the first table in the optimal extension as most promising */ + best_pos= join->best_positions[idx]; + best_table= best_pos.table; + /* + Each subsequent loop of 'best_extension_by_limited_search' uses + 'join->positions' for cost estimates, therefore we have to update its + value. + */ + join->positions[idx]= best_pos; + + /* + Update the interleaving state after extending the current partial plan + with a new table. + We are doing this here because best_extension_by_limited_search reverts + the interleaving state to the one of the non-extended partial plan + on exit. + */ + bool is_interleave_error __attribute__((unused))= + check_interleaving_with_nj (best_table); + /* This has been already checked by best_extension_by_limited_search */ + DBUG_ASSERT(!is_interleave_error); + + /* + Also, update the semi-join optimization state. Information about the + picked semi-join operation is in best_pos->...picker, but we need to + update the global state in the JOIN object, too. + */ + if (!join->emb_sjm_nest) + update_sj_state(join, best_table, idx, remaining_tables); + + /* find the position of 'best_table' in 'join->best_ref' */ + best_idx= idx; + JOIN_TAB *pos= join->best_ref[best_idx]; + while (pos && best_table != pos) + pos= join->best_ref[++best_idx]; + DBUG_ASSERT((pos != NULL)); // should always find 'best_table' + + /* + Move 'best_table' at the first free position in the array of joins + We don't need to keep the array sorted as + best_extension_by_limited_search() will sort them. 
+ */ + swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]); + + /* compute the cost of the new plan extended with 'best_table' */ + record_count= COST_MULT(record_count, join->positions[idx].records_read); + read_time= COST_ADD(read_time, + COST_ADD(join->positions[idx].read_time, + record_count / TIME_FOR_COMPARE)); + + remaining_tables&= ~(best_table->table->map); + --size_remain; + ++idx; + + DBUG_EXECUTE("opt", print_plan(join, idx, + record_count, read_time, read_time, + "extended");); + } while (TRUE); +} + + +/** + Get cost of execution and fanout produced by selected tables in the join + prefix (where prefix is defined as prefix in depth-first traversal) + + @param end_tab_idx The number of the last tab to be taken into + account (in depth-first traversal prefix); tabs are + numbered starting from const_tables + @param filter_map Bitmap of tables whose cost/fanout are to + be taken into account. + @param read_time_arg [out] store read time here + @param record_count_arg [out] store record count here + + @note + If the join has no non-constant tables (table_count == const_tables), + the function stores read time 0.0 and record count 1.0 and returns + without traversing the tabs. + + @returns + read_time_arg and record_count_arg contain the computed cost and fanout +*/ + +void JOIN::get_partial_cost_and_fanout(int end_tab_idx, + table_map filter_map, + double *read_time_arg, + double *record_count_arg) +{ + double record_count= 1; + double read_time= 0.0; + double sj_inner_fanout= 1.0; + JOIN_TAB *end_tab= NULL; + JOIN_TAB *tab; + int i; + int last_sj_table= MAX_TABLES; + + /* + Handle a special case where the join is degenerate, and produces no + records + */ + if (table_count == const_tables) + { + *read_time_arg= 0.0; + /* + We return 1, because + - it is the pessimistic estimate (there might be grouping) + - it's safer, as we're less likely to hit the edge cases in + calculations. 
+ */ + *record_count_arg=1.0; + return; + } + /* Find end_tab: the tab numbered end_tab_idx, or the last tab of the traversal if the traversal ends earlier */ + for (tab= first_depth_first_tab(this), i= const_tables; + tab; + tab= next_depth_first_tab(this, tab), i++) + { + end_tab= tab; + if (i == end_tab_idx) + break; + } + /* Accumulate cost and fanout of the tabs up to and including end_tab */ + for (tab= first_depth_first_tab(this), i= const_tables; + ; + tab= next_depth_first_tab(this, tab), i++) + { + if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab) + { + /* + We've entered the SJM nest that contains the end_tab. The caller is + - interested in fanout inside the nest (because that's how many times + we'll invoke the attached WHERE conditions) + - not interested in cost + */ + record_count= 1.0; + read_time= 0.0; + } + + /* + Ignore fanout (but not cost) from sj-inner tables, as long as + the range that processes them finishes before the end_tab + */ + if (tab->sj_strategy != SJ_OPT_NONE) + { + sj_inner_fanout= 1.0; + last_sj_table= i + tab->n_sj_tables; + } + + table_map cur_table_map; + if (tab->table) + cur_table_map= tab->table->map; + else + { + /* This is a SJ-Materialization nest. Check all of its tables */ + TABLE *first_child= tab->bush_children->start->table; + TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding; + cur_table_map= sjm_nest->nested_join->used_tables; + } + if (tab->records_read && (cur_table_map & filter_map)) + { + record_count= COST_MULT(record_count, tab->records_read); + read_time= COST_ADD(read_time, + COST_ADD(tab->read_time, + record_count / TIME_FOR_COMPARE)); + if (tab->emb_sj_nest) + sj_inner_fanout= COST_MULT(sj_inner_fanout, tab->records_read); + } + + if (i == last_sj_table) + { + record_count /= sj_inner_fanout; + sj_inner_fanout= 1.0; + last_sj_table= MAX_TABLES; + } + + if (tab == end_tab) + break; + } + *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE; + *record_count_arg= record_count; +} + + +/* + Get prefix cost and fanout. 
This function is different from + get_partial_cost_and_fanout: + - it operates on a JOIN that hasn't yet finished its optimization phase (in + particular, fix_semijoin_strategies_for_picked_join_order() and + get_best_combination() haven't been called) + - it assumes the join prefix doesn't have any semi-join plans + + These assumptions are met by the caller of the function. + + @param n_tables Number of prefix tables to account for, counted + after the const tables (best_positions[const_tables] .. + best_positions[const_tables + n_tables - 1] are used) + @param read_time_arg [out] store read time here + @param record_count_arg [out] store record count here +*/ + +void JOIN::get_prefix_cost_and_fanout(uint n_tables, + double *read_time_arg, + double *record_count_arg) +{ + double record_count= 1; + double read_time= 0.0; + for (uint i= const_tables; i < n_tables + const_tables ; i++) + { + if (best_positions[i].records_read) + { + record_count= COST_MULT(record_count, best_positions[i].records_read); + read_time= COST_ADD(read_time, best_positions[i].read_time); + } + /* TODO: Take into account condition selectivities here */ + } + *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE; + *record_count_arg= record_count; +} + + +/** + Estimate the number of rows that query execution will read. + + @todo This is a very pessimistic upper bound. Use join selectivity + when available to produce a more realistic number. + + @return + The estimated number of rows; an estimate above HA_ROWS_MAX is + returned as HA_ROWS_MAX. +*/ + +double JOIN::get_examined_rows() +{ + double examined_rows; + double prev_fanout= 1; + double records; + JOIN_TAB *tab= first_breadth_first_tab(); + JOIN_TAB *prev_tab= tab; + + records= (double)tab->get_examined_rows(); + + while ((tab= next_breadth_first_tab(first_breadth_first_tab(), + top_join_tab_count, tab))) + { + prev_fanout= COST_MULT(prev_fanout, prev_tab->records_read); + records= + COST_ADD(records, + COST_MULT((double) (tab->get_examined_rows()), prev_fanout)); + prev_tab= tab; + } + examined_rows= (double) + (records > (double) HA_ROWS_MAX ? 
HA_ROWS_MAX : (ha_rows) records); + return examined_rows; +} + + +/** + @brief + Get the selectivity of equalities between columns when joining a table + + @param join The optimized join + @param idx The number of tables in the evaluated partial join + @param s The table to be joined for evaluation + @param rem_tables The bitmap of tables to be joined later + @param keyparts The number of key parts used when joining s + @param ref_keyuse_steps Array of steps between the keyuses employed to join s: + ref_keyuse_steps[i-1] is added to the KEYUSE pointer to + move from the keyuse of key part i-1 to that of key part i + + @return + The selectivity; 1.0 if there are no multiple equalities at the current + level or s has no keyuse. +*/ + +static +double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables, uint keyparts, + uint16 *ref_keyuse_steps) +{ + double sel= 1.0; + COND_EQUAL *cond_equal= join->cond_equal; + + if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse) + return sel; + + Item_equal *item_equal; + List_iterator_fast<Item_equal> it(cond_equal->current_level); + TABLE *table= s->table; + table_map table_bit= table->map; + POSITION *pos= &join->positions[idx]; + + while ((item_equal= it++)) + { + /* + Check whether we need to take into account the selectivity of + multiple equality item_equal. 
If this is the case multiply + the current value of sel by this selectivity + */ + table_map used_tables= item_equal->used_tables(); + /* Skip multiple equalities that do not refer to table s */ if (!(used_tables & table_bit)) + continue; + if (item_equal->get_const()) + continue; + + bool adjust_sel= FALSE; + Item_equal_fields_iterator fi(*item_equal); + while((fi++) && !adjust_sel) + { + Field *fld= fi.get_curr_field(); + if (fld->table->map != table_bit) + continue; + if (pos->key == 0) + adjust_sel= TRUE; + else + { + uint i; + KEYUSE *keyuse= pos->key; + uint key= keyuse->key; + /* Look for fld among the fields of the key parts used by the ref access */ for (i= 0; i < keyparts; i++) + { + if (i > 0) + keyuse+= ref_keyuse_steps[i-1]; + uint fldno; + if (is_hash_join_key_no(key)) + fldno= keyuse->keypart; + else + fldno= table->key_info[key].key_part[i].fieldnr - 1; + if (fld->field_index == fldno) + break; + } + /* Restart from the first keyuse of the ref access */ keyuse= pos->key; + + if (i == keyparts) + { + /* + Field fld is included in multiple equality item_equal + and is not a part of the ref key. + The selectivity of the multiple equality must be taken + into account unless one of the ref arguments is + equal to fld. + */ + adjust_sel= TRUE; + for (uint j= 0; j < keyparts && adjust_sel; j++) + { + if (j > 0) + keyuse+= ref_keyuse_steps[j-1]; + Item *ref_item= keyuse->val; + if (ref_item->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *field_item= (Item_field *) (ref_item->real_item()); + if (item_equal->contains(field_item->field)) + adjust_sel= FALSE; + } + } + } + } + } + if (adjust_sel) + { + /* + If ref == 0 and there are no fields in the multiple equality + item_equal that belong to the tables joined prior to s + then the selectivity of multiple equality will be set to 1.0. 
+ */ + double eq_fld_sel= 1.0; + /* NOTE(review): eq_fld_sel starts at 1.0 and only values below 1.0 are passed to set_if_bigger(); if set_if_bigger(a,b) only ever raises a to b, eq_fld_sel can never change in this loop -- confirm this is intended */ fi.rewind(); + while ((fi++)) + { + double curr_eq_fld_sel; + Field *fld= fi.get_curr_field(); + /* Only fields of tables other than s and the tables in rem_tables are considered */ if (!(fld->table->map & ~(table_bit | rem_tables))) + continue; + curr_eq_fld_sel= get_column_avg_frequency(fld) / + fld->table->stat_records(); + if (curr_eq_fld_sel < 1.0) + set_if_bigger(eq_fld_sel, curr_eq_fld_sel); + } + sel*= eq_fld_sel; + } + } + return sel; +} + + +/** + @brief + Get the selectivity of conditions when joining a table + + @param join The optimized join + @param idx Index in join->positions of the position that describes + the access to s + @param s The table to be joined for evaluation + @param rem_tables The bitmap of tables to be joined later + + @detail + Get selectivity of conditions that can be applied when joining this table + with previous tables. + + For quick selects and full table scans, selectivity of COND(this_table) + is accounted for in apply_selectivity_for_table(). Here, we only count + selectivity of COND(this_table, previous_tables). + + For other access methods, we need to calculate selectivity of the whole + condition, "COND(this_table) AND COND(this_table, previous_tables)". + + @retval + selectivity of the conditions imposed on the rows of s +*/ + +static +double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables) +{ + uint16 ref_keyuse_steps_buf[MAX_REF_PARTS]; + uint ref_keyuse_size= MAX_REF_PARTS; + uint16 *ref_keyuse_steps= ref_keyuse_steps_buf; + Field *field; + TABLE *table= s->table; + MY_BITMAP *read_set= table->read_set; + double sel= s->table->cond_selectivity; + POSITION *pos= &join->positions[idx]; + uint keyparts= 0; + uint found_part_ref_or_null= 0; + + if (pos->key != 0) + { + /* + A ref access or hash join is used for this table. ref access is created + from + + tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ... + + and it will only return rows for which this condition is satisfied. + Suppose, certain expr{i} is a constant. 
Since ref access only returns + rows that satisfy + + tbl.keypart{i}=const (*) + + then selectivity of this equality should not be counted in return value + of this function. This function uses the value of + + table->cond_selectivity=selectivity(COND(tbl)) (**) + + as a starting point. This value includes selectivity of equality (*). We + should somehow discount it. + + Looking at calculate_cond_selectivity_for_table(), one can see that + the value is not necessarily a direct multiplicand in + table->cond_selectivity + + There are three possible ways to discount + 1. There is a potential range access on t.keypart{i}=const. + (an important special case: the used ref access has a const prefix for + which a range estimate is available) + + 2. The field has a histogram. field[x]->cond_selectivity has the data. + + 3. Use index stats on this index: + rec_per_key[key_part+1]/rec_per_key[key_part] + + (TODO: more details about the "t.key=othertable.col" case) + */ + KEYUSE *keyuse= pos->key; + KEYUSE *prev_ref_keyuse= keyuse; + uint key= keyuse->key; + bool used_range_selectivity= false; + + /* + Check if we have a prefix of key=const that matches a quick select. + */ + if (!is_hash_join_key_no(key) && table->opt_range_keys.is_set(key)) + { + key_part_map quick_key_map= (key_part_map(1) << + table->opt_range[key].key_parts) - 1; + if (table->opt_range[key].rows && + !(quick_key_map & ~table->const_key_parts[key])) + { + /* + Ok, there is an equality for each of the key parts used by the + quick select. This means, quick select's estimate can be reused to + discount the selectivity of a prefix of a ref access. + */ + for (; quick_key_map & 1 ; quick_key_map>>= 1) + { + while (keyuse->table == table && keyuse->key == key && + keyuse->keypart == keyparts) + { + keyuse++; + } + keyparts++; + } + /* + Here we discount selectivity of the constant range CR. To calculate + this selectivity we use elements from the quick_rows[] array. 
+ If we have indexes i1,...,ik with the same prefix compatible + with CR any of the estimates quick_rows[i1], ... quick_rows[ik] could + be used for this calculation but here we don't know which one was + actually used. So sel could be greater than 1 and we have to cap it. + However if sel becomes greater than 2 then with high probability + something went wrong. + */ + sel /= (double)table->opt_range[key].rows / (double) table->stat_records(); + set_if_smaller(sel, 1.0); + used_range_selectivity= true; + } + } + + /* + Go through the "keypart{N}=..." equalities and find those that were + already taken into account in table->cond_selectivity. + */ + keyuse= pos->key; + keyparts=0; + while (keyuse->table == table && keyuse->key == key) + { + if (!(keyuse->used_tables & (rem_tables | table->map))) + { + if (are_tables_local(s, keyuse->val->used_tables())) + { + if (is_hash_join_key_no(key)) + { + if (keyparts == keyuse->keypart) + keyparts++; + } + else + { + if (keyparts == keyuse->keypart && + !((keyuse->val->used_tables()) & ~pos->ref_depend_map) && + !(found_part_ref_or_null & keyuse->optimize)) + { + /* Found a KEYUSE object that will be used by ref access */ + keyparts++; + found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; + } + } + + if (keyparts > keyuse->keypart) + { + /* Ok this is the keyuse that will be used for ref access */ + if (!used_range_selectivity && keyuse->val->const_item()) + { + uint fldno; + if (is_hash_join_key_no(key)) + fldno= keyuse->keypart; + else + fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; + + if (table->field[fldno]->cond_selectivity > 0) + { + sel /= table->field[fldno]->cond_selectivity; + set_if_smaller(sel, 1.0); + } + /* + TODO: we could do better here: + 1. cond_selectivity might be =1 (the default) because quick + select on some index prevented us from analyzing + histogram for this column. + 2. we could get an estimate through this? 
+ rec_per_key[key_part-1] / rec_per_key[key_part] + */ + } + if (keyparts > 1) + { + /* + Prepare to set ref_keyuse_steps[keyparts-2]: resize the array + if it is not large enough + */ + if (keyparts - 2 >= ref_keyuse_size) + { + uint new_size= MY_MAX(ref_keyuse_size*2, keyparts); + void *new_buf; + if (!(new_buf= my_malloc(PSI_INSTRUMENT_ME, + sizeof(*ref_keyuse_steps)*new_size, + MYF(0)))) + { + sel= 1.0; // As if no selectivity was computed + goto exit; + } + memcpy(new_buf, ref_keyuse_steps, + sizeof(*ref_keyuse_steps)*ref_keyuse_size); + /* Free the previous buffer unless it is the initial local array */ if (ref_keyuse_steps != ref_keyuse_steps_buf) + my_free(ref_keyuse_steps); + + ref_keyuse_steps= (uint16*)new_buf; + ref_keyuse_size= new_size; + } + + ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse); + prev_ref_keyuse= keyuse; + } + } + } + } + keyuse++; + } + } + else + { + /* + The table is accessed with full table scan, or quick select. + Selectivity of COND(table) is already accounted for in + apply_selectivity_for_table(). + */ + sel= 1; + } + + /* + If the field f from the table is equal to a field from one of the + earlier joined tables then the selectivity of the range conditions + over the field f must be discounted. + + We need to discount selectivity only if we're using ref-based + access method (and have sel!=1). + If we use ALL/range/index_merge, then sel==1, and no need to discount. 
+ */ + if (pos->key != NULL) + { + for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) + { + if (!bitmap_is_set(read_set, field->field_index) || + !field->next_equal_field) + continue; + /* Walk the ring of fields equal to 'field' until it comes back to 'field' */ for (Field *next_field= field->next_equal_field; + next_field != field; + next_field= next_field->next_equal_field) + { + if (!(next_field->table->map & rem_tables) && next_field->table != table) + { + if (field->cond_selectivity > 0) + { + sel/= field->cond_selectivity; + set_if_smaller(sel, 1.0); + } + break; + } + } + } + } + + sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, + keyparts, ref_keyuse_steps); +exit: + /* Free the key-step array if it was reallocated with my_malloc() */ if (ref_keyuse_steps != ref_keyuse_steps_buf) + my_free(ref_keyuse_steps); + return sel; +} + + +/* + Check if the table is an EQ_REF or similar table and there is no cost + to gain by moving it to a later stage. + We call such a table an edge table (or hanging leaf) as it will read at + most one row and will not add to the number of row combinations in the join. + + @param pos Position describing the access to the table + @param pushdown_cond_selectivity Selectivity of the conditions pushed to + the table + + @return + SEARCH_FOUND_EDGE if the access type is JT_EQ_REF, or JT_REF with + records_read == 1 and no rowid filter, and the selectivity is + at least 0.999; SEARCH_OK otherwise. +*/ + +static inline enum_best_search +check_if_edge_table(POSITION *pos, + double pushdown_cond_selectivity) +{ + + if ((pos->type == JT_EQ_REF || + (pos->type == JT_REF && + pos->records_read == 1 && + !pos->range_rowid_filter_info)) && + pushdown_cond_selectivity >= 0.999) + return SEARCH_FOUND_EDGE; + return SEARCH_OK; +} + + +/* Pairs a slot of the join_tab array with the POSITION computed for it */ struct SORT_POSITION +{ + JOIN_TAB **join_tab; + POSITION *position; +}; + + +/* + Sort SORT_POSITIONS according to expected number of rows found. + The result of compare_embedding_subqueries() takes precedence when it + is not 0. + If number of combinations are the same sort according to join_tab order + (same table order as used in the original SQL query) +*/ + +static int +sort_positions(SORT_POSITION *a, SORT_POSITION *b) +{ + int cmp; + if ((cmp= compare_embedding_subqueries(*a->join_tab, *b->join_tab)) != 0) + return cmp; + + if (a->position->records_read > b->position->records_read) + return 1; + if (a->position->records_read < b->position->records_read) + return -1; + return CMP_NUM(*a->join_tab, *b->join_tab); +} + + +/* + Call 
best_access_path() for a set of tables and collect results + + @param join JOIN object + @param remaining_tables Passed to best_access_path(); a table is skipped + if any table it depends on (s->dependent) is still + in this set + @param idx Passed to best_access_path() together with + join->positions + @param record_count Passed to best_access_path() + @param trace_one_table Current optimizer_trace + @param pos Pointer to remaining tables + @param allowed_tables bitmap of allowed tables. On return set to + the collected tables. + @param store_position Points to where to store next found SORT_POSITION. + Will be updated to next free position. + @param stop_on_eq_ref Stop searching for more tables if we found an EQ_REF + table. + + @return + 0 Normal + 1 Eq_ref table found (only if stop_on_eq_ref is used) + + join->next_sort_position will be updated to next free position. + Two POSITION slots are consumed per collected table: they are the two + output arguments handed to best_access_path(). +*/ + +static bool +get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, + double record_count, + Json_writer_object *trace_one_table, + JOIN_TAB **pos, SORT_POSITION **store_position, + table_map *allowed_tables, + bool stop_on_eq_ref) +{ + THD *thd= join->thd; + POSITION *sort_position= join->next_sort_position; + SORT_POSITION *sort_end= *store_position; + JOIN_TAB *s; + table_map found_tables= 0; + bool found_eq_ref= 0; + bool disable_jbuf= join->thd->variables.join_cache_level == 0; + /* The DBUG tag must match the function name so that traces and function filters identify this frame */ DBUG_ENTER("get_costs_for_tables"); + + s= *pos; + do + { + table_map real_table_bit= s->table->map; + if ((*allowed_tables & real_table_bit) && + !(remaining_tables & s->dependent)) + { +#ifdef DBUG_ASSERT_EXISTS + DBUG_ASSERT(!check_interleaving_with_nj(s)); + restore_prev_nj_state(s); // Revert effect of check_... call +#endif + sort_end->join_tab= pos; + sort_end->position= sort_position; + + + Json_writer_object wrapper(thd); + /* Find the best access method from 's' to the current partial plan */ + best_access_path(join, s, remaining_tables, join->positions, idx, + disable_jbuf, record_count, + sort_position, sort_position + 1); + found_tables|= s->table->map; + sort_end++; + sort_position+= 2; + if (unlikely(stop_on_eq_ref) && sort_position[-2].type == JT_EQ_REF) + { + /* Found an eq_ref table. 
Use this, ignoring the other tables */ + found_eq_ref= 1; + if (found_tables == s->table->map) + break; // First table + + /* Store the found eq_ref table first in store_position */ + sort_position-= 2; + *allowed_tables= s->table->map; + (*store_position)->join_tab= pos; + (*store_position)->position= sort_position; + (*store_position)++; + join->next_sort_position[0]= sort_position[0]; + join->next_sort_position[1]= sort_position[1]; + join->next_sort_position+= 2; + DBUG_RETURN(1); + } + } + else + { + /* Verify that 'allowed_current_tables' was calculated correctly */ + DBUG_ASSERT((remaining_tables & s->dependent) || + !(remaining_tables & real_table_bit) || + !(*allowed_tables & real_table_bit) || + check_interleaving_with_nj(s)); + } + } while ((s= *++pos)); + + *allowed_tables= found_tables; + *store_position= sort_end; + join->next_sort_position= sort_position; + DBUG_RETURN(found_eq_ref); +} + +/** + Find a good, possibly optimal, query execution plan (QEP) by a possibly + exhaustive search. + + The procedure searches for the optimal ordering of the query tables in set + 'remaining_tables' of size N, and the corresponding optimal access paths to + each table. The choice of a table order and an access path for each table + constitutes a query execution plan (QEP) that fully specifies how to + execute the query. + + The maximal size of the found plan is controlled by the parameter + 'search_depth'. When search_depth == N, the resulting plan is complete and + can be used directly as a QEP. If search_depth < N, the found plan consists + of only some of the query tables. Such "partial" optimal plans are useful + only as input to query optimization procedures, and cannot be used directly + to execute a query. + + The algorithm begins with an empty partial plan stored in 'join->positions' + and a set of N tables - 'remaining_tables'. 
Each step of the algorithm + evaluates the cost of the partial plan extended by all access plans for + each of the relations in 'remaining_tables', expands the current partial + plan with the access plan that results in lowest cost of the expanded + partial plan, and removes the corresponding relation from + 'remaining_tables'. The algorithm continues until it either constructs a + complete optimal plan, or constructs an optimal plartial plan with size = + search_depth. + + The final optimal plan is stored in 'join->best_positions'. The + corresponding cost of the optimal plan is in 'join->best_read'. + + @note + The procedure uses a recursive depth-first search where the depth of the + recursion (and thus the exhaustiveness of the search) is controlled by the + parameter 'search_depth'. + + @note + The pseudocode below describes the algorithm of + 'best_extension_by_limited_search'. The worst-case complexity of this + algorithm is O(N*N^search_depth/search_depth). When serch_depth >= N, then + the complexity of greedy_search is O(N!). + + @code + procedure best_extension_by_limited_search( + pplan in, // in, partial plan of tables-joined-so-far + pplan_cost, // in, cost of pplan + remaining_tables, // in, set of tables not referenced in pplan + best_plan_so_far, // in/out, best plan found so far + best_plan_so_far_cost,// in/out, cost of best_plan_so_far + search_depth) // in, maximum size of the plans being considered + { + for each table T from remaining_tables + { + // Calculate the cost of using table T as above + cost = complex-series-of-calculations; + + // Add the cost to the cost so far. 
+ pplan_cost+= cost; + + if (pplan_cost >= best_plan_so_far_cost) + // pplan_cost already too great, stop search + continue; + + pplan= expand pplan by best_access_method; + remaining_tables= remaining_tables - table T; + if (remaining_tables is not an empty set + and + search_depth > 1) + { + best_extension_by_limited_search(pplan, pplan_cost, + remaining_tables, + best_plan_so_far, + best_plan_so_far_cost, + search_depth - 1); + } + else + { + best_plan_so_far_cost= pplan_cost; + best_plan_so_far= pplan; + } + } + } + @endcode + + @note + When 'best_extension_by_limited_search' is called for the first time, + 'join->best_read' must be set to the largest possible value (e.g. DBL_MAX). + The actual implementation provides a way to optionally use pruning + heuristic to reduce the search space by skipping some partial plans. + + @note + The parameter 'search_depth' provides control over the recursion + depth, and thus the size of the resulting optimal plan. + + @param join pointer to the structure providing all context info + for the query + @param remaining_tables set of tables not included into the partial plan yet + @param idx length of the partial QEP in 'join->positions'; + since a depth-first search is used, also corresponds + to the current depth of the search tree; + also an index in the array 'join->best_ref'; + @param record_count estimate for the number of records returned by the + best partial plan + @param read_time the cost of the best partial plan + @param search_depth maximum depth of the recursion and thus size of the + found optimal plan + (0 < search_depth <= join->tables+1). 
+ (values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS) + @param use_cond_selectivity specifies how the selectivity of the conditions + pushed to a table should be taken into account + + @retval + enum_best_search::SEARCH_OK All fine + @retval + enum_best_search::SEARCH_FOUND_EDGE All remaning tables are edge tables + @retval + enum_best_search::SEARCH_ABORT Killed by user + @retval + enum_best_search::SEARCH_ERROR Fatal error +*/ + + +static enum_best_search +best_extension_by_limited_search(JOIN *join, + table_map remaining_tables, + uint idx, + double record_count, + double read_time, + uint search_depth, + uint use_cond_selectivity, + table_map *processed_eq_ref_tables) +{ + THD *thd= join->thd; + /* + 'join' is a partial plan with lower cost than the best plan so far, + so continue expanding it further with the tables in 'remaining_tables'. + */ + JOIN_TAB *s; + double best_record_count= DBL_MAX; + double best_read_time= DBL_MAX; + enum_best_search best_res; + uint tables_left= join->table_count - idx, found_tables; + uint accepted_tables __attribute__((unused)); + table_map found_eq_ref_tables= 0, used_eq_ref_table= 0; + table_map allowed_tables, allowed_current_tables; + SORT_POSITION *sort= (SORT_POSITION*) alloca(sizeof(SORT_POSITION)*tables_left); + SORT_POSITION *sort_end; + DBUG_ENTER("best_extension_by_limited_search"); + + DBUG_EXECUTE_IF("show_explain_probe_best_ext_lim_search", + if (dbug_user_var_equals_int(thd, + "show_explain_probe_select_id", + join->select_lex->select_number)) + dbug_serve_apcs(thd, 1); + ); + + if (unlikely(thd->check_killed())) // Abort + DBUG_RETURN(SEARCH_ABORT); + + DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, + "part_plan");); + status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls); + + if (join->emb_sjm_nest) + { + /* + If we are searching for the execution plan of a materialized semi-join nest + then allowed_tables contains bits only for the tables from this nest. 
+ */ + allowed_tables= (join->emb_sjm_nest->sj_inner_tables & remaining_tables); + allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables; + } + else + { + /* + allowed_tables is used to check if there are tables left that can improve + a key search and to see if there are more tables to add in next iteration. + + allowed_current_tables tells us which tables we can add to the current + plan at this stage. + */ + allowed_tables= remaining_tables; + allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables; + } + DBUG_ASSERT(allowed_tables & remaining_tables); + + sort_end= sort; + { + Json_writer_object trace_one_table(thd); + JOIN_TAB **best_ref= join->best_ref + idx; + if (unlikely(thd->trace_started())) + trace_plan_prefix(join, idx, remaining_tables); + + Json_writer_array arr(thd, "get_costs_for_tables"); + + if (idx > join->const_tables && join->prune_level >= 2 && + join->positions[idx-1].type == JT_EQ_REF && + (join->eq_ref_tables & allowed_current_tables)) + { + /* Previous table was an EQ REF table, only add other possible EQ_REF + tables to the chain, stop after first one is found. 
+ */ + table_map table_map= join->eq_ref_tables & allowed_current_tables; + if (get_costs_for_tables(join, remaining_tables, idx, record_count, + &trace_one_table, best_ref, &sort_end, + &table_map, 1)) + used_eq_ref_table= (*sort->join_tab)->table->map; + else + { + /* We didn't find another EQ_REF table, add remaining tables */ + if ((table_map= allowed_current_tables & ~table_map)) + get_costs_for_tables(join, remaining_tables, idx, record_count, + &trace_one_table, best_ref, &sort_end, &table_map, + 0); + } + } + else + { + table_map table_map= allowed_current_tables; + get_costs_for_tables(join, remaining_tables, idx, record_count, + &trace_one_table, best_ref, &sort_end, &table_map, + 0); + } + found_tables= (uint) (sort_end - sort); + DBUG_ASSERT(found_tables > 0); + + /* + Sort tables in ascending order of generated row combinations + */ + if (found_tables > 1) + my_qsort(sort, found_tables, sizeof(SORT_POSITION), + (qsort_cmp) sort_positions); + } + DBUG_ASSERT(join->next_sort_position <= + join->sort_positions + join->sort_space); + + accepted_tables= 0; + double min_rec_count= DBL_MAX; + double min_rec_count_read_time= DBL_MAX; + + double min_cost= DBL_MAX; + double min_cost_record_count= DBL_MAX; + + for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++) + { + s= *pos->join_tab; + if (!(found_eq_ref_tables & s->table->map) && + !check_interleaving_with_nj(s)) + { + table_map real_table_bit= s->table->map; + double current_record_count, current_read_time; + double partial_join_cardinality; + POSITION *position= join->positions + idx, *loose_scan_pos; + Json_writer_object trace_one_table(thd); + + if (unlikely(thd->trace_started())) + { + trace_plan_prefix(join, idx, remaining_tables); + trace_one_table.add_table_name(s); + } + + accepted_tables++; + *position= *pos->position; // Get stored result + loose_scan_pos= pos->position+1; + + /* Compute the cost of the new plan extended with 's' */ + current_record_count= COST_MULT(record_count, 
position->records_read); + const double filter_cmp_gain= position->range_rowid_filter_info + ? position->range_rowid_filter_info->get_cmp_gain(current_record_count) + : 0; + current_read_time= COST_ADD(read_time, + COST_ADD(position->read_time - + filter_cmp_gain, + current_record_count / + TIME_FOR_COMPARE)); + + if (unlikely(thd->trace_started())) + { + trace_one_table.add("rows_for_plan", current_record_count); + trace_one_table.add("cost_for_plan", current_read_time); + } + optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, + ¤t_read_time, loose_scan_pos); + + /* Expand only partial plans with lower cost than the best QEP so far */ + if (current_read_time >= join->best_read) + { + DBUG_EXECUTE("opt", print_plan(join, idx+1, + current_record_count, + read_time, + current_read_time, + "prune_by_cost");); + trace_one_table + .add("pruned_by_cost", true) + .add("current_cost", current_read_time) + .add("best_cost", join->best_read + COST_EPS); + + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } + + /* + Prune some less promising partial plans. This heuristic may miss + the optimal QEPs, thus it results in a non-exhaustive search. + */ + if (join->prune_level >= 1) + { + // Collect the members with min_cost and min_read_time. 
+ bool min_rec_hit= false; + bool min_cost_hit= false; + + if (join->extra_heuristic_pruning && + (!(position->key_dependent & allowed_tables) || + position->records_read < 2.0)) + { + if (current_record_count < min_rec_count) + { + min_rec_count= current_record_count; + min_rec_count_read_time= current_read_time; + min_rec_hit= true; + } + + if (current_read_time < min_cost) + { + min_cost_record_count= current_record_count; + min_cost= current_read_time; + min_cost_hit= true; + } + } + + if (best_record_count > current_record_count || + best_read_time > current_read_time || + (idx == join->const_tables && // 's' is the first table in the QEP + s->table == join->sort_by_table)) + { + /* + Store the current record count and cost as the best + possible cost at this level if the following holds: + - It's the lowest record number and cost so far + - There is no remaing table that could improve index usage + or we found an EQ_REF or REF key with less than 2 + matching records (good enough). + */ + if (best_record_count >= current_record_count && + best_read_time >= current_read_time && + (!(position->key_dependent & allowed_tables) || + position->records_read < 2.0)) + { + best_record_count= current_record_count; + best_read_time= current_read_time; + } + } + else + { + /* + Typically, we get here if: + best_record_count < current_record_count && + best_read_time < current_read_time + That is, both record_count and read_time are worse than the best_ + ones. This plan doesn't look promising, prune it away. 
+ */ + DBUG_EXECUTE("opt", print_plan(join, idx+1, + current_record_count, + read_time, + current_read_time, + "pruned_by_heuristic");); + trace_one_table.add("pruned_by_heuristic", true); + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } + + const char* prune_reason= NULL; + if (!min_rec_hit && + current_record_count >= min_rec_count && + current_read_time >= min_rec_count_read_time) + prune_reason= "min_record_count"; + + if (!min_cost_hit && + current_record_count >= min_cost_record_count && + current_read_time >= min_cost) + prune_reason= "min_read_time"; + + if (prune_reason) + { + trace_one_table.add("pruned_by_heuristic", prune_reason); + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + continue; + } + } + + double pushdown_cond_selectivity= 1.0; + if (use_cond_selectivity > 1) + pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, + remaining_tables & + ~real_table_bit); + join->positions[idx].cond_selectivity= pushdown_cond_selectivity; + + partial_join_cardinality= (current_record_count * + pushdown_cond_selectivity); + + if (unlikely(thd->trace_started())) + { + if (pushdown_cond_selectivity < 1.0) + { + trace_one_table.add("selectivity", pushdown_cond_selectivity); + trace_one_table.add("estimated_join_cardinality", + partial_join_cardinality); + } + } + + join->positions[idx].partial_join_cardinality= partial_join_cardinality; + + if ((search_depth > 1) && (remaining_tables & ~real_table_bit) & + allowed_tables) + { + /* Recursively expand the current partial plan */ + Json_writer_array trace_rest(thd, "rest_of_plan"); + + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab); + best_res= + best_extension_by_limited_search(join, + remaining_tables & + ~real_table_bit, + idx + 1, + partial_join_cardinality, + current_read_time, + search_depth - 1, + use_cond_selectivity, + &found_eq_ref_tables); + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab); 
+ + if ((int) best_res < (int) SEARCH_OK) + goto end; // Return best_res + if (best_res == SEARCH_FOUND_EDGE && + check_if_edge_table(join->positions+ idx, + pushdown_cond_selectivity) != + SEARCH_FOUND_EDGE) + best_res= SEARCH_OK; + } + else + { + /* + 'join' is either the best partial QEP with 'search_depth' relations, + or the best complete QEP so far, whichever is smaller. + */ + if (join->sort_by_table && + join->sort_by_table != + join->positions[join->const_tables].table->table) + { + /* + We may have to make a temp table, note that this is only a + heuristic since we cannot know for sure at this point. + Hence it may be wrong. + */ + trace_one_table.add("cost_for_sorting", current_record_count); + current_read_time= COST_ADD(current_read_time, current_record_count); + } + if (current_read_time < join->best_read) + { + memcpy((uchar*) join->best_positions, (uchar*) join->positions, + sizeof(POSITION) * (idx + 1)); + join->join_record_count= partial_join_cardinality; + join->best_read= current_read_time - COST_EPS; + } + DBUG_EXECUTE("opt", print_plan(join, idx+1, + current_record_count, + read_time, + current_read_time, + "full_plan");); + best_res= check_if_edge_table(join->positions + idx, + pushdown_cond_selectivity); + } + restore_prev_nj_state(s); + restore_prev_sj_state(remaining_tables, s, idx); + if (best_res == SEARCH_FOUND_EDGE) + { + if (pos+1 < sort_end) // If not last table + trace_one_table.add("pruned_by_hanging_leaf", true); + goto end; + } + } + } + DBUG_ASSERT(accepted_tables > 0); + best_res= SEARCH_OK; + +end: + join->next_sort_position-= found_tables*2; + if (used_eq_ref_table) + *processed_eq_ref_tables|= used_eq_ref_table | found_eq_ref_tables; + else + *processed_eq_ref_tables= 0; + DBUG_RETURN(best_res); +} + + +/** + Find how much space the prevous read not const tables takes in cache. 
+*/ + +void JOIN_TAB::calc_used_field_length(bool max_fl) +{ + uint null_fields,blobs,fields; + ulong rec_length; + Field **f_ptr,*field; + uint uneven_bit_fields; + MY_BITMAP *read_set= table->read_set; + + uneven_bit_fields= null_fields= blobs= fields= rec_length=0; + for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) + { + if (bitmap_is_set(read_set, field->field_index)) + { + uint flags=field->flags; + fields++; + rec_length+=field->pack_length(); + if (flags & BLOB_FLAG) + blobs++; + if (!(flags & NOT_NULL_FLAG)) + null_fields++; + if (field->type() == MYSQL_TYPE_BIT && + ((Field_bit*)field)->bit_len) + uneven_bit_fields++; + } + } + if (null_fields || uneven_bit_fields) + rec_length+=(table->s->null_fields+7)/8; + if (table->maybe_null) + rec_length+=sizeof(my_bool); + + /* Take into account that DuplicateElimination may need to store rowid */ + uint rowid_add_size= 0; + if (keep_current_rowid) + { + rowid_add_size= table->file->ref_length; + rec_length += rowid_add_size; + fields++; + } + + if (max_fl) + { + // TODO: to improve this estimate for max expected length + if (blobs) + { + ulong blob_length= table->file->stats.mean_rec_length; + if (ULONG_MAX - rec_length > blob_length) + rec_length+= blob_length; + else + rec_length= ULONG_MAX; + } + max_used_fieldlength= rec_length; + } + else if (table->file->stats.mean_rec_length) + set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size); + + used_fields=fields; + used_fieldlength=rec_length; + used_blobs=blobs; + used_null_fields= null_fields; + used_uneven_bit_fields= uneven_bit_fields; +} + + +/* + @brief + Extract pushdown conditions for a table scan + + @details + This functions extracts pushdown conditions usable when this table is scanned. + The conditions are extracted either from WHERE or from ON expressions. + The conditions are attached to the field cache_select of this table. + + @note + Currently the extracted conditions are used only by BNL and BNLH join. + algorithms. 
+ + @retval 0 on success + 1 otherwise +*/ + +int JOIN_TAB::make_scan_filter() +{ + COND *tmp; + DBUG_ENTER("make_scan_filter"); + + Item *cond= is_inner_table_of_outer_join() ? + *get_first_inner_table()->on_expr_ref : join->conds; + + if (cond) + { + if ((tmp= make_cond_for_table(join->thd, cond, + join->const_table_map | table->map, + table->map, -1, FALSE, TRUE))) + { + DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY);); + if (!(cache_select= + (SQL_SELECT*) join->thd->memdup((uchar*) select, + sizeof(SQL_SELECT)))) + DBUG_RETURN(1); + cache_select->cond= tmp; + cache_select->read_tables=join->const_table_map; + } + else if (join->thd->is_error()) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/** + @brief + Check whether hash join algorithm can be used to join this table + + @details + This function finds out whether the ref items that have been chosen + by the planner to access this table can be used for hash join algorithms. + The answer depends on a certain property of the the fields of the + joined tables on which the hash join key is built. + + @note + At present the function is supposed to be called only after the function + get_best_combination has been called. + + @retval TRUE it's possible to use hash join to join this table + @retval FALSE otherwise +*/ + +bool JOIN_TAB::hash_join_is_possible() +{ + if (type != JT_REF && type != JT_EQ_REF) + return FALSE; + if (!is_ref_for_hash_join()) + { + KEY *keyinfo= table->key_info + ref.key; + return keyinfo->key_part[0].field->hash_join_is_possible(); + } + return TRUE; +} + + +/** + @brief + Check whether a KEYUSE can be really used for access this join table + + @param join Join structure with the best join order + for which the check is performed + @param keyuse Evaluated KEYUSE structure + + @details + This function is supposed to be used after the best execution plan have been + already chosen and the JOIN_TAB array for the best join order been already set. 
+ For a given KEYUSE to access this JOIN_TAB in the best execution plan the + function checks whether it really can be used. The function first performs + the check with access_from_tables_is_allowed(). If it succeeds it checks + whether the keyuse->val does not use some fields of a materialized semijoin + nest that cannot be used to build keys to access outer tables. + Such KEYUSEs exists for the query like this: + select * from ot + where ot.c in (select it1.c from it1, it2 where it1.c=f(it2.c)) + Here we have two KEYUSEs to access table ot: with val=it1.c and val=f(it2.c). + However if the subquery was materialized the second KEYUSE cannot be employed + to access ot. + + @retval true the given keyuse can be used for ref access of this JOIN_TAB + @retval false otherwise +*/ + +bool JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan(JOIN *join, + KEYUSE *keyuse) +{ + if (!access_from_tables_is_allowed(keyuse->used_tables, + join->sjm_lookup_tables)) + return false; + if (join->sjm_scan_tables & table->map) + return true; + table_map keyuse_sjm_scan_tables= keyuse->used_tables & + join->sjm_scan_tables; + if (!keyuse_sjm_scan_tables) + return true; + uint sjm_tab_nr= 0; + while (!(keyuse_sjm_scan_tables & table_map(1) << sjm_tab_nr)) + sjm_tab_nr++; + JOIN_TAB *sjm_tab= join->map2table[sjm_tab_nr]; + TABLE_LIST *emb_sj_nest= sjm_tab->emb_sj_nest; + if (!(emb_sj_nest->sj_mat_info && emb_sj_nest->sj_mat_info->is_used && + emb_sj_nest->sj_mat_info->is_sj_scan)) + return true; + st_select_lex *sjm_sel= emb_sj_nest->sj_subq_pred->unit->first_select(); + for (uint i= 0; i < sjm_sel->item_list.elements; i++) + { + DBUG_ASSERT(sjm_sel->ref_pointer_array[i]->real_item()->type() == Item::FIELD_ITEM); + if (keyuse->val->real_item()->type() == Item::FIELD_ITEM) + { + Field *field = ((Item_field*)sjm_sel->ref_pointer_array[i]->real_item())->field; + if (field->eq(((Item_field*)keyuse->val->real_item())->field)) + return true; + } + } + return false; +} + + +static uint 
+cache_record_length(JOIN *join,uint idx) +{ + uint length=0; + JOIN_TAB **pos,**end; + + for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ; + pos != end ; + pos++) + { + JOIN_TAB *join_tab= *pos; + length+= join_tab->get_used_fieldlength(); + } + return length; +} + + +/* + Get the number of different row combinations for subset of partial join + + SYNOPSIS + prev_record_reads() + join The join structure + idx Number of tables in the partial join order (i.e. the + partial join order is in join->positions[0..idx-1]) + found_ref Bitmap of tables for which we need to find # of distinct + row combinations. + + DESCRIPTION + Given a partial join order (in join->positions[0..idx-1]) and a subset of + tables within that join order (specified in found_ref), find out how many + distinct row combinations of subset tables will be in the result of the + partial join order. + + This is used as follows: Suppose we have a table accessed with a ref-based + method. The ref access depends on current rows of tables in found_ref. + We want to count # of different ref accesses. We assume two ref accesses + will be different if at least one of access parameters is different. + Example: consider a query + + SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field + + and a join order: + t1, ref access on t1.key=c1 + t2, ref access on t2.key=c2 + t3, ref access on t3.key=t1.field + + For t1: n_ref_scans = 1, n_distinct_ref_scans = 1 + For t2: n_ref_scans = records_read(t1), n_distinct_ref_scans=1 + For t3: n_ref_scans = records_read(t1)*records_read(t2) + n_distinct_ref_scans = #records_read(t1) + + The reason for having this function (at least the latest version of it) + is that we need to account for buffering in join execution. 
+ + An edge-case example: if we have a non-first table in join accessed via + ref(const) or ref(param) where there is a small number of different + values of param, then the access will likely hit the disk cache and will + not require any disk seeks. + + The proper solution would be to assume an LRU disk cache of some size, + calculate probability of cache hits, etc. For now we just count + identical ref accesses as one. + + RETURN + Expected number of row combinations +*/ + +double +prev_record_reads(const POSITION *positions, uint idx, table_map found_ref) +{ + double found=1.0; + const POSITION *pos_end= positions - 1; + for (const POSITION *pos= positions + idx - 1; pos != pos_end; pos--) + { + if (pos->table->table->map & found_ref) + { + found_ref|= pos->ref_depend_map; + /* + For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table + with no matching row we will get position[t2].records_read==0. + Actually the size of output is one null-complemented row, therefore + we will use value of 1 whenever we get records_read==0. + + Note + - the above case can't occur if inner part of outer join has more + than one table: table with no matches will not be marked as const. + + - Ideally we should add 1 to records_read for every possible null- + complemented row. We're not doing it because: 1. it will require + non-trivial code and add overhead. 2. The value of records_read + is an inprecise estimate and adding 1 (or, in the worst case, + #max_nested_outer_joins=64-1) will not make it any more precise. + */ + if (pos->records_read) + { + found= COST_MULT(found, pos->records_read); + found*= pos->cond_selectivity; + } + } + } + return found; +} + + +/* + Enumerate join tabs in breadth-first fashion, including const tables. +*/ + +static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab, + uint n_top_tabs_count, JOIN_TAB *tab) +{ + n_top_tabs_count += tab->join->aggr_tables; + if (!tab->bush_root_tab) + { + /* We're at top level. 
Get the next top-level tab */ + tab++; + if (tab < first_top_tab + n_top_tabs_count) + return tab; + + /* No more top-level tabs. Switch to enumerating SJM nest children */ + tab= first_top_tab; + } + else + { + /* We're inside of an SJM nest */ + if (!tab->last_leaf_in_bush) + { + /* There's one more table in the nest, return it. */ + return ++tab; + } + else + { + /* + There are no more tables in this nest. Get out of it and then we'll + proceed to the next nest. + */ + tab= tab->bush_root_tab + 1; + } + } + + /* + Ok, "tab" points to a top-level table, and we need to find the next SJM + nest and enter it. + */ + for (; tab < first_top_tab + n_top_tabs_count; tab++) + { + if (tab->bush_children) + return tab->bush_children->start; + } + return NULL; +} + + +/* + Enumerate JOIN_TABs in "EXPLAIN order". This order + - const tabs are included + - we enumerate "optimization tabs". + - +*/ + +JOIN_TAB *first_explain_order_tab(JOIN* join) +{ + JOIN_TAB* tab; + tab= join->join_tab; + if (!tab) + return NULL; /* Can happen when when the tables were optimized away */ + return (tab->bush_children) ? 
tab->bush_children->start : tab; +} + + +JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab) +{ + /* If we're inside SJM nest and have reached its end, get out */ + if (tab->last_leaf_in_bush) + return tab->bush_root_tab; + + /* Move to next tab in the array we're traversing */ + tab++; + + if (tab == join->join_tab + join->top_join_tab_count) + return NULL; /* Outside SJM nest and reached EOF */ + + if (tab->bush_children) + return tab->bush_children->start; + + return tab; +} + + + +JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls) +{ + JOIN_TAB *tab= join->join_tab; + if (const_tbls == WITHOUT_CONST_TABLES) + { + if (join->const_tables == join->table_count || !tab) + return NULL; + tab += join->const_tables; + } + return tab; +} + + +JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab) +{ + tab= next_breadth_first_tab(join->first_breadth_first_tab(), + join->top_join_tab_count, tab); + if (tab && tab->bush_root_tab) + tab= NULL; + return tab; +} + + +JOIN_TAB *first_linear_tab(JOIN *join, + enum enum_with_bush_roots include_bush_roots, + enum enum_with_const_tables const_tbls) +{ + JOIN_TAB *first= join->join_tab; + + if (!first) + return NULL; + + if (const_tbls == WITHOUT_CONST_TABLES) + first+= join->const_tables; + + if (first >= join->join_tab + join->top_join_tab_count) + return NULL; /* All are const tables */ + + if (first->bush_children && include_bush_roots == WITHOUT_BUSH_ROOTS) + { + /* This JOIN_TAB is a SJM nest; Start from first table in nest */ + return first->bush_children->start; + } + + return first; +} + + +/* + A helper function to loop over all join's join_tab in sequential fashion + + DESCRIPTION + Depending on include_bush_roots parameter, JOIN_TABs that represent + SJM-scan/lookups are either returned or omitted. + + SJM-Bush children are returned right after (or in place of) their container + join tab (TODO: does anybody depend on this? 
A: make_join_readinfo() seems + to) + + For example, if we have this structure: + + ot1--ot2--sjm1----------------ot3-... + | + +--it1--it2--it3 + + calls to next_linear_tab( include_bush_roots=TRUE) will return: + + ot1 ot2 sjm1 it1 it2 it3 ot3 ... + + while calls to next_linear_tab( include_bush_roots=FALSE) will return: + + ot1 ot2 it1 it2 it3 ot3 ... + + (note that sjm1 won't be returned). +*/ + +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, + enum enum_with_bush_roots include_bush_roots) +{ + if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children) + { + /* This JOIN_TAB is a SJM nest; Start from first table in nest */ + return tab->bush_children->start; + } + + DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab); + + if (tab->bush_root_tab) /* Are we inside an SJM nest */ + { + /* Inside SJM nest */ + if (!tab->last_leaf_in_bush) + return tab+1; /* Return next in nest */ + /* Continue from the sjm on the top level */ + tab= tab->bush_root_tab; + } + + /* If no more JOIN_TAB's on the top level */ + if (++tab >= join->join_tab + join->exec_join_tab_cnt() + join->aggr_tables) + return NULL; + + if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children) + { + /* This JOIN_TAB is a SJM nest; Start from first table in nest */ + tab= tab->bush_children->start; + } + return tab; +} + + +/* + Start to iterate over all join tables in bush-children-first order, excluding + the const tables (see next_depth_first_tab() comment for details) +*/ + +JOIN_TAB *first_depth_first_tab(JOIN* join) +{ + JOIN_TAB* tab; + /* This means we're starting the enumeration */ + if (join->const_tables == join->top_join_tab_count || !join->join_tab) + return NULL; + + tab= join->join_tab + join->const_tables; + + return (tab->bush_children) ? tab->bush_children->start : tab; +} + + +/* + A helper function to iterate over all join tables in bush-children-first order + + DESCRIPTION + + For example, for this join plan + + ot1--ot2--sjm1------------ot3-... 
+ | + | + it1--it2--it3 + + call to first_depth_first_tab() will return ot1, and subsequent calls to + next_depth_first_tab() will return: + + ot2 it1 it2 it3 sjm ot3 ... +*/ + +JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) +{ + /* If we're inside SJM nest and have reached its end, get out */ + if (tab->last_leaf_in_bush) + return tab->bush_root_tab; + + /* Move to next tab in the array we're traversing */ + tab++; + + if (tab == join->join_tab +join->top_join_tab_count) + return NULL; /* Outside SJM nest and reached EOF */ + + if (tab->bush_children) + return tab->bush_children->start; + + return tab; +} + + +bool JOIN::check_two_phase_optimization(THD *thd) +{ + if (check_for_splittable_materialized()) + return true; + return false; +} + + +bool JOIN::inject_cond_into_where(Item *injected_cond) +{ + Item *where_item= injected_cond; + List<Item> *and_args= NULL; + if (conds && conds->type() == Item::COND_ITEM && + ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC) + { + and_args= ((Item_cond*) conds)->argument_list(); + if (cond_equal) + and_args->disjoin((List<Item> *) &cond_equal->current_level); + } + + where_item= and_items(thd, conds, where_item); + if (where_item->fix_fields_if_needed(thd, 0)) + return true; + thd->change_item_tree(&select_lex->where, where_item); + select_lex->where->top_level_item(); + conds= select_lex->where; + + if (and_args && cond_equal) + { + and_args= ((Item_cond*) conds)->argument_list(); + List_iterator<Item_equal> li(cond_equal->current_level); + Item_equal *elem; + while ((elem= li++)) + { + and_args->push_back(elem, thd->mem_root); + } + } + + return false; + +} + + +static Item * const null_ptr= NULL; + + +/* + Set up join struct according to the picked join order in + + SYNOPSIS + get_best_combination() + join The join to process (the picked join order is mainly in + join->best_positions) + + DESCRIPTION + Setup join structures according the picked join order + - finalize semi-join strategy choices (see 
+ fix_semijoin_strategies_for_picked_join_order) + - create join->join_tab array and put there the JOIN_TABs in the join order + - create data structures describing ref access methods. + + NOTE + In this function we switch from pre-join-optimization JOIN_TABs to + post-join-optimization JOIN_TABs. This is achieved by copying the entire + JOIN_TAB objects. + + RETURN + FALSE OK + TRUE Out of memory +*/ + +bool JOIN::get_best_combination() +{ + uint tablenr; + table_map used_tables; + JOIN_TAB *j; + KEYUSE *keyuse; + JOIN_TAB *sjm_nest_end= NULL; + JOIN_TAB *sjm_nest_root= NULL; + DBUG_ENTER("get_best_combination"); + + /* + Additional plan nodes for postjoin tmp tables: + 1? + // For GROUP BY + 1? + // For DISTINCT + 1? + // For aggregation functions aggregated in outer query + // when used with distinct + 1? + // For ORDER BY + 1? // buffer result + Up to 2 tmp tables are actually used, but it's hard to tell exact number + at this stage. + */ + uint aggr_tables= (group_list ? 1 : 0) + + (select_distinct ? + (tmp_table_param.using_outer_summary_function ? 2 : 1) : 0) + + (order ? 1 : 0) + + (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ; + + if (aggr_tables == 0) + aggr_tables= 1; /* For group by pushdown */ + + if (select_lex->window_specs.elements) + aggr_tables++; + + if (aggr_tables > 2) + aggr_tables= 2; + + full_join=0; + hash_join= FALSE; + + fix_semijoin_strategies_for_picked_join_order(this); + top_join_tab_count= get_number_of_tables_at_top_level(this); + +#ifndef DBUG_OFF + dbug_join_tab_array_size= top_join_tab_count + aggr_tables; +#endif + /* + NOTE: The above computation of aggr_tables can produce wrong result because some + of the variables it uses may change their values after we leave this function. + Known examples: + - Dangerous: using_outer_summary_function=false at this point. Added + DBUG_ASSERT below to demonstrate. Can this cause us to allocate less + space than we would need? 
+ - Not dangerous: select_distinct can be true here but be assigned false + afterwards. + */ + aggr_tables= 2; + DBUG_ASSERT(!tmp_table_param.using_outer_summary_function); + if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)* + (top_join_tab_count + aggr_tables)))) + DBUG_RETURN(TRUE); + + if (inject_splitting_cond_for_all_tables_with_split_opt()) + goto error; + + JOIN_TAB_RANGE *root_range; + if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE)) + goto error; + root_range->start= join_tab; + /* root_range->end will be set later */ + join_tab_ranges.empty(); + + if (join_tab_ranges.push_back(root_range, thd->mem_root)) + goto error; + + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) + { + TABLE *form; + POSITION *cur_pos= &best_positions[tablenr]; + if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || + cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + /* + Ok, we've entered an SJ-Materialization semi-join (note that this can't + be done recursively, semi-joins are not allowed to be nested). + 1. Put into main join order a JOIN_TAB that represents a lookup or scan + in the temptable. + */ + bzero((void*)j, sizeof(JOIN_TAB)); + j->join= this; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->on_expr_ref= (Item**) &null_ptr; + j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */ + + /* + 2. Proceed with processing SJM nest's join tabs, putting them into the + sub-order + */ + SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; + j->records_read= (sjm->is_sj_scan? 
sjm->rows : 1); + j->records= (ha_rows) j->records_read; + j->cond_selectivity= 1.0; + JOIN_TAB *jt; + JOIN_TAB_RANGE *jt_range; + if (!(jt= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) || + !(jt_range= new JOIN_TAB_RANGE)) + goto error; + jt_range->start= jt; + jt_range->end= jt + sjm->tables; + join_tab_ranges.push_back(jt_range, thd->mem_root); + j->bush_children= jt_range; + sjm_nest_end= jt + sjm->tables; + sjm_nest_root= j; + + j= jt; + } + + *j= *best_positions[tablenr].table; + + j->bush_root_tab= sjm_nest_root; + + form= table[tablenr]= j->table; + form->reginfo.join_tab=j; + DBUG_PRINT("info",("type: %d", j->type)); + if (j->type == JT_CONST) + goto loop_end; // Handled in make_join_stat.. + + j->loosescan_match_tab= NULL; //non-nulls will be set later + j->inside_loosescan_range= FALSE; + j->ref.key = -1; + j->ref.key_parts=0; + + if (j->type == JT_SYSTEM) + goto loop_end; + if ( !(keyuse= best_positions[tablenr].key)) + { + j->type=JT_ALL; + if (best_positions[tablenr].use_join_buffer && + tablenr != const_tables) + full_join= 1; + } + + /*if (best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN) + { + DBUG_ASSERT(!keyuse || keyuse->key == + best_positions[tablenr].loosescan_picker.loosescan_key); + j->index= best_positions[tablenr].loosescan_picker.loosescan_key; + }*/ + + if ((j->type == JT_REF || j->type == JT_EQ_REF) && + is_hash_join_key_no(j->ref.key)) + hash_join= TRUE; + + j->range_rowid_filter_info= best_positions[tablenr].range_rowid_filter_info; + + loop_end: + /* + Save records_read in JOIN_TAB so that select_describe()/etc don't have + to access join->best_positions[]. 
+ */ + j->records_read= best_positions[tablenr].records_read; + j->cond_selectivity= best_positions[tablenr].cond_selectivity; + map2table[j->table->tablenr]= j; + + /* If we've reached the end of sjm nest, switch back to main sequence */ + if (j + 1 == sjm_nest_end) + { + j->last_leaf_in_bush= TRUE; + j= sjm_nest_root; + sjm_nest_root= NULL; + sjm_nest_end= NULL; + } + } + root_range->end= j; + + used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) + { + if (j->bush_children) + j= j->bush_children->start; + + used_tables|= j->table->map; + if (j->type != JT_CONST && j->type != JT_SYSTEM) + { + if ((keyuse= best_positions[tablenr].key) && + create_ref_for_key(this, j, keyuse, TRUE, used_tables)) + goto error; // Something went wrong + } + if (j->last_leaf_in_bush) + j= j->bush_root_tab; + } + + top_join_tab_count= (uint)(join_tab_ranges.head()->end - + join_tab_ranges.head()->start); + + if (unlikely(thd->trace_started())) + print_final_join_order(this); + + update_depend_map(this); + DBUG_RETURN(0); + +error: + /* join_tab was not correctly setup. Don't use it */ + join_tab= 0; + DBUG_RETURN(1); +} + +/** + Create a descriptor of hash join key to access a given join table + + @param join join which the join table belongs to + @param join_tab the join table to access + @param org_keyuse beginning of the key uses to join this table + @param used_tables bitmap of the previous tables + + @details + This function first finds key uses that can be utilized by the hash join + algorithm to join join_tab to the previous tables marked in the bitmap + used_tables. The tested key uses are taken from the array of all key uses + for 'join' starting from the position org_keyuse. After all interesting key + uses have been found the function builds a descriptor of the corresponding + key that is used by the hash join algorithm would it be chosen to join + the table join_tab. 
/**
  Create a descriptor of hash join key to access a given join table

  @param   join         join which the join table belongs to
  @param   join_tab     the join table to access
  @param   org_keyuse   beginning of the key uses to join this table
  @param   used_tables  bitmap of the previous tables

  @details
  This function first finds key uses that can be utilized by the hash join
  algorithm to join join_tab to the previous tables marked in the bitmap
  used_tables. The tested key uses are taken from the array of all key uses
  for 'join' starting from the position org_keyuse. After all interesting key
  uses have been found the function builds a descriptor of the corresponding
  key that would be used by the hash join algorithm should it be chosen to
  join the table join_tab.

  The work is done in two passes over the same run of key uses: the first
  pass counts the distinct usable key parts, the second pass fills in one
  KEY_PART_INFO per distinct key part. In the second pass a key use that
  repeats the key part of an earlier usable key use gets its keypart
  overwritten with NO_KEYPART.

  @retval  FALSE  the descriptor for a hash join key is successfully created
  @retval  TRUE   otherwise (no usable key part, or an allocation failed)
*/

static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
                                    KEYUSE *org_keyuse, table_map used_tables)
{
  KEY *keyinfo;
  KEY_PART_INFO *key_part_info;
  KEYUSE *keyuse= org_keyuse;
  uint key_parts= 0;
  THD *thd= join->thd;
  TABLE *table= join_tab->table;
  bool first_keyuse= TRUE;
  DBUG_ENTER("create_hj_key_for_table");

  /* Pass 1: count the distinct key parts among the usable key uses */
  do
  {
    /*
      A key use is usable if it depends only on tables in used_tables, is
      valid in the chosen plan and refers only to tables local to join_tab.
    */
    if (!(~used_tables & keyuse->used_tables) &&
        join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
        are_tables_local(join_tab, keyuse->used_tables))
    {
      if (first_keyuse)
      {
        key_parts++;
      }
      else
      {
        /* Look for an earlier usable key use on the same key part */
        KEYUSE *curr= org_keyuse;
        for( ; curr < keyuse; curr++)
        {
          if (curr->keypart == keyuse->keypart &&
              !(~used_tables & curr->used_tables) &&
              join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
                                                                  curr) &&
              are_tables_local(join_tab, curr->used_tables))
            break;
        }
        /* None found: this key part has not been counted yet */
        if (curr == keyuse)
          key_parts++;
      }
    }
    first_keyuse= FALSE;
    keyuse++;
  } while (keyuse->table == table && keyuse->is_for_hash_join());
  if (!key_parts)
    DBUG_RETURN(TRUE);
  /* This memory is allocated only once for the joined table join_tab */
  if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) ||
      !(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)*
                                                     key_parts)))
    DBUG_RETURN(TRUE);
  keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts;
  keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
  keyinfo->key_part= key_part_info;
  keyinfo->key_length=0;
  keyinfo->algorithm= HA_KEY_ALG_UNDEF;
  keyinfo->flags= HA_GENERATED_KEY;
  keyinfo->is_statistics_from_stat_tables= FALSE;
  keyinfo->name.str= "$hj";
  keyinfo->name.length= 3;
  keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
  if (!keyinfo->rec_per_key)
    DBUG_RETURN(TRUE);
  /* NOTE(review): key_part was already assigned above; this repeats it */
  keyinfo->key_part= key_part_info;

  /* Pass 2: create one key part per distinct usable key part */
  first_keyuse= TRUE;
  keyuse= org_keyuse;
  do
  {
    if (!(~used_tables & keyuse->used_tables) &&
        join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
        are_tables_local(join_tab, keyuse->used_tables))
    {
      bool add_key_part= TRUE;
      if (!first_keyuse)
      {
        for(KEYUSE *curr= org_keyuse; curr < keyuse; curr++)
        {
          if (curr->keypart == keyuse->keypart &&
              !(~used_tables & curr->used_tables) &&
              join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
                                                                  curr) &&
              are_tables_local(join_tab, curr->used_tables))
          {
            /* Duplicate of an earlier key part: mark it and skip it */
            keyuse->keypart= NO_KEYPART;
            add_key_part= FALSE;
            break;
          }
        }
      }
      if (add_key_part)
      {
        /* Here keyuse->keypart is used as an index into table->field */
        Field *field= table->field[keyuse->keypart];
        uint fieldnr= keyuse->keypart+1;
        table->create_key_part_by_field(key_part_info, field, fieldnr);
        keyinfo->key_length += key_part_info->store_length;
        key_part_info++;
      }
    }
    first_keyuse= FALSE;
    keyuse++;
  } while (keyuse->table == table && keyuse->is_for_hash_join());

  keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
  keyinfo->ext_key_flags= keyinfo->flags;
  keyinfo->ext_key_part_map= 0;

  join_tab->hj_key= keyinfo;

  DBUG_RETURN(FALSE);
}

/*
  Check if a set of tables specified by used_tables can be accessed when
  we're doing scan on join_tab jtab.

  Returns TRUE if they can be accessed, FALSE otherwise.
*/
static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
{
  if (jtab->bush_root_tab)
  {
    /*
      jtab is inside execution join nest. We may not refer to outside tables,
      except the const tables.
    */
    table_map local_tables= jtab->emb_sj_nest->nested_join->used_tables |
                            jtab->join->const_table_map |
                            OUTER_REF_TABLE_BIT;
    /* Local only if no bit of used_tables falls outside local_tables */
    return !MY_TEST(used_tables & ~local_tables);
  }

  /*
    If we got here then jtab is at top level.
     - all other tables at top level are accessible,
     - tables in join nests are accessible too, because all their columns that
       are needed at top level will be unpacked when scanning the
       materialization table.
  */
  return TRUE;
}
+ */ + return TRUE; +} + +static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, + KEYUSE *org_keyuse, bool allow_full_scan, + table_map used_tables) +{ + uint keyparts, length, key; + TABLE *table; + KEY *keyinfo; + KEYUSE *keyuse= org_keyuse; + bool ftkey= (keyuse->keypart == FT_KEYPART); + THD *thd= join->thd; + DBUG_ENTER("create_ref_for_key"); + + /* Use best key from find_best */ + table= j->table; + key= keyuse->key; + if (!is_hash_join_key_no(key)) + keyinfo= table->key_info+key; + else + { + if (create_hj_key_for_table(join, j, org_keyuse, used_tables)) + DBUG_RETURN(TRUE); + keyinfo= j->hj_key; + } + + if (ftkey) + { + Item_func_match *ifm=(Item_func_match *)keyuse->val; + + length=0; + keyparts=1; + ifm->join_key=1; + } + else + { + keyparts=length=0; + uint found_part_ref_or_null= 0; + /* + Calculate length for the used key + Stop if there is a missing key part or when we find second key_part + with KEY_OPTIMIZE_REF_OR_NULL + */ + do + { + if (!(~used_tables & keyuse->used_tables) && + (!keyuse->validity_ref || *keyuse->validity_ref) && + j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse)) + { + if (are_tables_local(j, keyuse->val->used_tables())) + { + if ((is_hash_join_key_no(key) && keyuse->keypart != NO_KEYPART) || + (!is_hash_join_key_no(key) && keyparts == keyuse->keypart && + !(found_part_ref_or_null & keyuse->optimize))) + { + length+= keyinfo->key_part[keyparts].store_length; + keyparts++; + found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; + } + } + } + keyuse++; + } while (keyuse->table == table && keyuse->key == key); + + if (!keyparts && allow_full_scan) + { + /* It's a LooseIndexScan strategy scanning whole index */ + j->type= JT_ALL; + j->index= key; + DBUG_RETURN(FALSE); + } + + DBUG_ASSERT(length > 0); + DBUG_ASSERT(keyparts != 0); + } /* not ftkey */ + + /* set up fieldref */ + j->ref.key_parts= keyparts; + j->ref.key_length= length; + j->ref.key= (int) key; + if (!(j->ref.key_buff= (uchar*) 
thd->calloc(ALIGN_SIZE(length)*2)) || + !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) * + (keyparts+1)))) || + !(j->ref.items=(Item**) thd->alloc(sizeof(Item*)*keyparts)) || + !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts))) + { + DBUG_RETURN(TRUE); + } + j->ref.key_buff2=j->ref.key_buff+ALIGN_SIZE(length); + j->ref.key_err=1; + j->ref.has_record= FALSE; + j->ref.null_rejecting= 0; + j->ref.disable_cache= FALSE; + j->ref.null_ref_part= NO_REF_PART; + j->ref.const_ref_part_map= 0; + j->ref.uses_splitting= FALSE; + keyuse=org_keyuse; + + store_key **ref_key= j->ref.key_copy; + uchar *key_buff=j->ref.key_buff, *null_ref_key= 0; + uint null_ref_part= NO_REF_PART; + bool keyuse_uses_no_tables= TRUE; + uint not_null_keyparts= 0; + if (ftkey) + { + j->ref.items[0]=((Item_func*)(keyuse->val))->key_item(); + /* Predicates pushed down into subquery can't be used FT access */ + j->ref.cond_guards[0]= NULL; + if (keyuse->used_tables) + DBUG_RETURN(TRUE); // not supported yet. SerG + + j->type=JT_FT; + } + else + { + uint i; + for (i=0 ; i < keyparts ; keyuse++,i++) + { + while (((~used_tables) & keyuse->used_tables) || + (keyuse->validity_ref && !(*keyuse->validity_ref)) || + !j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) || + keyuse->keypart == NO_KEYPART || + (keyuse->keypart != + (is_hash_join_key_no(key) ? + keyinfo->key_part[i].field->field_index : i)) || + !are_tables_local(j, keyuse->val->used_tables())) + keyuse++; /* Skip other parts */ + + uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); + j->ref.items[i]=keyuse->val; // Save for cond removal + j->ref.cond_guards[i]= keyuse->cond_guard; + + if (!keyuse->val->maybe_null() || keyuse->null_rejecting) + not_null_keyparts++; + /* + Set ref.null_rejecting to true only if we are going to inject a + "keyuse->val IS NOT NULL" predicate. 
+ */ + Item *real= (keyuse->val)->real_item(); + if (keyuse->null_rejecting && (real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null()) + j->ref.null_rejecting|= (key_part_map)1 << i; + + keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; + j->ref.uses_splitting |= (keyuse->validity_ref != NULL); + /* + We don't want to compute heavy expressions in EXPLAIN, an example would + select * from t1 where t1.key=(select thats very heavy); + + (select thats very heavy) => is a constant here + eg: (select avg(order_cost) from orders) => constant but expensive + */ + if (!keyuse->val->used_tables() && !thd->lex->describe) + { // Compare against constant + store_key_item tmp(thd, + keyinfo->key_part[i].field, + key_buff + maybe_null, + maybe_null ? key_buff : 0, + keyinfo->key_part[i].length, + keyuse->val, + FALSE); + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + tmp.copy(thd); + j->ref.const_ref_part_map |= key_part_map(1) << i ; + } + else + { + *ref_key++= get_store_key(thd, + keyuse,join->const_table_map, + &keyinfo->key_part[i], + key_buff, maybe_null); + if (!keyuse->val->used_tables()) + j->ref.const_ref_part_map |= key_part_map(1) << i ; + } + /* + Remember if we are going to use REF_OR_NULL + But only if field _really_ can be null i.e. 
we force JT_REF + instead of JT_REF_OR_NULL in case if field can't be null + */ + if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null) + { + null_ref_key= key_buff; + null_ref_part= i; + } + key_buff+= keyinfo->key_part[i].store_length; + } + } /* not ftkey */ + *ref_key=0; // end_marker + if (j->type == JT_FT) + DBUG_RETURN(0); + ulong key_flags= j->table->actual_key_flags(keyinfo); + if (j->type == JT_CONST) + j->table->const_table= 1; + else if (!((keyparts == keyinfo->user_defined_key_parts && + ( + (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME || + /* Unique key and all keyparts are NULL rejecting */ + ((key_flags & HA_NOSAME) && keyparts == not_null_keyparts) + )) || + /* true only for extended keys */ + (keyparts > keyinfo->user_defined_key_parts && + MY_TEST(key_flags & HA_EXT_NOSAME) && + keyparts == keyinfo->ext_key_parts) + ) || + null_ref_key) + { + /* Must read with repeat */ + j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF; + j->ref.null_ref_key= null_ref_key; + j->ref.null_ref_part= null_ref_part; + } + else if (keyuse_uses_no_tables) + { + /* + This happen if we are using a constant expression in the ON part + of an LEFT JOIN. + SELECT * FROM a LEFT JOIN b ON b.key=30 + Here we should not mark the table as a 'const' as a field may + have a 'normal' value or a NULL value. + */ + j->type=JT_CONST; + } + else + j->type=JT_EQ_REF; + + if (j->type == JT_EQ_REF) + j->read_record.unlock_row= join_read_key_unlock_row; + else if (j->type == JT_CONST) + j->read_record.unlock_row= join_const_unlock_row; + else + j->read_record.unlock_row= rr_unlock_row; + DBUG_RETURN(0); +} + + + +static store_key * +get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, + KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null) +{ + if (!((~used_tables) & keyuse->used_tables)) // if const item + { + return new store_key_const_item(thd, + key_part->field, + key_buff + maybe_null, + maybe_null ? 
key_buff : 0, + key_part->length, + keyuse->val); + } + else if (keyuse->val->type() == Item::FIELD_ITEM || + (keyuse->val->type() == Item::REF_ITEM && + ((((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF && + (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() == + Item_ref::DIRECT_REF) || + ((Item_ref*)keyuse->val)->ref_type() == Item_ref::VIEW_REF) && + keyuse->val->real_item()->type() == Item::FIELD_ITEM)) + return new store_key_field(thd, + key_part->field, + key_buff + maybe_null, + maybe_null ? key_buff : 0, + key_part->length, + ((Item_field*) keyuse->val->real_item())->field, + keyuse->val->real_item()->full_name()); + + return new store_key_item(thd, + key_part->field, + key_buff + maybe_null, + maybe_null ? key_buff : 0, + key_part->length, + keyuse->val, FALSE); +} + + +inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2) +{ + if (*e1) + { + if (!e2) + return; + Item *res; + if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2))) + { + res->fix_fields(thd, 0); + res->update_used_tables(); + *e1= res; + } + } + else + *e1= e2; +} + + +/** + Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions + we've inferred from ref/eq_ref access performed. + + This function is a part of "Early NULL-values filtering for ref access" + optimization. + + Example of this optimization: + For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n + and plan " any-access(t1), ref(t2.key=t1.field) " @n + add "t1.field IS NOT NULL" to t1's table condition. @n + + Description of the optimization: + + We look through equalities chosen to perform ref/eq_ref access, + pick equalities that have form "tbl.part_of_key = othertbl.field" + (where othertbl is a non-const table and othertbl.field may be NULL) + and add them to conditions on correspoding tables (othertbl in this + example). + + Exception from that is the case when referred_tab->join != join. + I.e. don't add NOT NULL constraints from any embedded subquery. 
+ Consider this query: + @code + SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1 + WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL; + @endocde + Here condition A.f3 IS NOT NULL is going to be added to the WHERE + condition of the embedding query. + Another example: + SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL) + AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12 + WHERE t12.b = t10.a )); + Here condition t10.a IS NOT NULL is going to be added. + In both cases addition of NOT NULL condition will erroneously reject + some rows of the result set. + referred_tab->join != join constraint would disallow such additions. + + This optimization doesn't affect the choices that ref, range, or join + optimizer make. This was intentional because this was added after 4.1 + was GA. + + Implementation overview + 1. update_ref_and_keys() accumulates info about null-rejecting + predicates in in KEY_FIELD::null_rejecting + 1.1 add_key_part saves these to KEYUSE. + 2. create_ref_for_key copies them to TABLE_REF. + 3. add_not_null_conds adds "x IS NOT NULL" to join_tab->select_cond of + appropiate JOIN_TAB members. +*/ + +static void add_not_null_conds(JOIN *join) +{ + JOIN_TAB *tab; + DBUG_ENTER("add_not_null_conds"); + + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (tab->type == JT_REF || tab->type == JT_EQ_REF || + tab->type == JT_REF_OR_NULL) + { + for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++) + { + if (tab->ref.null_rejecting & ((key_part_map)1 << keypart)) + { + Item *item= tab->ref.items[keypart]; + Item *notnull; + Item *real= item->real_item(); + if (real->can_eval_in_optimize() && real->type() != Item::FIELD_ITEM) + { + /* + It could be constant instead of field after constant + propagation. 
+ */ + continue; + } + DBUG_ASSERT(real->type() == Item::FIELD_ITEM); + Item_field *not_null_item= (Item_field*)real; + JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab; + /* + For UPDATE queries such as: + UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1); + not_null_item is the t1.f1, but it's referred_tab is 0. + */ + if (!(notnull= new (join->thd->mem_root) + Item_func_isnotnull(join->thd, item))) + DBUG_VOID_RETURN; + /* + We need to do full fix_fields() call here in order to have correct + notnull->const_item(). This is needed e.g. by test_quick_select + when it is called from make_join_select after this function is + called. + */ + if (notnull->fix_fields(join->thd, ¬null)) + DBUG_VOID_RETURN; + + DBUG_EXECUTE("where",print_where(notnull, + (referred_tab ? + referred_tab->table->alias.c_ptr() : + "outer_ref_cond"), + QT_ORDINARY);); + if (!tab->first_inner) + { + COND *new_cond= (referred_tab && referred_tab->join == join) ? + referred_tab->select_cond : + join->outer_ref_cond; + add_cond_and_fix(join->thd, &new_cond, notnull); + if (referred_tab && referred_tab->join == join) + referred_tab->set_select_cond(new_cond, __LINE__); + else + join->outer_ref_cond= new_cond; + } + else + add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull); + } + } + } + } + DBUG_VOID_RETURN; +} + +/** + Build a predicate guarded by match variables for embedding outer joins. + The function recursively adds guards for predicate cond + assending from tab to the first inner table next embedding + nested outer join and so on until it reaches root_tab + (root_tab can be 0). + + In other words: + add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should + wrap parts of WHERE. The idea is that the part of WHERE should be only + evaluated after we've finished figuring out whether outer joins. + ^^^ is the above correct? 
+ + @param tab the first inner table for most nested outer join + @param cond the predicate to be guarded (must be set) + @param root_tab the first inner table to stop + + @return + - pointer to the guarded predicate, if success + - 0, otherwise +*/ + +static COND* +add_found_match_trig_cond(THD *thd, JOIN_TAB *tab, COND *cond, + JOIN_TAB *root_tab) +{ + COND *tmp; + DBUG_ASSERT(cond != 0); + if (tab == root_tab) + return cond; + if ((tmp= add_found_match_trig_cond(thd, tab->first_upper, cond, root_tab))) + tmp= new (thd->mem_root) Item_func_trig_cond(thd, tmp, &tab->found); + if (tmp) + { + tmp->quick_fix_field(); + tmp->update_used_tables(); + } + return tmp; +} + + +bool TABLE_LIST::is_active_sjm() +{ + return sj_mat_info && sj_mat_info->is_used; +} + + +/** + Fill in outer join related info for the execution plan structure. + + For each outer join operation left after simplification of the + original query the function set up the following pointers in the linear + structure join->join_tab representing the selected execution plan. + The first inner table t0 for the operation is set to refer to the last + inner table tk through the field t0->last_inner. + Any inner table ti for the operation are set to refer to the first + inner table ti->first_inner. + The first inner table t0 for the operation is set to refer to the + first inner table of the embedding outer join operation, if there is any, + through the field t0->first_upper. + The on expression for the outer join operation is attached to the + corresponding first inner table through the field t0->on_expr_ref. + Here ti are structures of the JOIN_TAB type. + + In other words, for each join tab, set + - first_inner + - last_inner + - first_upper + - on_expr_ref, cond_equal + + EXAMPLE. 
For the query: + @code + SELECT * FROM t1 + LEFT JOIN + (t2, t3 LEFT JOIN t4 ON t3.a=t4.a) + ON (t1.a=t2.a AND t1.b=t3.b) + WHERE t1.c > 5, + @endcode + + given the execution plan with the table order t1,t2,t3,t4 + is selected, the following references will be set; + t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2] + t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2], + on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to + *t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref. + + @param join reference to the info fully describing the query + + @note + The function assumes that the simplification procedure has been + already applied to the join query (see simplify_joins). + This function can be called only after the execution plan + has been chosen. +*/ + +static bool +make_outerjoin_info(JOIN *join) +{ + DBUG_ENTER("make_outerjoin_info"); + + /* + Create temp. tables for merged SJ-Materialization nests. We need to do + this now, because further code relies on tab->table and + tab->table->pos_in_table_list being set. + */ + JOIN_TAB *tab; + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (tab->bush_children) + { + if (setup_sj_materialization_part1(tab)) + DBUG_RETURN(TRUE); + tab->table->reginfo.join_tab= tab; + } + } + + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + TABLE *table= tab->table; + TABLE_LIST *tbl= table->pos_in_table_list; + TABLE_LIST *embedding= tbl->embedding; + + if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) + { + /* + Table tab is the only one inner table for outer join. + (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a + is in the query above.) 
+ */ + tab->last_inner= tab->first_inner= tab; + tab->on_expr_ref= &tbl->on_expr; + tab->cond_equal= tbl->cond_equal; + if (embedding && !embedding->is_active_sjm()) + tab->first_upper= embedding->nested_join->first_nested; + } + else if (!embedding) + tab->table->reginfo.not_exists_optimize= 0; + + for ( ; embedding ; embedding= embedding->embedding) + { + if (embedding->is_active_sjm()) + { + /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */ + break; + } + /* Ignore sj-nests: */ + if (!(embedding->on_expr && embedding->outer_join)) + { + tab->table->reginfo.not_exists_optimize= 0; + continue; + } + NESTED_JOIN *nested_join= embedding->nested_join; + if (!nested_join->counter) + { + /* + Table tab is the first inner table for nested_join. + Save reference to it in the nested join structure. + */ + nested_join->first_nested= tab; + tab->on_expr_ref= &embedding->on_expr; + tab->cond_equal= tbl->cond_equal; + if (embedding->embedding) + tab->first_upper= embedding->embedding->nested_join->first_nested; + } + if (!tab->first_inner) + tab->first_inner= nested_join->first_nested; + if (++nested_join->counter < nested_join->n_tables) + break; + /* Table tab is the last inner table for nested join. */ + nested_join->first_nested->last_inner= tab; + } + } + DBUG_RETURN(FALSE); +} + + +/* + @brief + Build a temporary join prefix condition for JOIN_TABs up to the last tab + + @param ret OUT the condition is returned here + + @return + false OK + true Out of memory + + @detail + Walk through the join prefix (from the first table to the last_tab) and + build a condition: + + join_tab_1_cond AND join_tab_2_cond AND ... AND last_tab_conds + + The condition is only intended to be used by the range optimizer, so: + - it is not normalized (can have Item_cond_and inside another + Item_cond_and) + - it does not include join->exec_const_cond and other similar conditions. 
+*/ + +bool build_tmp_join_prefix_cond(JOIN *join, JOIN_TAB *last_tab, Item **ret) +{ + THD *const thd= join->thd; + Item_cond_and *all_conds= NULL; + + Item *res= NULL; + + // Pick the ON-expression. Use the same logic as in get_sargable_cond(): + if (last_tab->on_expr_ref) + res= *last_tab->on_expr_ref; + else if (last_tab->table->pos_in_table_list && + last_tab->table->pos_in_table_list->embedding && + !last_tab->table->pos_in_table_list->embedding->sj_on_expr) + { + res= last_tab->table->pos_in_table_list->embedding->on_expr; + } + + for (JOIN_TAB *tab= first_depth_first_tab(join); + tab; + tab= next_depth_first_tab(join, tab)) + { + if (tab->select_cond) + { + if (!res) + res= tab->select_cond; + else + { + if (!all_conds) + { + if (!(all_conds= new (thd->mem_root)Item_cond_and(thd, res, + tab->select_cond))) + return true; + res= all_conds; + } + else + all_conds->add(tab->select_cond, thd->mem_root); + } + } + if (tab == last_tab) + break; + } + *ret= all_conds? all_conds: res; + return false; +} + + +static bool +make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) +{ + THD *thd= join->thd; + DBUG_ENTER("make_join_select"); + if (select) + { + add_not_null_conds(join); + table_map used_tables; + /* + Step #1: Extract constant condition + - Extract and check the constant part of the WHERE + - Extract constant parts of ON expressions from outer + joins and attach them appropriately. + */ + if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */ + { /* there may be a select without a cond. */ + if (join->table_count > 1) + cond->update_used_tables(); // Tablenr may have changed + + /* + Extract expressions that depend on constant tables + 1. Const part of the join's WHERE clause can be checked immediately + and if it is not satisfied then the join has empty result + 2. Constant parts of outer joins' ON expressions must be attached + there inside the triggers. 
+ */ + { // Check const tables + Item* const_cond= NULL; + const_cond= make_cond_for_table(thd, cond, + join->const_table_map, + (table_map) 0, -1, FALSE, FALSE); + if (!const_cond && thd->is_error()) + DBUG_RETURN(1); + + /* Add conditions added by add_not_null_conds(). */ + for (uint i= 0 ; i < join->const_tables ; i++) + add_cond_and_fix(thd, &const_cond, + join->join_tab[i].select_cond); + + DBUG_EXECUTE("where",print_where(const_cond,"constants", + QT_ORDINARY);); + + if (const_cond) + { + Json_writer_object trace_const_cond(thd); + trace_const_cond.add("condition_on_constant_tables", const_cond); + if (const_cond->is_expensive()) + { + trace_const_cond.add("evaluated", "false") + .add("cause", "expensive cond"); + } + else + { + bool const_cond_result; + { + Json_writer_array a(thd, "computing_condition"); + const_cond_result= const_cond->val_int() != 0; + } + if (!const_cond_result) + { + DBUG_PRINT("info",("Found impossible WHERE condition")); + trace_const_cond.add("evaluated", "true") + .add("found", "impossible where"); + join->exec_const_cond= NULL; + DBUG_RETURN(1); + } + } + join->exec_const_cond= const_cond; + } + + if (join->table_count != join->const_tables) + { + COND *outer_ref_cond= make_cond_for_table(thd, cond, + join->const_table_map | + OUTER_REF_TABLE_BIT, + OUTER_REF_TABLE_BIT, + -1, FALSE, FALSE); + if (outer_ref_cond) + { + add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond); + join->outer_ref_cond= outer_ref_cond; + } + else if (thd->is_error()) + DBUG_RETURN(1); + } + else + { + COND *pseudo_bits_cond= + make_cond_for_table(thd, cond, + join->const_table_map | + PSEUDO_TABLE_BITS, + PSEUDO_TABLE_BITS, + -1, FALSE, FALSE); + if (pseudo_bits_cond) + { + add_cond_and_fix(thd, &pseudo_bits_cond, + join->pseudo_bits_cond); + join->pseudo_bits_cond= pseudo_bits_cond; + } + else if (thd->is_error()) + DBUG_RETURN(1); + } + } + } + + /* + Step #2: Extract WHERE/ON parts + */ + Json_writer_object trace_wrapper(thd); + 
Json_writer_object trace_conditions(thd, "attaching_conditions_to_tables"); + Json_writer_array trace_attached_comp(thd, + "attached_conditions_computation"); + uint i; + for (i= join->top_join_tab_count - 1; i >= join->const_tables; i--) + { + if (!join->join_tab[i].bush_children) + break; + } + uint last_top_base_tab_idx= i; + + table_map save_used_tables= 0; + used_tables=((select->const_tables=join->const_table_map) | + OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + JOIN_TAB *tab; + table_map current_map; + i= join->const_tables; + for (tab= first_depth_first_tab(join); tab; + tab= next_depth_first_tab(join, tab)) + { + bool is_hj; + + /* + first_inner is the X in queries like: + SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X + */ + JOIN_TAB *first_inner_tab= tab->first_inner; + + if (!tab->bush_children) + current_map= tab->table->map; + else + current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; + + bool use_quick_range=0; + COND *tmp; + + /* + Tables that are within SJ-Materialization nests cannot have their + conditions referring to preceding non-const tables. 
+ - If we're looking at the first SJM table, reset used_tables + to refer to only allowed tables + */ + if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info && + tab->emb_sj_nest->sj_mat_info->is_used && + !(used_tables & tab->emb_sj_nest->sj_inner_tables)) + { + save_used_tables= used_tables; + used_tables= join->const_table_map | OUTER_REF_TABLE_BIT | + RAND_TABLE_BIT; + } + + used_tables|=current_map; + + if (tab->type == JT_REF && tab->quick && + (((uint) tab->ref.key == tab->quick->index && + tab->ref.key_length < tab->quick->max_used_key_length) || + (!is_hash_join_key_no(tab->ref.key) && + tab->table->intersect_keys.is_set(tab->ref.key)))) + { + /* Range uses longer key; Use this instead of ref on key */ + Json_writer_object ref_to_range(thd); + ref_to_range.add("ref_to_range", true); + ref_to_range.add("cause", "range uses longer key"); + tab->type=JT_ALL; + use_quick_range=1; + tab->use_quick=1; + tab->ref.key= -1; + tab->ref.key_parts=0; // Don't use ref key. + join->best_positions[i].records_read= rows2double(tab->quick->records); + /* + We will use join cache here : prevent sorting of the first + table only and sort at the end. 
+ */ + if (i != join->const_tables && + join->table_count > join->const_tables + 1 && + join->best_positions[i].use_join_buffer) + join->full_join= 1; + } + + tmp= NULL; + + if (cond) + { + if (tab->bush_children) + { + // Reached the materialization tab + tmp= make_cond_after_sjm(thd, cond, cond, save_used_tables, + used_tables, /*inside_or_clause=*/FALSE); + used_tables= save_used_tables | used_tables; + save_used_tables= 0; + } + else + { + tmp= make_cond_for_table(thd, cond, used_tables, current_map, i, + FALSE, FALSE); + if (!tmp && thd->is_error()) + DBUG_RETURN(1); + + if (tab == join->join_tab + last_top_base_tab_idx) + { + /* + This pushes conjunctive conditions of WHERE condition such that: + - their used_tables() contain RAND_TABLE_BIT + - the conditions does not refer to any fields + (such like rand() > 0.5) + */ + table_map rand_table_bit= (table_map) RAND_TABLE_BIT; + COND *rand_cond= make_cond_for_table(thd, cond, used_tables, + rand_table_bit, -1, + FALSE, FALSE); + if (rand_cond) + add_cond_and_fix(thd, &tmp, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); + } + } + /* Add conditions added by add_not_null_conds(). 
*/ + if (tab->select_cond) + add_cond_and_fix(thd, &tmp, tab->select_cond); + } + + is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) && + (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && + ((join->max_allowed_join_cache_level+1)/2 == 2 || + ((join->max_allowed_join_cache_level+1)/2 > 2 && + is_hash_join_key_no(tab->ref.key))) && + (!tab->emb_sj_nest || + join->allowed_semijoin_with_cache) && + (!(tab->table->map & join->outer_join) || + join->allowed_outer_join_with_cache); + + if (cond && !tmp && tab->quick) + { // Outer join + if (tab->type != JT_ALL && !is_hj) + { + /* + Don't use the quick method + We come here in the case where we have 'key=constant' and + the test is removed by make_cond_for_table() + */ + delete tab->quick; + tab->quick= 0; + } + else + { + /* + Hack to handle the case where we only refer to a table + in the ON part of an OUTER JOIN. In this case we want the code + below to check if we should use 'quick' instead. + */ + DBUG_PRINT("info", ("Item_int")); + tmp= (Item*) Item_true; + } + + } + if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL || + tab->type == JT_EQ_REF || first_inner_tab) + { + DBUG_EXECUTE("where",print_where(tmp, + tab->table? tab->table->alias.c_ptr() :"sjm-nest", + QT_ORDINARY);); + SQL_SELECT *sel= tab->select= ((SQL_SELECT*) + thd->memdup((uchar*) select, + sizeof(*select))); + if (!sel) + DBUG_RETURN(1); // End of memory + /* + If tab is an inner table of an outer join operation, + add a match guard to the pushed down predicate. + The guard will turn the predicate on only after + the first match for outer tables is encountered. + */ + if (cond && tmp) + { + /* + Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without + a cond, so neutralize the hack above. 
+ */ + COND *tmp_cond; + if (!(tmp_cond= add_found_match_trig_cond(thd, first_inner_tab, tmp, + 0))) + DBUG_RETURN(1); + sel->cond= tmp_cond; + tab->set_select_cond(tmp_cond, __LINE__); + /* Push condition to storage engine if this is enabled + and the condition is not guarded */ + if (tab->table) + { + tab->table->file->pushed_cond= NULL; + if ((tab->table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && + !first_inner_tab) + { + Json_writer_object wrap(thd); + Json_writer_object trace_cp(thd, "table_condition_pushdown"); + trace_cp.add_table_name(tab->table); + + COND *push_cond= + make_cond_for_table(thd, tmp_cond, current_map, current_map, + -1, FALSE, FALSE); + if (push_cond) + { + trace_cp.add("push_cond", push_cond); + /* Push condition to handler */ + if (!tab->table->file->cond_push(push_cond)) + tab->table->file->pushed_cond= push_cond; + } + else if (thd->is_error()) + DBUG_RETURN(1); + } + } + } + else + { + sel->cond= NULL; + tab->set_select_cond(NULL, __LINE__); + } + + sel->head=tab->table; + DBUG_EXECUTE("where", + print_where(tmp, + tab->table ? tab->table->alias.c_ptr() : + "(sjm-nest)", + QT_ORDINARY);); + if (tab->quick) + { + /* Use quick key read if it's a constant and it's not used + with key reading */ + if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF && + tab->type != JT_FT && + ((tab->type != JT_CONST && tab->type != JT_REF) || + (uint) tab->ref.key == tab->quick->index)) || is_hj) + { + DBUG_ASSERT(tab->quick->is_valid()); + sel->quick=tab->quick; // Use value from get_quick_... + sel->quick_keys.clear_all(); + sel->needed_reg.clear_all(); + if (is_hj && tab->rowid_filter) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + } + } + else + { + delete tab->quick; + } + tab->quick=0; + } + uint ref_key= sel->head? 
(uint) sel->head->reginfo.join_tab->ref.key+1 : 0; + if (i == join->const_tables && ref_key) + { + if (!tab->const_keys.is_clear_all() && + tab->table->reginfo.impossible_range) + DBUG_RETURN(1); + } + else if (tab->type == JT_ALL && ! use_quick_range) + { + if (!tab->const_keys.is_clear_all() && + tab->table->reginfo.impossible_range) + DBUG_RETURN(1); // Impossible range + /* + We plan to scan all rows. + Check again if we should use an index. + + There are two cases: + 1) There could be an index usage the refers to a previous + table that we didn't consider before, but could be consider + now as a "last resort". For example + SELECT * from t1,t2 where t1.a between t2.a and t2.b; + 2) If the current table is the first non const table + and there is a limit it still possibly beneficial + to use the index even if the index range is big as + we can stop when we've found limit rows. + + (1) - Don't switch the used index if we are using semi-join + LooseScan on this table. Using different index will not + produce the desired ordering and de-duplication. + */ + + if (!tab->table->is_filled_at_execution() && + !tab->loosescan_match_tab && // (1) + ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || + (!tab->const_keys.is_clear_all() && i == join->const_tables && + join->unit->lim.get_select_limit() < + join->best_positions[i].records_read && + !(join->select_options & OPTION_FOUND_ROWS)))) + { + /* Join with outer join condition */ + COND *orig_cond=sel->cond; + + if (build_tmp_join_prefix_cond(join, tab, &sel->cond)) + return true; + + /* + We can't call sel->cond->fix_fields, + as it will break tab->on_expr if it's AND condition + (fix_fields currently removes extra AND/OR levels). + Yet attributes of the just built condition are not needed. + Thus we call sel->cond->quick_fix_field for safety. 
+ */ + if (sel->cond && !sel->cond->fixed()) + sel->cond->quick_fix_field(); + quick_select_return res; + + if ((res= sel->test_quick_select(thd, tab->keys, + ((used_tables & ~ current_map) | + OUTER_REF_TABLE_BIT), + (join->select_options & + OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->lim.get_select_limit()), + 0, + FALSE, FALSE, FALSE)) == + SQL_SELECT::IMPOSSIBLE_RANGE) + { + /* + Before reporting "Impossible WHERE" for the whole query + we have to check isn't it only "impossible ON" instead + */ + sel->cond=orig_cond; + if (!*tab->on_expr_ref || + (res= sel->test_quick_select(thd, tab->keys, + used_tables & ~ current_map, + (join->select_options & + OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->lim.get_select_limit()), + 0, FALSE, FALSE, FALSE, TRUE)) == + SQL_SELECT::IMPOSSIBLE_RANGE) + DBUG_RETURN(1); // Impossible WHERE + } + else + sel->cond=orig_cond; + + if (res == SQL_SELECT::ERROR) + DBUG_RETURN(1); /* Some error in one of test_quick_select calls */ + + /* Fix for EXPLAIN */ + if (sel->quick) + join->best_positions[i].records_read= (double)sel->quick->records; + } + else + { + sel->needed_reg=tab->needed_reg; + } + sel->quick_keys= tab->table->opt_range_keys; + if (!sel->quick_keys.is_subset(tab->checked_keys) || + !sel->needed_reg.is_subset(tab->checked_keys)) + { + /* + "Range checked for each record" is a "last resort" access method + that should only be used when the other option is a cross-product + join. + + We use the following condition (it's approximate): + 1. There are potential keys for (sel->needed_reg) + 2. There were no possible ways to construct a quick select, or + the quick select would be more expensive than the full table + scan. + */ + tab->use_quick= (!sel->needed_reg.is_clear_all() && + (sel->quick_keys.is_clear_all() || + (sel->quick && + sel->quick->read_time > + tab->table->file->scan_time() + + tab->table->file->stats.records/TIME_FOR_COMPARE + ))) ? 
+ 2 : 1; + sel->read_tables= used_tables & ~current_map; + sel->quick_keys.clear_all(); + } + if (i != join->const_tables && tab->use_quick != 2 && + !tab->first_inner) + { /* Read with cache */ + /* + TODO: the execution also gets here when we will not be using + join buffer. Review these cases and perhaps, remove this call. + (The final decision whether to use join buffer is made in + check_join_cache_usage, so we should only call make_scan_filter() + there, too). + */ + if (tab->make_scan_filter()) + DBUG_RETURN(1); + } + } + } + + /* + Push down conditions from all ON expressions. + Each of these conditions are guarded by a variable + that turns if off just before null complemented row for + outer joins is formed. Thus, the condition from an + 'on expression' are guaranteed not to be checked for + the null complemented row. + */ + + /* + First push down constant conditions from ON expressions. + - Each pushed-down condition is wrapped into trigger which is + enabled only for non-NULL-complemented record + - The condition is attached to the first_inner_table. + + With regards to join nests: + - if we start at top level, don't walk into nests + - if we start inside a nest, stay within that nest. + */ + JOIN_TAB *start_from= tab->bush_root_tab? + tab->bush_root_tab->bush_children->start : + join->join_tab + join->const_tables; + JOIN_TAB *end_with= tab->bush_root_tab? 
+ tab->bush_root_tab->bush_children->end : + join->join_tab + join->top_join_tab_count; + for (JOIN_TAB *join_tab= start_from; + join_tab != end_with; + join_tab++) + { + if (*join_tab->on_expr_ref) + { + JOIN_TAB *cond_tab= join_tab->first_inner; + COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref, + join->const_table_map, + (table_map) 0, -1, FALSE, FALSE); + if (!tmp_cond) + { + if (!thd->is_error()) + continue; + DBUG_RETURN(1); + } + tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, + &cond_tab->not_null_compl); + if (!tmp_cond) + DBUG_RETURN(1); + tmp_cond->quick_fix_field(); + cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond : + new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond, + tmp_cond); + if (!cond_tab->select_cond) + DBUG_RETURN(1); + cond_tab->select_cond->quick_fix_field(); + cond_tab->select_cond->update_used_tables(); + if (cond_tab->select) + cond_tab->select->cond= cond_tab->select_cond; + } + } + + + /* Push down non-constant conditions from ON expressions */ + JOIN_TAB *last_tab= tab; + + /* + while we're inside of an outer join and last_tab is + the last of its tables ... + */ + while (first_inner_tab && first_inner_tab->last_inner == last_tab) + { + /* + Table tab is the last inner table of an outer join. + An on expression is always attached to it. + */ + COND *on_expr= *first_inner_tab->on_expr_ref; + + table_map used_tables2= (join->const_table_map | + OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + + start_from= tab->bush_root_tab? + tab->bush_root_tab->bush_children->start : + join->join_tab + join->const_tables; + for (JOIN_TAB *inner_tab= start_from; + inner_tab <= last_tab; + inner_tab++) + { + DBUG_ASSERT(inner_tab->table); + current_map= inner_tab->table->map; + used_tables2|= current_map; + /* + psergey: have put the -1 below. It's bad, will need to fix it. 
+ */ + COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2, + current_map, + /*(inner_tab - first_tab)*/ -1, + FALSE, FALSE); + if (!tmp_cond && thd->is_error()) + DBUG_RETURN(1); + if (tab == last_tab) + { + /* + This pushes conjunctive conditions of ON expression of an outer + join such that: + - their used_tables() contain RAND_TABLE_BIT + - the conditions does not refer to any fields + (such like rand() > 0.5) + */ + table_map rand_table_bit= (table_map) RAND_TABLE_BIT; + COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2, + rand_table_bit, -1, + FALSE, FALSE); + if (rand_cond) + add_cond_and_fix(thd, &tmp_cond, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); + } + bool is_sjm_lookup_tab= FALSE; + if (inner_tab->bush_children) + { + /* + 'inner_tab' is an SJ-Materialization tab, i.e. we have a join + order like this: + + ot1 sjm_tab LEFT JOIN ot2 ot3 + ^ ^ + 'tab'-+ +--- left join we're adding triggers for + + LEFT JOIN's ON expression may not have references to subquery + columns. The subquery was in the WHERE clause, so IN-equality + is in the WHERE clause, also. + However, equality propagation code may have propagated the + IN-equality into ON expression, and we may get things like + + subquery_inner_table=const + + in the ON expression. We must not check such conditions during + SJM-lookup, because 1) subquery_inner_table has no valid current + row (materialization temp.table has it instead), and 2) they + would be true anyway. + */ + SJ_MATERIALIZATION_INFO *sjm= + inner_tab->bush_children->start->emb_sj_nest->sj_mat_info; + if (sjm->is_used && !sjm->is_sj_scan) + is_sjm_lookup_tab= TRUE; + } + + if (inner_tab == first_inner_tab && inner_tab->on_precond && + !is_sjm_lookup_tab) + add_cond_and_fix(thd, &tmp_cond, inner_tab->on_precond); + if (tmp_cond && !is_sjm_lookup_tab) + { + JOIN_TAB *cond_tab= (inner_tab < first_inner_tab ? + first_inner_tab : inner_tab); + Item **sel_cond_ref= (inner_tab < first_inner_tab ? 
+ &first_inner_tab->on_precond : + &inner_tab->select_cond); + /* + First add the guards for match variables of + all embedding outer join operations. + */ + if (!(tmp_cond= add_found_match_trig_cond(thd, + cond_tab->first_inner, + tmp_cond, + first_inner_tab))) + DBUG_RETURN(1); + /* + Now add the guard turning the predicate off for + the null complemented row. + */ + DBUG_PRINT("info", ("Item_func_trig_cond")); + tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, + &first_inner_tab-> + not_null_compl); + DBUG_PRINT("info", ("Item_func_trig_cond %p", + tmp_cond)); + if (tmp_cond) + tmp_cond->quick_fix_field(); + /* Add the predicate to other pushed down predicates */ + DBUG_PRINT("info", ("Item_cond_and")); + *sel_cond_ref= !(*sel_cond_ref) ? + tmp_cond : + new (thd->mem_root) Item_cond_and(thd, *sel_cond_ref, tmp_cond); + DBUG_PRINT("info", ("Item_cond_and %p", + (*sel_cond_ref))); + if (!(*sel_cond_ref)) + DBUG_RETURN(1); + (*sel_cond_ref)->quick_fix_field(); + (*sel_cond_ref)->update_used_tables(); + if (cond_tab->select) + cond_tab->select->cond= cond_tab->select_cond; + } + } + first_inner_tab= first_inner_tab->first_upper; + } + if (!tab->bush_children) + i++; + } + + if (unlikely(thd->trace_started())) + { + trace_attached_comp.end(); + Json_writer_array trace_attached_summary(thd, + "attached_conditions_summary"); + for (tab= first_depth_first_tab(join); tab; + tab= next_depth_first_tab(join, tab)) + { + if (!tab->table) + continue; + Item *const cond = tab->select_cond; + Json_writer_object trace_one_table(thd); + trace_one_table.add_table_name(tab); + trace_one_table.add("attached", cond); + } + } + } + DBUG_RETURN(0); +} + + +static +uint get_next_field_for_derived_key(uchar *arg) +{ + KEYUSE *keyuse= *(KEYUSE **) arg; + if (!keyuse) + return (uint) (-1); + TABLE *table= keyuse->table; + uint key= keyuse->key; + uint fldno= keyuse->keypart; + uint keypart= keyuse->keypart_map == (key_part_map) 1 ? 
+ 0 : (keyuse-1)->keypart+1; + for ( ; + keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno; + keyuse++) + keyuse->keypart= keypart; + if (keyuse->key != key) + keyuse= 0; + *((KEYUSE **) arg)= keyuse; + return fldno; +} + + +static +uint get_next_field_for_derived_key_simple(uchar *arg) +{ + KEYUSE *keyuse= *(KEYUSE **) arg; + if (!keyuse) + return (uint) (-1); + TABLE *table= keyuse->table; + uint key= keyuse->key; + uint fldno= keyuse->keypart; + for ( ; + keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno; + keyuse++) + ; + if (keyuse->key != key) + keyuse= 0; + *((KEYUSE **) arg)= keyuse; + return fldno; +} + +static +bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys) +{ + TABLE *table= keyuse->table; + if (table->alloc_keys(keys)) + return TRUE; + uint key_count= 0; + KEYUSE *first_keyuse= keyuse; + uint prev_part= keyuse->keypart; + uint parts= 0; + uint i= 0; + + for ( ; i < count && key_count < keys; ) + { + do + { + keyuse->key= table->s->keys; + keyuse->keypart_map= (key_part_map) (1 << parts); + keyuse++; + i++; + } + while (i < count && keyuse->used_tables == first_keyuse->used_tables && + keyuse->keypart == prev_part); + parts++; + if (i < count && keyuse->used_tables == first_keyuse->used_tables) + { + prev_part= keyuse->keypart; + } + else + { + KEYUSE *save_first_keyuse= first_keyuse; + if (table->check_tmp_key(table->s->keys, parts, + get_next_field_for_derived_key_simple, + (uchar *) &first_keyuse)) + + { + JOIN_TAB *tab; + first_keyuse= save_first_keyuse; + if (table->add_tmp_key(table->s->keys, parts, + get_next_field_for_derived_key, + (uchar *) &first_keyuse, + FALSE)) + return TRUE; + table->reginfo.join_tab->keys.set_bit(table->s->keys); + tab= table->reginfo.join_tab; + for (uint i=0; i < parts; i++) + tab->key_dependent|= save_first_keyuse[i].used_tables; + } + else + { + /* Mark keyuses for this key to be excluded */ + for (KEYUSE *curr=save_first_keyuse; curr < keyuse; 
curr++) + { + curr->key= MAX_KEY; + } + } + first_keyuse= keyuse; + key_count++; + parts= 0; + prev_part= keyuse->keypart; + } + } + + return FALSE; +} + + +static +bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array) +{ + KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*); + size_t elements= keyuse_array->elements; + TABLE *prev_table= 0; + for (size_t i= 0; i < elements; i++, keyuse++) + { + if (!keyuse->table) + break; + KEYUSE *first_table_keyuse= NULL; + table_map last_used_tables= 0; + uint count= 0; + uint keys= 0; + TABLE_LIST *derived= NULL; + if (keyuse->table != prev_table) + derived= keyuse->table->pos_in_table_list; + while (derived && derived->is_materialized_derived()) + { + if (keyuse->table != prev_table) + { + prev_table= keyuse->table; + while (keyuse->table == prev_table && keyuse->key != MAX_KEY) + { + keyuse++; + i++; + } + if (keyuse->table != prev_table) + { + keyuse--; + i--; + derived= NULL; + continue; + } + first_table_keyuse= keyuse; + last_used_tables= keyuse->used_tables; + count= 0; + keys= 0; + } + else if (keyuse->used_tables != last_used_tables) + { + keys++; + last_used_tables= keyuse->used_tables; + } + count++; + keyuse++; + i++; + if (keyuse->table != prev_table) + { + if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys)) + return TRUE; + keyuse--; + i--; + derived= NULL; + } + } + } + return FALSE; +} + + +/* + @brief + Drops unused keys for each materialized derived table/view + + @details + For materialized derived tables only ref access can be used, it employs + only one index, thus we don't need the rest. For each materialized derived + table/view call TABLE::use_index to save one index chosen by the optimizer + and free others. No key is chosen then all keys will be dropped. 
+*/ + +void JOIN::drop_unused_derived_keys() +{ + JOIN_TAB *tab; + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + + TABLE *tmp_tbl= tab->table; + if (!tmp_tbl) + continue; + if (!tmp_tbl->pos_in_table_list->is_materialized_derived()) + continue; + if (tmp_tbl->max_keys > 1 && !tab->is_ref_for_hash_join()) + tmp_tbl->use_index(tab->ref.key); + if (tmp_tbl->s->keys) + { + if (tab->ref.key >= 0 && tab->ref.key < MAX_KEY) + tab->ref.key= 0; + else + tmp_tbl->s->keys= 0; + } + tab->keys= (key_map) (tmp_tbl->s->keys ? 1 : 0); + } +} + + +/* + Evaluate the bitmap of used tables for items from the select list +*/ + +inline void JOIN::eval_select_list_used_tables() +{ + select_list_used_tables= 0; + Item *item; + List_iterator_fast<Item> it(fields_list); + while ((item= it++)) + { + select_list_used_tables|= item->used_tables(); + } + Item_outer_ref *ref; + List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list); + while ((ref= ref_it++)) + { + item= ref->outer_ref; + select_list_used_tables|= item->used_tables(); + } +} + + +/* + Determine {after which table we'll produce ordered set} + + SYNOPSIS + make_join_orderinfo() + join + + + DESCRIPTION + Determine if the set is already ordered for ORDER BY, so it can + disable join cache because it will change the ordering of the results. + Code handles sort table that is at any location (not only first after + the const tables) despite the fact that it's currently prohibited. + We must disable join cache if the first non-const table alone is + ordered. If there is a temp table the ordering is done as a last + operation and doesn't prevent join cache usage. + + RETURN + Number of table after which the set will be ordered + join->tables if we don't need an ordered set +*/ + +static uint make_join_orderinfo(JOIN *join) +{ + /* + This function needs to be fixed to take into account that we now have SJM + nests. 
+ */ + DBUG_ASSERT(0); + + JOIN_TAB *tab; + if (join->need_tmp) + return join->table_count; + tab= join->get_sort_by_join_tab(); + return tab ? (uint)(tab-join->join_tab) : join->table_count; +} + +/* + Deny usage of join buffer for the specified table + + SYNOPSIS + set_join_cache_denial() + tab join table for which join buffer usage is to be denied + + DESCRIPTION + The function denies usage of join buffer when joining the table 'tab'. + The table is marked as not employing any join buffer. If a join cache + object has been already allocated for the table this object is destroyed. + + RETURN + none +*/ + +static +void set_join_cache_denial(JOIN_TAB *join_tab) +{ + if (join_tab->cache) + { + /* + If there is a previous cache linked to this cache through the + next_cache pointer: remove the link. + */ + if (join_tab->cache->prev_cache) + join_tab->cache->prev_cache->next_cache= 0; + /* + Same for the next_cache + */ + if (join_tab->cache->next_cache) + join_tab->cache->next_cache->prev_cache= 0; + + join_tab->cache->free(); + join_tab->cache= 0; + } + if (join_tab->use_join_cache) + { + join_tab->use_join_cache= FALSE; + join_tab->used_join_cache_level= 0; + /* + It could be only sub_select(). It could not be sub_seject_sjm because we + don't do join buffering for the first table in sjm nest. + */ + join_tab[-1].next_select= sub_select; + if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join()) + { + join_tab->type= JT_ALL; + join_tab->ref.key_parts= 0; + } + join_tab->join->return_tab= join_tab; + } +} + + +/** + The default implementation of unlock-row method of READ_RECORD, + used in all access methods. 
+*/ + +void rr_unlock_row(st_join_table *tab) +{ + READ_RECORD *info= &tab->read_record; + info->table->file->unlock_row(); +} + + +/** + Pick the appropriate access method functions + + Sets the functions for the selected table access method + + @param tab Table reference to put access method +*/ + +static void +pick_table_access_method(JOIN_TAB *tab) +{ + switch (tab->type) + { + case JT_REF: + tab->read_first_record= join_read_always_key; + tab->read_record.read_record_func= join_read_next_same; + break; + + case JT_REF_OR_NULL: + tab->read_first_record= join_read_always_key_or_null; + tab->read_record.read_record_func= join_read_next_same_or_null; + break; + + case JT_CONST: + tab->read_first_record= join_read_const; + tab->read_record.read_record_func= join_no_more_records; + break; + + case JT_EQ_REF: + tab->read_first_record= join_read_key; + tab->read_record.read_record_func= join_no_more_records; + break; + + case JT_FT: + tab->read_first_record= join_ft_read_first; + tab->read_record.read_record_func= join_ft_read_next; + break; + + case JT_SYSTEM: + tab->read_first_record= join_read_system; + tab->read_record.read_record_func= join_no_more_records; + break; + + /* keep gcc happy */ + default: + break; + } +} + + +/* + Revise usage of join buffer for the specified table and the whole nest + + SYNOPSIS + revise_cache_usage() + tab join table for which join buffer usage is to be revised + + DESCRIPTION + The function revise the decision to use a join buffer for the table 'tab'. 
+ If this table happened to be among the inner tables of a nested outer join/ + semi-join the functions denies usage of join buffers for all of them + + RETURN + none +*/ + +static +void revise_cache_usage(JOIN_TAB *join_tab) +{ + JOIN_TAB *tab; + JOIN_TAB *first_inner; + + if (join_tab->first_inner) + { + JOIN_TAB *end_tab= join_tab; + for (first_inner= join_tab->first_inner; + first_inner; + first_inner= first_inner->first_upper) + { + for (tab= end_tab; tab >= first_inner; tab--) + set_join_cache_denial(tab); + end_tab= first_inner; + } + } + else if (join_tab->first_sj_inner_tab) + { + first_inner= join_tab->first_sj_inner_tab; + for (tab= join_tab; tab >= first_inner; tab--) + { + set_join_cache_denial(tab); + } + } + else set_join_cache_denial(join_tab); +} + + +/* + end_select-compatible function that writes the record into a sjm temptable + + SYNOPSIS + end_sj_materialize() + join The join + join_tab Points to right after the last join_tab in materialization bush + end_of_records FALSE <=> This call is made to pass another record + combination + TRUE <=> EOF (no action) + + DESCRIPTION + This function is used by semi-join materialization to capture suquery's + resultset and write it into the temptable (that is, materialize it). + + NOTE + This function is used only for semi-join materialization. Non-semijoin + materialization uses different mechanism. 
+ + RETURN + NESTED_LOOP_OK + NESTED_LOOP_ERROR +*/ + +enum_nested_loop_state +end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + int error; + THD *thd= join->thd; + SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info; + DBUG_ENTER("end_sj_materialize"); + if (!end_of_records) + { + TABLE *table= sjm->table; + + List_iterator<Item> it(sjm->sjm_table_cols); + Item *item; + while ((item= it++)) + { + if (item->is_null()) + DBUG_RETURN(NESTED_LOOP_OK); + } + fill_record(thd, table, table->field, sjm->sjm_table_cols, TRUE, FALSE); + if (unlikely(thd->is_error())) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (unlikely((error= table->file->ha_write_tmp_row(table->record[0])))) + { + /* create_myisam_from_heap will generate error if needed */ + if (table->file->is_fatal_error(error, HA_CHECK_DUP) && + create_internal_tmp_table_from_heap(thd, table, + sjm->sjm_table_param.start_recinfo, + &sjm->sjm_table_param.recinfo, error, 1, NULL)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + Check whether a join buffer can be used to join the specified table + + SYNOPSIS + check_join_cache_usage() + tab joined table to check join buffer usage for + options options of the join + no_jbuf_after don't use join buffering after table with this number + prev_tab previous join table + + DESCRIPTION + The function finds out whether the table 'tab' can be joined using a join + buffer. This check is performed after the best execution plan for 'join' + has been chosen. If the function decides that a join buffer can be employed + then it selects the most appropriate join cache object that contains this + join buffer. 
+ The result of the check and the type of the join buffer to be used + depend on: + - the access method to access rows of the joined table + - whether the join table is an inner table of an outer join or semi-join + - whether the optimizer switches + outer_join_with_cache, semijoin_with_cache, join_cache_incremental, + join_cache_hashed, join_cache_bka, + are set on or off + - the join cache level set for the query + - the join 'options'. + + In any case join buffer is not used if the number of the joined table is + greater than 'no_jbuf_after'. It's also never used if the value of + join_cache_level is equal to 0. + If the optimizer switch outer_join_with_cache is off no join buffer is + used for outer join operations. + If the optimizer switch semijoin_with_cache is off no join buffer is used + for semi-join operations. + If the optimizer switch join_cache_incremental is off no incremental join + buffers are used. + If the optimizer switch join_cache_hashed is off then the optimizer uses + neither BNLH algorithm, nor BKAH algorithm to perform join operations. + + If the optimizer switch join_cache_bka is off then the optimizer uses + neither BKA algorithm, nor BKAH algorithm to perform join operations. + The valid settings for join_cache_level lie in the interval 0..8. + If it is set to 0 no join buffers are used to perform join operations. 
+ Currently we differentiate between join caches of 8 levels: + 1 : non-incremental join cache used for BNL join algorithm + 2 : incremental join cache used for BNL join algorithm + 3 : non-incremental join cache used for BNLH join algorithm + 4 : incremental join cache used for BNLH join algorithm + 5 : non-incremental join cache used for BKA join algorithm + 6 : incremental join cache used for BKA join algorithm + 7 : non-incremental join cache used for BKAH join algorithm + 8 : incremental join cache used for BKAH join algorithm + If the value of join_cache_level is set to n then no join caches of + levels higher than n can be employed. + + If the optimizer switches outer_join_with_cache, semijoin_with_cache, + join_cache_incremental, join_cache_hashed, join_cache_bka are all on + the following rules are applied. + If join_cache_level==1|2 then join buffer is used for inner joins, outer + joins and semi-joins with 'JT_ALL' access method. In this case a + JOIN_CACHE_BNL object is employed. + If join_cache_level==3|4 and join buffer is used for a join operation + (inner join, outer join, semi-join) with 'JT_REF'/'JT_EQ_REF' access method + then a JOIN_CACHE_BNLH object is employed. + If an index is used to access rows of the joined table and the value of + join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed. + If an index is used to access rows of the joined table and the value of + join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed. + If the value of join_cache_level is odd then creation of a non-linked + join cache is forced. + + Currently for any join operation a join cache of the + highest allowed and applicable level is used. 
+ For example, if join_cache_level is set to 6 and the optimizer switch + join_cache_bka is off, while the optimizer switch join_cache_hashed is + on then for any inner join operation with JT_REF/JT_EQ_REF access method + to the joined table the BNLH join algorithm will be used, while for + the table accessed by the JT_ALL methods the BNL algorithm will be used. + + If the function decides that a join buffer can be used to join the table + 'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns + the selected join cache object to the field 'cache' of the previous + join table. + If the function creates a join cache object it tries to initialize it. The + failure to do this results in an invocation of the function that destructs + the created object. + If the function decides that for some reason no join buffer can be used + for a table it calls the function revise_cache_usage that checks + whether join cache should be denied for some previous tables. In this case + a pointer to the first table for which join cache usage has been denied + is passed in join->return_tab (see the function set_join_cache_denial). + + The function changes the value of the fields tab->icp_other_tables_ok and + tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm + requires it. + + NOTES + An inner table of a nested outer join or a nested semi-join can be currently + joined only when a linked cache object is employed. In these cases setting + join_cache_incremental to 'off' results in denial of usage of any join + buffer when joining the table. + For a nested outer join/semi-join, currently, we either use join buffers for + all inner tables or for none of them. + Some engines (e.g. Falcon) currently allow to use only a join cache + of the type JOIN_CACHE_BKAH when the joined table is accessed through + an index. For these engines setting the value of join_cache_level to 5 or 6 + results in that no join buffer is used to join the table. 
+ + RETURN VALUE + cache level if cache is used, otherwise returns 0 + + TODO + Support BKA inside SJ-Materialization nests. When doing this, we'll need + to only store sj-inner tables in the join buffer. +#if 0 + JOIN_TAB *first_tab= join->join_tab+join->const_tables; + uint n_tables= i-join->const_tables; + / * + We normally put all preceding tables into the join buffer, except + for the constant tables. + If we're inside a semi-join materialization nest, e.g. + + outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ... + ^-- we're here + + then we need to put into the join buffer only the tables from + within the nest. + * / + if (i >= first_sjm_table && i < last_sjm_table) + { + n_tables= i - first_sjm_table; // will be >0 if we got here + first_tab= join->join_tab + first_sjm_table; + } +#endif +*/ + +static +uint check_join_cache_usage(JOIN_TAB *tab, + ulonglong options, + uint no_jbuf_after, + uint table_index, + JOIN_TAB *prev_tab) +{ + Cost_estimate cost; + uint flags= 0; + ha_rows rows= 0; + uint bufsz= 4096; + JOIN_CACHE *prev_cache=0; + JOIN *join= tab->join; + MEM_ROOT *root= join->thd->mem_root; + uint cache_level= tab->used_join_cache_level; + bool force_unlinked_cache= + !(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT); + bool no_hashed_cache= + !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT); + bool no_bka_cache= + !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT); + + join->return_tab= 0; + + if (tab->no_forced_join_cache) + goto no_join_cache; + + /* + Don't use join cache if @@join_cache_level==0 or this table is the first + one join suborder (either at top level or inside a bush) + */ + if (cache_level == 0 || !prev_tab) + return 0; + + if (force_unlinked_cache && (cache_level%2 == 0)) + cache_level--; + + if (options & SELECT_NO_JOIN_CACHE) + goto no_join_cache; + + if (tab->use_quick == 2) + goto no_join_cache; + + if (tab->table->map & join->complex_firstmatch_tables) + goto no_join_cache; + + /* + Don't use join 
cache if we're inside a join tab range covered by LooseScan + strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it + should be possible to use join buffering in the same way we're using it for + multi-table firstmatch ranges). + */ + if (tab->inside_loosescan_range) + goto no_join_cache; + + if (tab->is_inner_table_of_semijoin() && + !join->allowed_semijoin_with_cache) + goto no_join_cache; + if (tab->is_inner_table_of_outer_join() && + !join->allowed_outer_join_with_cache) + goto no_join_cache; + + if (tab->table->pos_in_table_list->table_function && + !tab->table->pos_in_table_list->table_function->join_cache_allowed()) + goto no_join_cache; + + /* + Non-linked join buffers can't guarantee one match + */ + if (tab->is_nested_inner()) + { + if (force_unlinked_cache || cache_level == 1) + goto no_join_cache; + if (cache_level & 1) + cache_level--; + } + + /* + Don't use BKA for materialized tables. We could actually have a + meaningful use of BKA when linked join buffers are used. + + The problem is, the temp.table is not filled (actually not even opened + properly) yet, and this doesn't let us call + handler->multi_range_read_info(). It is possible to come up with + estimates, etc. without acessing the table, but it seems not to worth the + effort now. + */ + if (tab->table->pos_in_table_list->is_materialized_derived()) + { + no_bka_cache= true; + /* + Don't use hash join algorithm if the temporary table for the rows + of the derived table will be created with an equi-join key. + */ + if (tab->table->s->keys) + no_hashed_cache= true; + } + + /* + Don't use join buffering if we're dictated not to by no_jbuf_after + (This is not meaningfully used currently) + */ + if (table_index > no_jbuf_after) + goto no_join_cache; + + /* + TODO: BNL join buffer should be perfectly ok with tab->bush_children. 
+ */ + if (tab->loosescan_match_tab || tab->bush_children) + goto no_join_cache; + + for (JOIN_TAB *first_inner= tab->first_inner; first_inner; + first_inner= first_inner->first_upper) + { + if (first_inner != tab && + (!first_inner->use_join_cache || !(tab-1)->use_join_cache)) + goto no_join_cache; + } + if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab && + (!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache)) + goto no_join_cache; + if (!prev_tab->use_join_cache) + { + /* + Check whether table tab and the previous one belong to the same nest of + inner tables and if so do not use join buffer when joining table tab. + */ + if (tab->first_inner && tab != tab->first_inner) + { + for (JOIN_TAB *first_inner= tab[-1].first_inner; + first_inner; + first_inner= first_inner->first_upper) + { + if (first_inner == tab->first_inner) + goto no_join_cache; + } + } + else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab && + tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab) + goto no_join_cache; + } + + prev_cache= prev_tab->cache; + + switch (tab->type) { + case JT_ALL: + if (cache_level == 1) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BNL(join, tab, prev_cache))) + { + tab->icp_other_tables_ok= FALSE; + /* If make_join_select() hasn't called make_scan_filter(), do it now */ + if (!tab->cache_select && tab->make_scan_filter()) + goto no_join_cache; + return (2 - MY_TEST(!prev_cache)); + } + goto no_join_cache; + case JT_SYSTEM: + case JT_CONST: + case JT_REF: + case JT_EQ_REF: + if (cache_level <=2 || (no_hashed_cache && no_bka_cache)) + goto no_join_cache; + if (tab->ref.is_access_triggered()) + goto no_join_cache; + + if (!tab->is_ref_for_hash_join() && !no_bka_cache) + { + flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT; + if (tab->table->covering_keys.is_set(tab->ref.key)) + flags|= HA_MRR_INDEX_ONLY; + rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20, + tab->ref.key_parts, + &bufsz, 
&flags, &cost); + } + + if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache || + tab->is_ref_for_hash_join() || + ((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6)) + { + if (!tab->hash_join_is_possible() || + tab->make_scan_filter()) + goto no_join_cache; + if (cache_level == 3) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BNLH(join, tab, prev_cache))) + { + tab->icp_other_tables_ok= FALSE; + return (4 - MY_TEST(!prev_cache)); + } + goto no_join_cache; + } + if (cache_level > 4 && no_bka_cache) + goto no_join_cache; + + if ((flags & HA_MRR_NO_ASSOCIATION) && + (cache_level <= 6 || no_hashed_cache)) + goto no_join_cache; + + if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL)) + { + if (cache_level <= 6 || no_hashed_cache) + { + if (cache_level == 5) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BKA(join, tab, flags, prev_cache))) + return (6 - MY_TEST(!prev_cache)); + goto no_join_cache; + } + else + { + if (cache_level == 7) + prev_cache= 0; + if ((tab->cache= new (root) JOIN_CACHE_BKAH(join, tab, flags, prev_cache))) + { + tab->idx_cond_fact_out= FALSE; + return (8 - MY_TEST(!prev_cache)); + } + goto no_join_cache; + } + } + goto no_join_cache; + default : ; + } + +no_join_cache: + if (tab->type != JT_ALL && tab->is_ref_for_hash_join()) + { + tab->type= JT_ALL; + tab->ref.key_parts= 0; + } + revise_cache_usage(tab); + return 0; +} + + +/* + Check whether join buffers can be used to join tables of a join + + SYNOPSIS + check_join_cache_usage() + join join whose tables are to be checked + options options of the join + no_jbuf_after don't use join buffering after table with this number + (The tables are assumed to be numbered in + first_linear_tab(join, WITHOUT_CONST_TABLES), + next_linear_tab(join, WITH_CONST_TABLES) order). + + DESCRIPTION + For each table after the first non-constant table the function checks + whether the table can be joined using a join buffer. 
If the function decides
    that a join buffer can be employed then it selects the most appropriate join
    cache object that contains this join buffer whose level is not greater
    than join_cache_level set for the join. To make this check the function
    calls the function check_join_cache_usage for every non-constant table.

  NOTES
    In some situations (e.g. for nested outer joins, for nested semi-joins) only
    incremental buffers can be used. If it turns out that for some inner table
    no join buffer can be used then any inner table of an outer/semi-join nest
    cannot use join buffer. In the case when already chosen buffer must be
    denied for a table the function recalls check_join_cache_usage()
    starting from this table. The pointer to the table from which the check
    has to be restarted is returned in join->return_tab (see the description
    of check_join_cache_usage).
*/

void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
                                       uint no_jbuf_after)
{
  JOIN_TAB *tab;
  JOIN_TAB *prev_tab;

  /* First pass: start every non-const table at the maximum allowed level. */
  for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
       tab;
       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
  {
    tab->used_join_cache_level= join->max_allowed_join_cache_level;
  }

  /*
    Second pass: decide the actual level per table. idx is the position
    passed to check_join_cache_usage(); it is advanced only for tabs that
    have no bush_children.
  */
  uint idx= join->const_tables;
  for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
       tab;
       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
  {
restart:
    tab->icp_other_tables_ok= TRUE;
    tab->idx_cond_fact_out= TRUE;

    /*
      Check if we have a preceding join_tab, as something that will feed us
      records that we could buffer. We don't have it, if
       - this is the first non-const table in the join order,
       - this is the first table inside an SJM nest.
    */
    prev_tab= tab - 1;
    if (tab == join->join_tab + join->const_tables ||
        (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
      prev_tab= NULL;

    switch (tab->type) {
    case JT_SYSTEM:
    case JT_CONST:
    case JT_EQ_REF:
    case JT_REF:
    case JT_REF_OR_NULL:
    case JT_ALL:
      tab->used_join_cache_level= check_join_cache_usage(tab, options,
                                                         no_jbuf_after,
                                                         idx,
                                                         prev_tab);
      tab->use_join_cache= MY_TEST(tab->used_join_cache_level);
      /*
        psergey-merge: todo: raise the question that this is really stupid that
        we can first allocate a join buffer, then decide not to use it and free
        it.
      */
      if (join->return_tab)
      {
        /*
          Re-run the check starting from the table reported in
          join->return_tab.
          NOTE(review): idx is not rewound here although tab moves to
          another table -- confirm that check_join_cache_usage() does not
          depend on idx matching the restarted table.
        */
        tab= join->return_tab;
        goto restart;
      }
      break;
    default:
      /* Any other access type never uses a join buffer. */
      tab->used_join_cache_level= 0;
    }
    if (!tab->bush_children)
      idx++;
  }
}

/**
  Remove pushdown conditions that are already checked by the scan phase
  of BNL/BNLH joins.

  @note
  If the single-table condition for this table will be used by a
  blocked join to pre-filter this table's rows, there is no need
  to re-check the same single-table condition for each joined record.

  This method removes from JOIN_TAB::select_cond and JOIN_TAB::select::cond
  all top-level conjuncts that also appear in JOIN_TAB::cache_select::cond.
*/

void JOIN_TAB::remove_redundant_bnl_scan_conds()
{
  /* Only applies when a BNL or BNLH join cache with a scan condition is used */
  if (!(select_cond && cache_select && cache &&
        (cache->get_join_alg() == JOIN_CACHE::BNL_JOIN_ALG ||
         cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)))
    return;

  /*
    select->cond is not processed separately. This method assumes it is always
    the same as select_cond.
  */
  if (select && select->cond != select_cond)
    return;

  if (is_cond_and(select_cond))
  {
    List_iterator<Item> pushed_cond_li(*((Item_cond*) select_cond)->argument_list());
    Item *pushed_item;
    /* Collects the conjuncts of select_cond that must still be checked */
    Item_cond_and *reduced_select_cond= new (join->thd->mem_root)
      Item_cond_and(join->thd);

    if (is_cond_and(cache_select->cond))
    {
      /* Both are ANDs: keep each pushed conjunct not found in the scan cond */
      List_iterator<Item> scan_cond_li(*((Item_cond*) cache_select->cond)->argument_list());
      Item *scan_item;
      while ((pushed_item= pushed_cond_li++))
      {
        bool found_cond= false;
        scan_cond_li.rewind();
        while ((scan_item= scan_cond_li++))
        {
          if (pushed_item->eq(scan_item, 0))
          {
            found_cond= true;
            break;
          }
        }
        if (!found_cond)
          reduced_select_cond->add(pushed_item, join->thd->mem_root);
      }
    }
    else
    {
      /* Scan condition is a single item: drop only the conjunct equal to it */
      while ((pushed_item= pushed_cond_li++))
      {
        if (!pushed_item->eq(cache_select->cond, 0))
          reduced_select_cond->add(pushed_item, join->thd->mem_root);
      }
    }

    /*
      JOIN_CACHE::check_match uses JOIN_TAB::select->cond instead of
      JOIN_TAB::select_cond. set_cond() sets both pointers.
    */
    if (reduced_select_cond->argument_list()->is_empty())
      set_cond(NULL);
    else if (reduced_select_cond->argument_list()->elements == 1)
      set_cond(reduced_select_cond->argument_list()->head());
    else
    {
      reduced_select_cond->quick_fix_field();
      set_cond(reduced_select_cond);
    }
  }
  else if (select_cond->eq(cache_select->cond, 0))
    set_cond(NULL);
}


/*
  Plan refinement stage: do various setup things for the executor

  SYNOPSIS
    make_join_readinfo()
      join           Join being processed
      options        Join's options (checking for SELECT_DESCRIBE,
                     SELECT_NO_JOIN_CACHE)
      no_jbuf_after  Don't use join buffering after table with this number.
+ + DESCRIPTION + Plan refinement stage: do various set ups for the executioner + - set up use of join buffering + - push index conditions + - increment relevant counters + - etc + + RETURN + FALSE - OK + TRUE - Out of memory +*/ + +static bool +make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) +{ + JOIN_TAB *tab; + uint i; + DBUG_ENTER("make_join_readinfo"); + + bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE)); + bool sorted= 1; + + join->complex_firstmatch_tables= table_map(0); + + if (!join->select_lex->sj_nests.is_empty() && + setup_semijoin_dups_elimination(join, options, no_jbuf_after)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + + /* For const tables, set partial_join_cardinality to 1. */ + for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++) + tab->partial_join_cardinality= 1; + + JOIN_TAB *prev_tab= NULL; + i= join->const_tables; + for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + /* + The approximation below for partial join cardinality is not good because + - it does not take into account some pushdown predicates + - it does not differentiate between inner joins, outer joins and + semi-joins. + Later it should be improved. + */ + + if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) + prev_tab= NULL; + DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table); + + tab->partial_join_cardinality= join->best_positions[i].records_read * + (prev_tab? 
prev_tab->partial_join_cardinality : 1); + if (!tab->bush_children) + i++; + } + + check_join_cache_usage_for_tables(join, options, no_jbuf_after); + + JOIN_TAB *first_tab; + for (tab= first_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) + { + if (tab->bush_children) + { + if (setup_sj_materialization_part2(tab)) + return TRUE; + } + + TABLE *table=tab->table; + uint jcl= tab->used_join_cache_level; + tab->read_record.table= table; + tab->read_record.unlock_row= rr_unlock_row; + tab->read_record.print_error= true; + tab->sorted= sorted; + sorted= 0; // only first must be sorted + + + /* + We should not set tab->next_select for the last table in the + SMJ-nest, as setup_sj_materialization() has already set it to + end_sj_materialize. + */ + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + } + + + if (tab->loosescan_match_tab) + { + if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab-> + loosescan_key_len))) + return TRUE; /* purecov: inspected */ + tab->sorted= TRUE; + } + table->status=STATUS_NO_RECORD; + pick_table_access_method (tab); + + if (jcl) + tab[-1].next_select=sub_select_cache; + + if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG) + tab->type= JT_HASH; + + switch (tab->type) { + case JT_SYSTEM: // Only happens with left join + case JT_CONST: // Only happens with left join + /* Only happens with outer joins */ + tab->read_first_record= tab->type == JT_SYSTEM ? 
join_read_system + : join_read_const; + tab->read_record.unlock_row= join_const_unlock_row; + if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) && + (!jcl || jcl > 4) && !tab->ref.is_access_triggered()) + push_index_cond(tab, tab->ref.key); + break; + case JT_EQ_REF: + tab->read_record.unlock_row= join_read_key_unlock_row; + /* fall through */ + if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) && + (!jcl || jcl > 4) && !tab->ref.is_access_triggered()) + push_index_cond(tab, tab->ref.key); + break; + case JT_REF_OR_NULL: + case JT_REF: + if (tab->select) + { + delete tab->select->quick; + tab->select->quick=0; + } + delete tab->quick; + tab->quick=0; + if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) && + (!jcl || jcl > 4) && !tab->ref.is_access_triggered()) + push_index_cond(tab, tab->ref.key); + break; + case JT_ALL: + case JT_HASH: + /* + If previous table use cache + If the incoming data set is already sorted don't use cache. + Also don't use cache if this is the first table in semi-join + materialization nest. 
+ */ + /* These init changes read_record */ + if (tab->use_quick == 2) + { + join->thd->set_status_no_good_index_used(); + tab->read_first_record= join_init_quick_read_record; + if (statistics) + join->thd->inc_status_select_range_check(); + } + else + { + if (!tab->bush_children) + tab->read_first_record= join_init_read_record; + if (tab == first_tab) + { + if (tab->select && tab->select->quick) + { + if (statistics) + join->thd->inc_status_select_range(); + } + else + { + join->thd->set_status_no_index_used(); + if (statistics) + { + join->thd->inc_status_select_scan(); + join->thd->query_plan_flags|= QPLAN_FULL_SCAN; + } + } + } + else + { + if (tab->select && tab->select->quick) + { + if (statistics) + join->thd->inc_status_select_full_range_join(); + } + else + { + join->thd->set_status_no_index_used(); + if (statistics) + { + join->thd->inc_status_select_full_join(); + join->thd->query_plan_flags|= QPLAN_FULL_JOIN; + } + } + } + if (!table->no_keyread) + { + if (!(tab->select && tab->select->quick && + tab->select->quick->index != MAX_KEY && //not index_merge + table->covering_keys.is_set(tab->select->quick->index)) && + (!table->covering_keys.is_clear_all() && + !(tab->select && tab->select->quick))) + { // Only read index tree + if (tab->loosescan_match_tab) + tab->index= tab->loosescan_key; + else + { +#ifdef BAD_OPTIMIZATION + /* + It has turned out that the below change, while speeding things + up for disk-bound loads, slows them down for cases when the data + is in disk cache (see BUG#35850): + See bug #26447: "Using the clustered index for a table scan + is always faster than using a secondary index". + */ + if (table->file->pk_is_clustering_key(table->s->primary_key)) + tab->index= table->s->primary_key; + else +#endif + tab->index=find_shortest_key(table, & table->covering_keys); + } + tab->read_first_record= join_read_first; + /* Read with index_first / index_next */ + tab->type= tab->type == JT_ALL ? 
JT_NEXT : JT_HASH_NEXT; + } + } + if (tab->select && tab->select->quick && + tab->select->quick->index != MAX_KEY && + !tab->table->covering_keys.is_set(tab->select->quick->index)) + push_index_cond(tab, tab->select->quick->index); + } + break; + case JT_FT: + break; + /* purecov: begin deadcode */ + default: + DBUG_PRINT("error",("Table type %d found",tab->type)); + break; + case JT_UNKNOWN: + case JT_MAYBE_REF: + abort(); + /* purecov: end */ + } + + DBUG_EXECUTE("where", + char buff[256]; + String str(buff,sizeof(buff),system_charset_info); + str.length(0); + if (tab->table) + str.append(tab->table->alias); + else + str.append(STRING_WITH_LEN("<no_table_name>")); + str.append(STRING_WITH_LEN(" final_pushdown_cond")); + print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY);); + } + uint n_top_tables= (uint)(join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start); + + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ + + /* + If a join buffer is used to join a table the ordering by an index + for the first non-constant table cannot be employed anymore. + */ + for (tab= join->join_tab + join->const_tables ; + tab != join->join_tab + n_top_tables ; tab++) + { + if (tab->use_join_cache) + { + JOIN_TAB *sort_by_tab= join->group && join->simple_group && + join->group_list ? + join->join_tab+join->const_tables : + join->get_sort_by_join_tab(); + /* + It could be that sort_by_tab==NULL, and the plan is to use filesort() + on the first table. 
+ */ + if (join->order) + { + join->simple_order= 0; + join->need_tmp= 1; + } + + if (join->group && !join->group_optimized_away) + { + join->need_tmp= 1; + join->simple_group= 0; + } + + if (sort_by_tab) + { + join->need_tmp= 1; + join->simple_order= join->simple_group= 0; + if (sort_by_tab->type == JT_NEXT && + !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index)) + { + sort_by_tab->type= JT_ALL; + sort_by_tab->read_first_record= join_init_read_record; + } + else if (sort_by_tab->type == JT_HASH_NEXT && + !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index)) + { + sort_by_tab->type= JT_HASH; + sort_by_tab->read_first_record= join_init_read_record; + } + } + break; + } + } + + DBUG_RETURN(FALSE); +} + + +/** + Give error if we some tables are done with a full join. + + This is used by multi_table_update and multi_table_delete when running + in safe mode. + + @param join Join condition + + @retval + 0 ok + @retval + 1 Error (full join used) +*/ + +bool error_if_full_join(JOIN *join) +{ + for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab; + tab= next_top_level_tab(join, tab)) + { + if (tab->type == JT_ALL && (!tab->select || !tab->select->quick)) + { + my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, + ER_THD(join->thd, + ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0)); + return(1); + } + } + return(0); +} + + +/** + Build rowid filter. + + @retval + 0 ok + @retval + 1 Error, transaction should be rolled back +*/ + +bool JOIN_TAB::build_range_rowid_filter_if_needed() +{ + bool result= false; + if (rowid_filter && !is_rowid_filter_built) + { + /** + The same handler object (table->file) is used to build a filter + and to perfom a primary table access (by the main query). + + To estimate the time for filter building tracker should be changed + and after building of the filter has been finished it should be + switched back to the previos tracker. 
+ */ + Exec_time_tracker *table_tracker= table->file->get_time_tracker(); + Rowid_filter_tracker *rowid_tracker= rowid_filter->get_tracker(); + table->file->set_time_tracker(rowid_tracker->get_time_tracker()); + rowid_tracker->start_tracking(join->thd); + Rowid_filter::build_return_code build_rc= rowid_filter->build(); + if (build_rc == Rowid_filter::SUCCESS) + is_rowid_filter_built= true; + else + { + delete rowid_filter; + rowid_filter= 0; + } + rowid_tracker->stop_tracking(join->thd); + table->file->set_time_tracker(table_tracker); + result= (build_rc == Rowid_filter::FATAL_ERROR); + } + return result; +} + + +/** + cleanup JOIN_TAB. + + DESCRIPTION + This is invoked when we've finished all join executions. +*/ + +void JOIN_TAB::cleanup() +{ + DBUG_ENTER("JOIN_TAB::cleanup"); + + DBUG_PRINT("enter", ("tab: %p table %s.%s", + this, + (table ? table->s->db.str : "?"), + (table ? table->s->table_name.str : "?"))); + delete select; + select= 0; + delete quick; + quick= 0; + if (rowid_filter) + { + delete rowid_filter; + rowid_filter= 0; + } + if (cache) + { + cache->free(); + cache= 0; + } + limit= 0; + // Free select that was created for filesort outside of create_sort_index + if (filesort && filesort->select && !filesort->own_select) + delete filesort->select; + delete filesort; + filesort= NULL; + /* Skip non-existing derived tables/views result tables */ + if (table && + (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created())) + { + table->file->ha_end_keyread(); + table->file->ha_index_or_rnd_end(); + } + if (table) + { + table->file->ha_end_keyread(); + if (type == JT_FT) + table->file->ha_ft_end(); + else + table->file->ha_index_or_rnd_end(); + preread_init_done= FALSE; + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab) + { + /* + Set this to NULL so that cleanup_empty_jtbm_semi_joins() doesn't + attempt to make another free_tmp_table call. 
+ */ + table->pos_in_table_list->table= NULL; + free_tmp_table(join->thd, table); + } + else + { + TABLE_LIST *tmp= table->pos_in_table_list; + end_read_record(&read_record); + tmp->jtbm_subselect->cleanup(); + /* + The above call freed the materializedd temptable. Set it to NULL so + that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked + multiple times (it may be) + */ + tmp->table= NULL; + } + table= NULL; + DBUG_VOID_RETURN; + } + /* + We need to reset this for next select + (Tested in part_of_refkey) + */ + table->reginfo.join_tab= 0; + } + end_read_record(&read_record); + explain_plan= NULL; + DBUG_VOID_RETURN; +} + + +/** + Estimate the time to get rows of the joined table +*/ + +double JOIN_TAB::scan_time() +{ + double res; + if (table->is_created()) + { + if (table->is_filled_at_execution()) + { + get_delayed_table_estimates(table, &records, &read_time, + &startup_cost); + found_records= records; + table->opt_range_condition_rows= records; + } + else + { + found_records= records= table->stat_records(); + read_time= table->file->scan_time(); + /* + table->opt_range_condition_rows has already been set to + table->file->stats.records + */ + } + res= read_time; + } + else + { + found_records= records=table->stat_records(); + read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub + res= read_time; + } + return res; +} + + +/** + Estimate the number of rows that a an access method will read from a table. + + @todo: why not use JOIN_TAB::found_records +*/ + +ha_rows JOIN_TAB::get_examined_rows() +{ + double examined_rows; + const SQL_SELECT *sel= get_sql_select(); + + if (sel && sel->quick && use_quick != 2) + examined_rows= (double)sel->quick->records; + else if (type == JT_NEXT || type == JT_ALL || + type == JT_HASH || type ==JT_HASH_NEXT) + { + if (limit) + { + /* + @todo This estimate is wrong, a LIMIT query may examine much more rows + than the LIMIT itself. 
+ */ + examined_rows= (double)limit; + } + else + { + if (table->is_filled_at_execution()) + examined_rows= (double)records; + else + { + /* + handler->info(HA_STATUS_VARIABLE) has been called in + make_join_statistics() + */ + examined_rows= (double)table->stat_records(); + } + } + } + else + examined_rows= records_read; + + if (examined_rows >= (double) HA_ROWS_MAX) + return HA_ROWS_MAX; + return (ha_rows) examined_rows; +} + + +/** + Initialize the join_tab before reading. + Currently only derived table/view materialization is done here. + + TODO: consider moving this together with join_tab_execution_startup +*/ +bool JOIN_TAB::preread_init() +{ + TABLE_LIST *derived= table->pos_in_table_list; + DBUG_ENTER("JOIN_TAB::preread_init"); + + if (!derived || !derived->is_materialized_derived()) + { + preread_init_done= TRUE; + DBUG_RETURN(FALSE); + } + + /* Materialize derived table/view. */ + if ((!derived->get_unit()->executed || + derived->is_recursive_with_table() || + derived->get_unit()->uncacheable) && + mysql_handle_single_derived(join->thd->lex, + derived, DT_CREATE | DT_FILL)) + DBUG_RETURN(TRUE); + + if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) || + derived->is_nonrecursive_derived_with_rec_ref() || + is_split_derived) + preread_init_done= TRUE; + if (select && select->quick) + select->quick->replace_handler(table->file); + + DBUG_EXECUTE_IF("show_explain_probe_join_tab_preread", + if (dbug_user_var_equals_int(join->thd, + "show_explain_probe_select_id", + join->select_lex->select_number)) + dbug_serve_apcs(join->thd, 1); + ); + + /* init ftfuns for just initialized derived table */ + if (table->fulltext_searched) + if (init_ftfuncs(join->thd, join->select_lex, MY_TEST(join->order))) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +bool JOIN_TAB::pfs_batch_update(JOIN *join) +{ + /* + Use PFS batch mode if + 1. tab is an inner-most table, or + 2. will read more than one row (not eq_ref or const access type) + 3. 
no subqueries + */ + + return join->join_tab + join->table_count - 1 == this && // 1 + type != JT_EQ_REF && type != JT_CONST && type != JT_SYSTEM && // 2 + (!select_cond || !select_cond->with_subquery()); // 3 +} + + +/** + Build a TABLE_REF structure for index lookup in the temporary table + + @param thd Thread handle + @param tmp_key The temporary table key + @param it The iterator of items for lookup in the key + @param skip Number of fields from the beginning to skip + + @details + Build TABLE_REF object for lookup in the key 'tmp_key' using items + accessible via item iterator 'it'. + + @retval TRUE Error + @retval FALSE OK +*/ + +bool TABLE_REF::tmp_table_index_lookup_init(THD *thd, + KEY *tmp_key, + Item_iterator &it, + bool value, + uint skip) +{ + uint tmp_key_parts= tmp_key->user_defined_key_parts; + uint i; + DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init"); + + key= 0; /* The only temp table index. */ + key_length= tmp_key->key_length; + if (!(key_buff= + (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || + !(key_copy= + (store_key**) thd->alloc((sizeof(store_key*) * + (tmp_key_parts + 1)))) || + !(items= + (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) + DBUG_RETURN(TRUE); + + key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length); + + KEY_PART_INFO *cur_key_part= tmp_key->key_part; + store_key **ref_key= key_copy; + uchar *cur_ref_buff= key_buff; + + it.open(); + for (i= 0; i < skip; i++) it.next(); + for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) + { + Item *item= it.next(); + DBUG_ASSERT(item); + items[i]= item; + int null_count= MY_TEST(cur_key_part->field->real_maybe_null()); + *ref_key= new store_key_item(thd, cur_key_part->field, + /* TIMOUR: + the NULL byte is taken into account in + cur_key_part->store_length, so instead of + cur_ref_buff + MY_TEST(maybe_null), we could + use that information instead. + */ + cur_ref_buff + null_count, + null_count ? 
cur_ref_buff : 0, + cur_key_part->length, items[i], value); + cur_ref_buff+= cur_key_part->store_length; + } + *ref_key= NULL; /* End marker. */ + key_err= 1; + key_parts= tmp_key_parts; + DBUG_RETURN(FALSE); +} + + +/* + Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates + between ref access and full table scan) +*/ + +bool TABLE_REF::is_access_triggered() +{ + for (uint i = 0; i < key_parts; i++) + { + if (cond_guards[i]) + return TRUE; + } + return FALSE; +} + + +/** + Partially cleanup JOIN after it has executed: close index or rnd read + (table cursors), free quick selects. + + This function is called in the end of execution of a JOIN, before the used + tables are unlocked and closed. + + For a join that is resolved using a temporary table, the first sweep is + performed against actual tables and an intermediate result is inserted + into the temprorary table. + The last sweep is performed against the temporary table. Therefore, + the base tables and associated buffers used to fill the temporary table + are no longer needed, and this function is called to free them. + + For a join that is performed without a temporary table, this function + is called after all rows are sent, but before EOF packet is sent. + + For a simple SELECT with no subqueries this function performs a full + cleanup of the JOIN and calls mysql_unlock_read_tables to free used base + tables. + + If a JOIN is executed for a subquery or if it has a subquery, we can't + do the full cleanup and need to do a partial cleanup only. + - If a JOIN is not the top level join, we must not unlock the tables + because the outer select may not have been evaluated yet, and we + can't unlock only selected tables of a query. + - Additionally, if this JOIN corresponds to a correlated subquery, we + should not free quick selects and join buffers because they will be + needed for the next execution of the correlated subquery. 
    - However, if this is a JOIN for a [sub]select, which is not
    a correlated subquery itself, but has subqueries, we can free it
    fully and also free JOINs of all its subqueries. The exception
    is a subquery in SELECT list, e.g: @n
    SELECT a, (select MY_MAX(b) from t1) group by c @n
    This subquery will not be evaluated at first sweep and its value will
    not be inserted into the temporary table. Instead, it's evaluated
    when selecting from the temporary table. Therefore, it can't be freed
    here even though it's not correlated.

  @todo
    Unlock tables even if the join isn't top level select in the tree
*/

void JOIN::join_free()
{
  SELECT_LEX_UNIT *tmp_unit;
  SELECT_LEX *sl;
  /*
    Optimization: if not EXPLAIN and we are done with the JOIN,
    free all tables.
  */
  bool full= !(select_lex->uncacheable) &&  !(thd->lex->describe);
  bool can_unlock= full;
  DBUG_ENTER("JOIN::join_free");

  cleanup(full);

  /* Clean up the joins of all inner units, skipping recursive CTE units */
  for (tmp_unit= select_lex->first_inner_unit();
       tmp_unit;
       tmp_unit= tmp_unit->next_unit())
  {
    if (tmp_unit->with_element && tmp_unit->with_element->is_recursive)
      continue;
    for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
    {
      Item_subselect *subselect= sl->master_unit()->item;
      bool full_local= full && (!subselect || subselect->is_evaluated());
      /*
        If this join is evaluated, we can fully clean it up and clean up all
        its underlying joins even if they are correlated -- they will not be
        used any more anyway.
        If this join is not yet evaluated, we still must clean it up to
        close its table cursors -- it may never get evaluated, as in case of
        ... HAVING FALSE OR a IN (SELECT ...))
        but all table cursors must be closed before the unlock.
      */
      sl->cleanup_all_joins(full_local);
      /* Can't unlock if at least one JOIN is still needed */
      can_unlock= can_unlock && full_local;
    }
  }
  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */
  if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
      !(select_options & SELECT_NO_UNLOCK) &&
      !select_lex->subquery_in_having &&
      (select_lex == (thd->lex->unit.fake_select_lex ?
                      thd->lex->unit.fake_select_lex :
                      thd->lex->first_select_lex())))
  {
    /*
      TODO: unlock tables even if the join isn't top level select in the
      tree.
    */
    mysql_unlock_read_tables(thd, lock);           // Don't free join->lock
    lock= 0;
  }

  DBUG_VOID_RETURN;
}


/**
  Free resources of given join.

  @param full   true if we should free all resources, call with full==1
                should be last, before it this function can be called with
                full==0

  @note
    With subquery this function definitely will be called several times,
    but even for simple query it can be called several times.
*/

void JOIN::cleanup(bool full)
{
  DBUG_ENTER("JOIN::cleanup");
  DBUG_PRINT("enter", ("select: %d (%p) join: %p full: %u",
                       select_lex->select_number, select_lex, this,
                       (uint) full));

  if (full)
    have_query_plan= QEP_DELETED;

  if (original_join_tab)
  {
    /* Free the original optimized join created for the group_by_handler */
    join_tab= original_join_tab;
    original_join_tab= 0;
  }

  if (join_tab)
  {
    JOIN_TAB *tab;

    if (full)
    {
      /*
        Call cleanup() on join tabs used by the join optimization
        (join->join_tab may now be pointing to result of make_simple_join
         reading from the temporary table)

        We also need to check table_count to handle various degenerate joins
        w/o tables: they don't have some members initialized and
        WALK_OPTIMIZATION_TABS may not work correctly for them.
      */
      if (top_join_tab_count && tables_list)
      {
        for (tab= first_breadth_first_tab(); tab;
             tab= next_breadth_first_tab(first_breadth_first_tab(),
                                         top_join_tab_count, tab))
        {
          tab->cleanup();
          delete tab->filesort_result;
          tab->filesort_result= NULL;
        }
      }
      cleaned= true;
      //psergey2: added (Q: why not in the above loop?)
      {
        /* Free the temporary tables of the aggregation tabs */
        JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
        for (uint i= 0; i < aggr_tables; i++, curr_tab++)
        {
          if (curr_tab->aggr)
          {
            free_tmp_table(thd, curr_tab->table);
            curr_tab->table= NULL;
            delete curr_tab->tmp_table_param;
            curr_tab->tmp_table_param= NULL;
            curr_tab->aggr= NULL;

            delete curr_tab->filesort_result;
            curr_tab->filesort_result= NULL;
          }
        }
        aggr_tables= 0; // psergey3
      }
    }
    else
    {
      /* Not a full cleanup: only partial_cleanup() is called on each tab */
      for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
      {
        tab->partial_cleanup();
      }
    }
  }
  if (full)
  {
    cleanup_empty_jtbm_semi_joins(this, join_list);

    // Run Cached_item DTORs!
    group_fields.delete_elements();
    order_fields.delete_elements();

    /*
      We can't call delete_elements() on copy_funcs as this will cause
      problems in free_elements() as some of the elements are then deleted.
    */
    tmp_table_param.copy_funcs.empty();
    /*
      If we have tmp_join and 'this' JOIN is not tmp_join and
      tmp_table_param.copy_field's of them are equal then we have to remove
      pointer to  tmp_table_param.copy_field from tmp_join, because it will
      be removed in tmp_table_param.cleanup().
    */
    tmp_table_param.cleanup();

    delete pushdown_query;
    pushdown_query= 0;

    if (!join_tab)
    {
      /* No join tabs: free the temp tables of constant JTBM subselects here */
      List_iterator<TABLE_LIST> li(*join_list);
      TABLE_LIST *table_ref;
      while ((table_ref= li++))
      {
        if (table_ref->table &&
            table_ref->jtbm_subselect &&
            table_ref->jtbm_subselect->is_jtbm_const_tab)
        {
          free_tmp_table(thd, table_ref->table);
          table_ref->table= NULL;
        }
      }
    }
    free_pushdown_handlers(*join_list);
  }
  /* Restore ref array to original state */
  if (current_ref_ptrs != items0)
  {
    set_items_ref_array(items0);
    set_group_rpa= false;
  }
  DBUG_VOID_RETURN;
}

/**
  Clean up all derived pushdown handlers in this join.

  @detail
    Note that dt_handler is picked at the prepare stage (as opposed
    to optimization stage where one could expect this).
    Because of that, we have to do cleanups in this function that is called
    from JOIN::cleanup() and not in JOIN_TAB::cleanup.
 */
void JOIN::free_pushdown_handlers(List<TABLE_LIST>& join_list)
{
  List_iterator<TABLE_LIST> li(join_list);
  TABLE_LIST *table_ref;
  while ((table_ref= li++))
  {
    /* Recurse into nested joins before handling this entry */
    if (table_ref->nested_join)
      free_pushdown_handlers(table_ref->nested_join->join_list);
    if (table_ref->pushdown_derived)
    {
      delete table_ref->pushdown_derived;
      table_ref->pushdown_derived= NULL;
    }
    delete table_ref->dt_handler;
    table_ref->dt_handler= NULL;
  }
}

/**
  Remove the following expressions from ORDER BY and GROUP BY:
  Constant expressions @n
  Expression that only uses tables that are of type EQ_REF and the reference
  is in the ORDER list or if all referred tables are of the above type.

  In the following, the X field can be removed:
  @code
  SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
  SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
  @endcode

  These can't be optimized:
  @code
  SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
  SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
  SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
  @endcode

  TODO: this function checks ORDER::used, which can only have a value of 0.
*/

static bool
eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
{
  /* The result is memoized in tab->eq_ref_table once computed */
  if (tab->cached_eq_ref_table)			// If cached
    return tab->eq_ref_table;
  tab->cached_eq_ref_table=1;
  /* We can skip const tables only if not an outer table */
  if (tab->type == JT_CONST && !tab->first_inner)
    return (tab->eq_ref_table=1);		/* purecov: inspected */
  if (tab->type != JT_EQ_REF || tab->table->maybe_null)
    return (tab->eq_ref_table=0);		// We must use this
  Item **ref_item=tab->ref.items;
  Item **end=ref_item+tab->ref.key_parts;
  uint found=0;
  table_map map=tab->table->map;

  for (; ref_item != end ; ref_item++)
  {
    if (! (*ref_item)->const_item())
    {						// Not a const ref
      ORDER *order;
      for (order=start_order ; order ; order=order->next)
      {
	if ((*ref_item)->eq(order->item[0],0))
	  break;
      }
      if (order)
      {
        /* Count each matching ORDER element at most once per table */
        if (!(order->used & map))
        {
          found++;
          order->used|= map;
        }
	continue;				// Used in ORDER BY
      }
      /* Not in the ORDER list: every table it uses must itself be eq_ref */
      if (!only_eq_ref_tables(join,start_order, (*ref_item)->used_tables()))
	return (tab->eq_ref_table=0);
    }
  }
  /* Check that there was no reference to table before sort order */
  for (; found && start_order ; start_order=start_order->next)
  {
    if (start_order->used & map)
    {
      found--;
      continue;
    }
    if (start_order->depend_map & map)
      return (tab->eq_ref_table=0);
  }
  return tab->eq_ref_table=1;
}


/*
  Return 1 if eq_ref_table() holds for every table whose bit is set in
  'tables' (pseudo table bits are ignored), 0 otherwise.
*/
static bool
only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
{
  tables&= ~PSEUDO_TABLE_BITS;
  /* Walk the bitmap and join->map2table in step, one bit per entry */
  for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
  {
    if (tables & 1 && !eq_ref_table(join, order, *tab))
      return 0;
  }
  return 1;
}


/** Update the dependency map for the tables. */

static void update_depend_map(JOIN *join)
{
  JOIN_TAB *join_tab;
  for (join_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
       join_tab;
       join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
  {
    TABLE_REF *ref= &join_tab->ref;
    table_map depend_map=0;
    Item **item=ref->items;
    uint i;
    /* Tables used directly by the key part items, minus the outer-ref bit */
    for (i=0 ; i < ref->key_parts ; i++,item++)
      depend_map|=(*item)->used_tables();
    depend_map&= ~OUTER_REF_TABLE_BIT;
    ref->depend_map= depend_map;
    /* Add the dependency maps already stored for each of those tables */
    for (JOIN_TAB **tab=join->map2table;
         depend_map ;
         tab++,depend_map>>=1 )
    {
      if (depend_map & 1)
        ref->depend_map|=(*tab)->ref.depend_map;
    }
  }
}


/** Update the dependency map for the sort order.
*/ + +static void update_depend_map_for_order(JOIN *join, ORDER *order) +{ + for (; order ; order=order->next) + { + table_map depend_map; + order->item[0]->update_used_tables(); + order->depend_map=depend_map=order->item[0]->used_tables(); + order->used= 0; + // Not item_sum(), RAND() and no reference to table outside of sub select + if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) + && !order->item[0]->with_sum_func() && + join->join_tab) + { + for (JOIN_TAB **tab=join->map2table; + depend_map ; + tab++, depend_map>>=1) + { + if (depend_map & 1) + order->depend_map|=(*tab)->ref.depend_map; + } + } + } +} + + +/** + Remove all constants from ORDER and check if ORDER only contains simple + expressions. + + We also remove all duplicate expressions, keeping only the first one. + + simple_order is set to 1 if sort_order only uses fields from head table + and the head table is not a LEFT JOIN table. + + @param join Join handler + @param first_order List of SORT or GROUP order + @param cond WHERE statement + @param change_list Set to 1 if we should remove things from list. + If this is not set, then only simple_order is + calculated. This is not set when we + are using ROLLUP + @param simple_order Set to 1 if we are only using simple + expressions. + + @return + Returns new sort order +*/ + +static ORDER * +remove_const(JOIN *join,ORDER *first_order, COND *cond, + bool change_list, bool *simple_order) +{ + /* + We can't do ORDER BY using filesort if the select list contains a non + deterministic value like RAND() or ROWNUM(). + For example: + SELECT a,ROWNUM() FROM t1 ORDER BY a; + + If we would first sort the table 't1', the ROWNUM() column would be + generated during end_send() and the order would be wrong. + + Previously we had here also a test of ROLLUP: + 'join->rollup.state == ROLLUP::STATE_NONE' + + I deleted this because the ROLLUP was never enforced because of a + bug where the inital value of simple_order was ignored. 
Having + ROLLUP tested now when the code is fixed, causes many test failure + and some wrong results so better to leave the code as it was + related to ROLLUP. + */ + *simple_order= !join->select_lex->rownum_in_field_list; + if (join->only_const_tables()) + return change_list ? 0 : first_order; // No need to sort + + ORDER *order,**prev_ptr, *tmp_order; + table_map UNINIT_VAR(first_table); /* protected by first_is_base_table */ + table_map not_const_tables= ~join->const_table_map; + table_map ref; + bool first_is_base_table= FALSE; + DBUG_ENTER("remove_const"); + + /* + Join tab is set after make_join_statistics() has been called. + In case of one table with GROUP BY this function is called before + join_tab is set for the GROUP_BY expression + */ + if (join->join_tab) + { + if (join->join_tab[join->const_tables].table) + { + first_table= join->join_tab[join->const_tables].table->map; + first_is_base_table= TRUE; + } + + /* + Cleanup to avoid interference of calls of this function for + ORDER BY and GROUP BY + */ + for (JOIN_TAB *tab= join->join_tab + join->const_tables; + tab < join->join_tab + join->top_join_tab_count; + tab++) + tab->cached_eq_ref_table= FALSE; + + JOIN_TAB *head= join->join_tab + join->const_tables; + *simple_order&= head->on_expr_ref[0] == NULL; + if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS) + { + uint u1, u2, u3, u4; + /* + normally the condition is (see filesort_use_addons()) + + length + sortlength <= max_length_for_sort_data + + but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative + is to use a temporary table, which is rather expensive. 
+ + TODO proper cost estimations + */ + *simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3, &u4); + } + } + else + { + first_is_base_table= FALSE; + first_table= 0; // Not used, for gcc + } + + prev_ptr= &first_order; + + /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */ + + update_depend_map_for_order(join, first_order); + for (order=first_order; order ; order=order->next) + { + table_map order_tables=order->item[0]->used_tables(); + if (order->item[0]->with_sum_func() || + order->item[0]->with_window_func() || + /* + If the outer table of an outer join is const (either by itself or + after applying WHERE condition), grouping on a field from such a + table will be optimized away and filesort without temporary table + will be used unless we prevent that now. Filesort is not fit to + handle joins and the join condition is not applied. We can't detect + the case without an expensive test, however, so we force temporary + table for all queries containing more than one table, ROLLUP, and an + outer join. + */ + (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED && + join->outer_join)) + *simple_order=0; // Must do a temp table to sort + else if (!(order_tables & not_const_tables)) + { + if (order->item[0]->with_subquery()) + { + /* + Delay the evaluation of constant ORDER and/or GROUP expressions that + contain subqueries until the execution phase. 
+ */ + join->exec_const_order_group_cond.push_back(order->item[0], + join->thd->mem_root); + } + DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); + continue; + } + else + { + if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)) + *simple_order=0; + else + { + if (cond && const_expression_in_where(cond,order->item[0])) + { + DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); + continue; + } + if (first_is_base_table && + (ref=order_tables & (not_const_tables ^ first_table))) + { + if (!(order_tables & first_table) && + only_eq_ref_tables(join,first_order, ref)) + { + DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); + continue; + } + /* + UseMultipleEqualitiesToRemoveTempTable: + Can use multiple-equalities here to check that ORDER BY columns + can be used without tmp. table. + */ + bool can_subst_to_first_table= false; + if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && + first_is_base_table && + order->item[0]->real_item()->type() == Item::FIELD_ITEM && + join->cond_equal) + { + table_map first_table_bit= + join->join_tab[join->const_tables].table->map; + + Item *item= order->item[0]; + + /* + TODO: equality substitution in the context of ORDER BY is + sometimes allowed when it is not allowed in the general case. + + We make the below call for its side effect: it will locate the + multiple equality the item belongs to and set item->item_equal + accordingly. 
+ */ + Item *res= item->propagate_equal_fields(join->thd, + Value_source:: + Context_identity(), + join->cond_equal); + Item_equal *item_eq; + if ((item_eq= res->get_item_equal())) + { + Item *first= item_eq->get_first(NO_PARTICULAR_TAB, NULL); + if (first->const_item() || first->used_tables() == + first_table_bit) + { + can_subst_to_first_table= true; + } + } + } + + if (!can_subst_to_first_table) + { + *simple_order=0; // Must do a temp table to sort + } + } + } + } + /* Remove ORDER BY entries that we have seen before */ + for (tmp_order= first_order; + tmp_order != order; + tmp_order= tmp_order->next) + { + if (tmp_order->item[0]->eq(order->item[0],1)) + break; + } + if (tmp_order != order) + continue; // Duplicate order by. Remove + + if (change_list) + *prev_ptr= order; // use this entry + prev_ptr= &order->next; + } + if (change_list) + *prev_ptr=0; + if (prev_ptr == &first_order) // Nothing to sort/group + *simple_order=1; +#ifndef DBUG_OFF + if (unlikely(join->thd->is_error())) + DBUG_PRINT("error",("Error from remove_const")); +#endif + DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order)); + DBUG_RETURN(first_order); +} + + +/** + Filter out ORDER items those are equal to constants in WHERE + + This function is a limited version of remove_const() for use + with non-JOIN statements (i.e. single-table UPDATE and DELETE). + + + @param order Linked list of ORDER BY arguments + @param cond WHERE expression + + @return pointer to new filtered ORDER list or NULL if whole list eliminated + + @note + This function overwrites input order list. 
+*/ + +ORDER *simple_remove_const(ORDER *order, COND *where) +{ + if (!order || !where) + return order; + + ORDER *first= NULL, *prev= NULL; + for (; order; order= order->next) + { + DBUG_ASSERT(!order->item[0]->with_sum_func()); // should never happen + if (!const_expression_in_where(where, order->item[0])) + { + if (!first) + first= order; + if (prev) + prev->next= order; + prev= order; + } + } + if (prev) + prev->next= NULL; + return first; +} + + +/* + Set all fields in the table to have a null value + + @param tables Table list +*/ + +static void make_tables_null_complemented(List<TABLE_LIST> *tables) +{ + List_iterator<TABLE_LIST> ti(*tables); + TABLE_LIST *table; + while ((table= ti++)) + { + /* + Don't touch semi-join materialization tables, as the a join_free() + call may have freed them (and HAVING clause can't have references to + them anyway). + */ + if (!table->is_jtbm()) + { + TABLE *tbl= table->table; + mark_as_null_row(tbl); // Set fields to NULL + } + } +} + + +static int +return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> *tables, + List<Item> *fields, bool send_row, ulonglong select_options, + const char *info, Item *having, List<Item> *all_fields) +{ + DBUG_ENTER("return_zero_rows"); + + if (select_options & SELECT_DESCRIBE) + { + select_describe(join, FALSE, FALSE, FALSE, info); + DBUG_RETURN(0); + } + + if (send_row) + { + /* + Set all tables to have NULL row. This is needed as we will be evaluating + HAVING condition. + */ + make_tables_null_complemented(tables); + + List_iterator_fast<Item> it(*all_fields); + Item *item; + /* + Inform all items (especially aggregating) to calculate HAVING correctly, + also we will need it for sending results. 
+ */ + join->no_rows_in_result_called= 1; + while ((item= it++)) + item->no_rows_in_result(); + if (having && having->val_int() == 0) + send_row=0; + } + + /* Update results for FOUND_ROWS */ + if (!join->send_row_on_empty_set()) + { + join->thd->set_examined_row_count(0); + join->thd->limit_found_rows= 0; + } + + if (!(result->send_result_set_metadata(*fields, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))) + { + bool send_error= FALSE; + if (send_row) + send_error= result->send_data_with_check(*fields, join->unit, 0) > 0; + if (likely(!send_error)) + result->send_eof(); // Should be safe + } + /* + JOIN::join_free() must be called after the virtual method + select::send_result_set_metadata() returned control since + implementation of this method could use data strutcures + that are released by the method JOIN::join_free(). + */ + join->join_free(); + + DBUG_RETURN(0); +} + +/** + Reset table rows to contain a null-complement row (all fields are null) + + Used only in JOIN::clear() and in do_select() if there where no matching rows. + + @param join JOIN + @param cleared_tables Used to mark all cleared tables in the map. Needed for + unclear_tables() to know which tables to restore to + their original state. +*/ + +static void clear_tables(JOIN *join, table_map *cleared_tables) +{ + DBUG_ASSERT(cleared_tables); + for (uint i= 0 ; i < join->table_count ; i++) + { + TABLE *table= join->table[i]; + + if (table->null_row) + continue; // Nothing more to do + (*cleared_tables)|= (((table_map) 1) << i); + if (table->s->null_bytes) + { + /* + Remember null bits for the record so that we can restore the + original const record in unclear_tables() + */ + memcpy(table->record[1], table->null_flags, table->s->null_bytes); + } + mark_as_null_row(table); // All fields are NULL + } +} + + +/** + Reverse null marking for tables and restore null bits. + This return the tables to the state of before clear_tables(). 
+ + We have to do this because the tables may be re-used in a sub query + and the subquery will assume that the const tables contains the original + data before clear_tables(). +*/ + +static void unclear_tables(JOIN *join, table_map *cleared_tables) +{ + for (uint i= 0 ; i < join->table_count ; i++) + { + if ((*cleared_tables) & (((table_map) 1) << i)) + { + TABLE *table= join->table[i]; + if (table->s->null_bytes) + memcpy(table->null_flags, table->record[1], table->s->null_bytes); + unmark_as_null_row(table); + } + } +} + + +/***************************************************************************** + Make som simple condition optimization: + If there is a test 'field = const' change all refs to 'field' to 'const' + Remove all dummy tests 'item = item', 'const op const'. + Remove all 'item is NULL', when item can never be null! + item->marker should be 0 for all items on entry + Return in cond_value FALSE if condition is impossible (1 = 2) +*****************************************************************************/ + +class COND_CMP :public ilink { +public: + static void *operator new(size_t size, MEM_ROOT *mem_root) + { + return alloc_root(mem_root, size); + } + static void operator delete(void *ptr __attribute__((unused)), + size_t size __attribute__((unused))) + { TRASH_FREE(ptr, size); } + + static void operator delete(void *, MEM_ROOT*) {} + + Item *and_level; + Item_bool_func2 *cmp_func; + COND_CMP(Item *a,Item_bool_func2 *b) :and_level(a),cmp_func(b) {} +}; + +/** + Find the multiple equality predicate containing a field. + + The function retrieves the multiple equalities accessed through + the con_equal structure from current level and up looking for + an equality containing field. It stops retrieval as soon as the equality + is found and set up inherited_fl to TRUE if it's found on upper levels. 
+ + @param cond_equal multiple equalities to search in + @param field field to look for + @param[out] inherited_fl set up to TRUE if multiple equality is found + on upper levels (not on current level of + cond_equal) + + @return + - Item_equal for the found multiple equality predicate if a success; + - NULL otherwise. +*/ + +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl) +{ + Item_equal *item= 0; + bool in_upper_level= FALSE; + while (cond_equal) + { + List_iterator_fast<Item_equal> li(cond_equal->current_level); + while ((item= li++)) + { + if (item->contains(field)) + goto finish; + } + in_upper_level= TRUE; + cond_equal= cond_equal->upper_levels; + } + in_upper_level= FALSE; +finish: + *inherited_fl= in_upper_level; + return item; +} + + +/** + Check whether an equality can be used to build multiple equalities. + + This function first checks whether the equality (left_item=right_item) + is a simple equality i.e. the one that equates a field with another field + or a constant (field=field_item or field=const_item). + If this is the case the function looks for a multiple equality + in the lists referenced directly or indirectly by cond_equal inferring + the given simple equality. If it doesn't find any, it builds a multiple + equality that covers the predicate, i.e. the predicate can be inferred + from this multiple equality. + The built multiple equality could be obtained in such a way: + create a binary multiple equality equivalent to the predicate, then + merge it, if possible, with one of old multiple equalities. + This guarantees that the set of multiple equalities covering equality + predicates will be minimal. + + EXAMPLE: + For the where condition + @code + WHERE a=b AND b=c AND + (b=2 OR f=e) + @endcode + the check_equality will be called for the following equality + predicates a=b, b=c, b=2 and f=e. + - For a=b it will be called with *cond_equal=(0,[]) and will transform + *cond_equal into (0,[Item_equal(a,b)]). 
- For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
      and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
    - For b=2 it will be called with *cond_equal=(ptr(CE),[])
      and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
    - For f=e it will be called with *cond_equal=(ptr(CE), [])
      and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).

  @note
    Now only fields that have the same type definitions (verified by
    the Field::eq_def method) are placed to the same multiple equalities.
    Because of this some equality predicates are not eliminated and
    can be used in the constant propagation procedure.
    We could weaken the equality test as soon as at least one of the
    equal fields is to be equal to a constant. It would require a
    more complicated implementation: we would have to store, in
    general case, its own constant for each field from the multiple
    equality. But at the same time it would allow us to get rid
    of constant propagation completely: it would be done by the call
    to cond->build_equal_items().


    The implementation does not follow exactly the above rules to
    build a new multiple equality for the equality predicate.
    If it processes the equality of the form field1=field2, it
    looks for multiple equalities me1 containing field1 and me2 containing
    field2. If only one of them is found the function expands it with
    the lacking field. If multiple equalities for both fields are
    found they are merged. If both searches fail a new multiple equality
    containing just field1 and field2 is added to the existing
    multiple equalities.
    If the function processes the predicate of the form field1=const,
    it looks for a multiple equality containing field1. If found, the
    function checks the constant of the multiple equality. If the value
    is unknown, it is setup to const. Otherwise the value is compared with
    const and the evaluation of the equality predicate is performed.
+ When expanding/merging equality predicates from the upper levels + the function first copies them for the current level. It looks + acceptable, as this happens rarely. The implementation without + copying would be much more complicated. + + For description of how equality propagation works with SJM nests, grep + for EqualityPropagationAndSjmNests. + + @param left_item left term of the quality to be checked + @param right_item right term of the equality to be checked + @param item equality item if the equality originates from a condition + predicate, 0 if the equality is the result of row + elimination + @param cond_equal multiple equalities that must hold together with the + equality + + @retval + TRUE if the predicate is a simple equality predicate to be used + for building multiple equalities + @retval + FALSE otherwise +*/ + +bool check_simple_equality(THD *thd, const Item::Context &ctx, + Item *left_item, Item *right_item, + COND_EQUAL *cond_equal) +{ + Item *orig_left_item= left_item; + Item *orig_right_item= right_item; + if (left_item->type() == Item::REF_ITEM) + { + Item_ref::Ref_Type left_ref= ((Item_ref*)left_item)->ref_type(); + + if (left_ref == Item_ref::VIEW_REF || + left_ref == Item_ref::REF) + { + if (((Item_ref*)left_item)->get_depended_from()) + return FALSE; + if (left_ref == Item_ref::VIEW_REF && + ((Item_direct_view_ref*)left_item)->get_null_ref_table() != + NO_NULL_TABLE && + !left_item->real_item()->used_tables()) + return FALSE; + left_item= left_item->real_item(); + } + } + if (right_item->type() == Item::REF_ITEM) + { + Item_ref::Ref_Type right_ref= ((Item_ref*)right_item)->ref_type(); + if (right_ref == Item_ref::VIEW_REF || + (right_ref == Item_ref::REF)) + { + if (((Item_ref*)right_item)->get_depended_from()) + return FALSE; + if (right_ref == Item_ref::VIEW_REF && + ((Item_direct_view_ref*)right_item)->get_null_ref_table() != + NO_NULL_TABLE && + !right_item->real_item()->used_tables()) + return FALSE; + right_item= 
right_item->real_item(); + } + } + if (left_item->type() == Item::FIELD_ITEM && + right_item->type() == Item::FIELD_ITEM && + !((Item_field*)left_item)->get_depended_from() && + !((Item_field*)right_item)->get_depended_from()) + { + /* The predicate the form field1=field2 is processed */ + + Field *left_field= ((Item_field*) left_item)->field; + Field *right_field= ((Item_field*) right_item)->field; + + if (!left_field->eq_def(right_field) && + !fields_equal_using_narrowing(thd, left_field, right_field)) + return FALSE; + + /* Search for multiple equalities containing field1 and/or field2 */ + bool left_copyfl, right_copyfl; + Item_equal *left_item_equal= + find_item_equal(cond_equal, left_field, &left_copyfl); + Item_equal *right_item_equal= + find_item_equal(cond_equal, right_field, &right_copyfl); + + /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */ + if (left_field->eq(right_field)) /* f = f */ + return (!(left_field->maybe_null() && !left_item_equal)); + + if (left_item_equal && left_item_equal == right_item_equal) + { + /* + The equality predicate is inference of one of the existing + multiple equalities, i.e the condition is already covered + by upper level equalities + */ + return TRUE; + } + + /* Copy the found multiple equalities at the current level if needed */ + if (left_copyfl) + { + /* left_item_equal of an upper level contains left_item */ + left_item_equal= new (thd->mem_root) Item_equal(thd, left_item_equal); + left_item_equal->set_context_field(((Item_field*) left_item)); + cond_equal->current_level.push_back(left_item_equal, thd->mem_root); + } + if (right_copyfl) + { + /* right_item_equal of an upper level contains right_item */ + right_item_equal= new (thd->mem_root) Item_equal(thd, right_item_equal); + right_item_equal->set_context_field(((Item_field*) right_item)); + cond_equal->current_level.push_back(right_item_equal, thd->mem_root); + } + + if (left_item_equal) + { + /* left item was found in the current or one of the 
upper levels */ + if (! right_item_equal) + left_item_equal->add(orig_right_item, thd->mem_root); + else + { + /* Merge two multiple equalities forming a new one */ + left_item_equal->merge(thd, right_item_equal); + /* Remove the merged multiple equality from the list */ + List_iterator<Item_equal> li(cond_equal->current_level); + while ((li++) != right_item_equal) ; + li.remove(); + } + } + else + { + /* left item was not found neither the current nor in upper levels */ + if (right_item_equal) + right_item_equal->add(orig_left_item, thd->mem_root); + else + { + /* None of the fields was found in multiple equalities */ + Type_handler_hybrid_field_type + tmp(orig_left_item->type_handler_for_comparison()); + if (tmp.aggregate_for_comparison(orig_right_item-> + type_handler_for_comparison())) + return false; + Item_equal *item_equal= + new (thd->mem_root) Item_equal(thd, tmp.type_handler(), + orig_left_item, orig_right_item, + false); + item_equal->set_context_field((Item_field*)left_item); + cond_equal->current_level.push_back(item_equal, thd->mem_root); + } + } + return TRUE; + } + + { + /* The predicate of the form field=const/const=field is processed */ + Item *const_item= 0; + Item_field *field_item= 0; + Item *orig_field_item= 0; + if (left_item->type() == Item::FIELD_ITEM && + !((Item_field*)left_item)->get_depended_from() && + right_item->can_eval_in_optimize()) + { + orig_field_item= orig_left_item; + field_item= (Item_field *) left_item; + const_item= right_item; + } + else if (right_item->type() == Item::FIELD_ITEM && + !((Item_field*)right_item)->get_depended_from() && + left_item->can_eval_in_optimize()) + { + orig_field_item= orig_right_item; + field_item= (Item_field *) right_item; + const_item= left_item; + } + + if (const_item && + field_item->field->test_if_equality_guarantees_uniqueness(const_item)) + { + /* + field_item and const_item are arguments of a scalar or a row + comparison function: + WHERE column=constant + WHERE (column, ...) 
= (constant, ...) + + The owner comparison function has previously called fix_fields(), + so field_item and const_item should be directly comparable items, + field_item->cmp_context and const_item->cmp_context should be set. + In case of string comparison, charsets and collations of + field_item and const_item should have already be aggregated + for comparison, all necessary character set converters installed + and fixed. + + In case of string comparison, const_item can be either: + - a weaker constant that does not need to be converted to field_item: + WHERE latin1_field = 'latin1_const' + WHERE varbinary_field = 'latin1_const' + WHERE latin1_bin_field = 'latin1_general_ci_const' + - a stronger constant that does not need to be converted to field_item: + WHERE latin1_field = binary 0xDF + WHERE latin1_field = 'a' COLLATE latin1_bin + - a result of conversion (e.g. from the session character set) + to the character set of field_item: + WHERE latin1_field = 'utf8_string_with_latin1_repertoire' + */ + bool copyfl; + + Item_equal *item_equal = find_item_equal(cond_equal, + field_item->field, ©fl); + if (copyfl) + { + item_equal= new (thd->mem_root) Item_equal(thd, item_equal); + cond_equal->current_level.push_back(item_equal, thd->mem_root); + item_equal->set_context_field(field_item); + } + Item *const_item2= field_item->field->get_equal_const_item(thd, ctx, + const_item); + if (!const_item2) + return false; + + if (item_equal) + { + /* + The flag cond_false will be set to 1 after this, if item_equal + already contains a constant and its value is not equal to + the value of const_item. 
+ */ + item_equal->add_const(thd, const_item2); + } + else + { + Type_handler_hybrid_field_type + tmp(orig_left_item->type_handler_for_comparison()); + if (tmp.aggregate_for_comparison(orig_right_item-> + type_handler_for_comparison())) + return false; + item_equal= new (thd->mem_root) Item_equal(thd, tmp.type_handler(), + const_item2, + orig_field_item, true); + item_equal->set_context_field(field_item); + cond_equal->current_level.push_back(item_equal, thd->mem_root); + } + return TRUE; + } + } + return FALSE; +} + + +/** + Convert row equalities into a conjunction of regular equalities. + + The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n) + into a list of equalities E1=E'1,...,En=E'n. For each of these equalities + Ei=E'i the function checks whether it is a simple equality or a row + equality. If it is a simple equality it is used to expand multiple + equalities of cond_equal. If it is a row equality it converted to a + sequence of equalities between row elements. If Ei=E'i is neither a + simple equality nor a row equality the item for this predicate is added + to eq_list. 
+ + @param thd thread handle + @param left_row left term of the row equality to be processed + @param right_row right term of the row equality to be processed + @param cond_equal multiple equalities that must hold together with the + predicate + @param eq_list results of conversions of row equalities that are not + simple enough to form multiple equalities + + @retval + TRUE if conversion has succeeded (no fatal error) + @retval + FALSE otherwise +*/ + +static bool check_row_equality(THD *thd, const Arg_comparator *comparators, + Item *left_row, Item_row *right_row, + COND_EQUAL *cond_equal, List<Item>* eq_list) +{ + uint n= left_row->cols(); + for (uint i= 0 ; i < n; i++) + { + bool is_converted; + Item *left_item= left_row->element_index(i); + Item *right_item= right_row->element_index(i); + if (left_item->type() == Item::ROW_ITEM && + right_item->type() == Item::ROW_ITEM) + { + /* + Item_splocal for ROW SP variables return Item::ROW_ITEM. + Here we know that left_item and right_item are not Item_splocal, + because ROW SP variables with nested ROWs are not supported yet. + It's safe to cast left_item and right_item to Item_row. + */ + DBUG_ASSERT(!left_item->get_item_splocal()); + DBUG_ASSERT(!right_item->get_item_splocal()); + is_converted= check_row_equality(thd, + comparators[i].subcomparators(), + (Item_row *) left_item, + (Item_row *) right_item, + cond_equal, eq_list); + } + else + { + const Arg_comparator *tmp= &comparators[i]; + is_converted= check_simple_equality(thd, + Item::Context(Item::ANY_SUBST, + tmp->compare_type_handler(), + tmp->compare_collation()), + left_item, right_item, + cond_equal); + } + + if (!is_converted) + { + Item_func_eq *eq_item; + if (!(eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item)) || + eq_item->set_cmp_func(thd)) + return FALSE; + eq_item->quick_fix_field(); + eq_list->push_back(eq_item, thd->mem_root); + } + } + return TRUE; +} + + +/** + Eliminate row equalities and form multiple equalities predicates. 
+ + This function checks whether the item is a simple equality + i.e. the one that equates a field with another field or a constant + (field=field_item or field=constant_item), or, a row equality. + For a simple equality the function looks for a multiple equality + in the lists referenced directly or indirectly by cond_equal inferring + the given simple equality. If it doesn't find any, it builds/expands + multiple equality that covers the predicate. + Row equalities are eliminated substituted for conjunctive regular + equalities which are treated in the same way as original equality + predicates. + + @param thd thread handle + @param item predicate to process + @param cond_equal multiple equalities that must hold together with the + predicate + @param eq_list results of conversions of row equalities that are not + simple enough to form multiple equalities + + @retval + TRUE if re-writing rules have been applied + @retval + FALSE otherwise, i.e. + if the predicate is not an equality, + or, if the equality is neither a simple one nor a row equality, + or, if the procedure fails by a fatal error. +*/ + +bool Item_func_eq::check_equality(THD *thd, COND_EQUAL *cond_equal, + List<Item> *eq_list) +{ + Item *left_item= arguments()[0]; + Item *right_item= arguments()[1]; + + if (left_item->type() == Item::ROW_ITEM && + right_item->type() == Item::ROW_ITEM) + { + /* + Item_splocal::type() for ROW variables returns Item::ROW_ITEM. + Distinguish ROW-type Item_splocal from Item_row. 
+ Example query: + SELECT 1 FROM DUAL WHERE row_sp_variable=ROW(100,200); + */ + if (left_item->get_item_splocal() || + right_item->get_item_splocal()) + return false; + return check_row_equality(thd, + cmp.subcomparators(), + (Item_row *) left_item, + (Item_row *) right_item, + cond_equal, eq_list); + } + return check_simple_equality(thd, + Context(ANY_SUBST, + compare_type_handler(), + compare_collation()), + left_item, right_item, cond_equal); +} + + +/** + Item_xxx::build_equal_items() + + Replace all equality predicates in a condition referenced by "this" + by multiple equality items. + + At each 'and' level the function detects items for equality predicates + and replaced them by a set of multiple equality items of class Item_equal, + taking into account inherited equalities from upper levels. + If an equality predicate is used not in a conjunction it's just + replaced by a multiple equality predicate. + For each 'and' level the function set a pointer to the inherited + multiple equalities in the cond_equal field of the associated + object of the type Item_cond_and. + The function also traverses the cond tree and and for each field reference + sets a pointer to the multiple equality item containing the field, if there + is any. If this multiple equality equates fields to a constant the + function replaces the field reference by the constant in the cases + when the field is not of a string type or when the field reference is + just an argument of a comparison predicate. + The function also determines the maximum number of members in + equality lists of each Item_cond_and object assigning it to + thd->lex->current_select->max_equal_elems. + + @note + Multiple equality predicate =(f1,..fn) is equivalent to the conjuction of + f1=f2, .., fn-1=fn. It substitutes any inference from these + equality predicates that is equivalent to the conjunction. 
Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
    it is equivalent to ((a1=a2) AND (a2=a3)).
    The function always makes a substitution of all equality predicates
    occurring in a conjunction for a minimal set of multiple equality
    predicates.
    This set can be considered as a canonical representation of the
    sub-conjunction of the equality predicates.
    E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
    (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
    (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
    while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
    (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
    but if additionally =(t4.d,t2.b) is inherited, it
    will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)

    The function performs the substitution in a recursive descent by
    the condition tree, passing to the next AND level a chain of multiple
    equality predicates which have been built at the upper levels.
    The Item_equal items built at the level are attached to other
    non-equality conjuncts as a sublist. The pointer to the inherited
    multiple equalities is saved in the and condition object (Item_cond_and).
    This chain allows us for any field reference occurrence easily to find a
    multiple equality that must be held for this occurrence.
    For each AND level we do the following:
    - scan it for all equality predicate (=) items
    - join them into disjoint Item_equal() groups
    - process the included OR conditions recursively to do the same for
      lower AND levels.

    We need to do things in this order as lower AND levels need to know about
    all possible Item_equal objects in upper levels.

  @param thd        thread handle
  @param inherited  path to all inherited multiple equality items

  @return
    pointer to the transformed condition,
    whose Used_tables_and_const_cache is up to date,
    so no additional update_used_tables() is needed on the result.
+*/ + +COND *Item_cond_and::build_equal_items(THD *thd, + COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) +{ + Item_equal *item_equal; + COND_EQUAL cond_equal; + cond_equal.upper_levels= inherited; + + if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL)) + return this; // Fatal error flag is set! + + List<Item> eq_list; + List<Item> *cond_args= argument_list(); + + List_iterator<Item> li(*cond_args); + Item *item; + + DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]); + /* + Retrieve all conjuncts of this level detecting the equalities + that are subject to substitution by multiple equality items and + removing each such predicate from the conjunction after having + found/created a multiple equality whose inference the predicate is. + */ + while ((item= li++)) + { + /* + PS/SP note: we can safely remove a node from AND-OR + structure here because it's restored before each + re-execution of any prepared statement/stored procedure. + */ + if (item->check_equality(thd, &cond_equal, &eq_list)) + li.remove(); + } + + /* + Check if we eliminated all the predicates of the level, e.g. + (a=a AND b=b AND a=a). + */ + if (!cond_args->elements && + !cond_equal.current_level.elements && + !eq_list.elements) + return (Item*) Item_true; + + /* + Finalize every multiple equality built at this level and keep + max_equal_elems of the current select at least as large as the + number of field items of each of them. + */ + List_iterator_fast<Item_equal> it(cond_equal.current_level); + while ((item_equal= it++)) + { + item_equal->set_link_equal_fields(link_item_fields); + item_equal->fix_fields(thd, NULL); + item_equal->update_used_tables(); + set_if_bigger(thd->lex->current_select->max_equal_elems, + item_equal->n_field_items()); + } + + /* + Keep the equalities of this level in the AND item itself; the + lower levels processed below inherit them through m_cond_equal. + */ + m_cond_equal.copy(cond_equal); + cond_equal.current_level= m_cond_equal.current_level; + inherited= &m_cond_equal; + + /* + Make replacement of equality predicates for lower levels + of the condition expression.
+ */ + li.rewind(); + while ((item= li++)) + { + Item *new_item; + if ((new_item= item->build_equal_items(thd, inherited, false, NULL)) + != item) + { + /* This replacement happens only for standalone equalities */ + /* + This is ok with PS/SP as the replacement is done for + cond_args of an AND/OR item, which are restored for each + execution of PS/SP. + */ + li.replace(new_item); + } + } + /* + Put the remaining simple equalities and the Item_equal objects of + this level back into the argument list of the AND. + */ + cond_args->append(&eq_list); + cond_args->append((List<Item> *)&cond_equal.current_level); + update_used_tables(); + if (cond_equal_ref) + *cond_equal_ref= &m_cond_equal; + return this; +} + + +/** + Item_cond version of build_equal_items() (Item_cond_and has its own + implementation). + + Every argument is processed with the same 'inherited' chain, and an + argument is replaced when its build_equal_items() call returns a + different item. The used-tables/const caches of this item are + re-initialized and then joined with each resulting argument. + link_item_fields is not used here, and cond_equal_ref is only checked + by the debug assertion. + + @return this +*/ +COND *Item_cond::build_equal_items(THD *thd, + COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) +{ + List<Item> *cond_args= argument_list(); + + List_iterator<Item> li(*cond_args); + Item *item; + + DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]); + /* + Make replacement of equality predicates for lower levels + of the condition expression. + Update used_tables_cache and const_item_cache on the way. + */ + used_tables_and_const_cache_init(); + while ((item= li++)) + { + Item *new_item; + if ((new_item= item->build_equal_items(thd, inherited, false, NULL)) + != item) + { + /* This replacement happens only for standalone equalities */ + /* + This is ok with PS/SP as the replacement is done for + arguments of an AND/OR item, which are restored for each + execution of PS/SP. + */ + li.replace(new_item); + } + used_tables_and_const_cache_join(new_item); + } + return this; +} + + +/** + Build multiple equalities for a standalone equality predicate + (see the comment in the body for what "standalone" means). + + If check_equality() accepts the predicate, the result depends on how + many items it produced: Item_true for none, the single Item_equal or + the single simple equality for one, and a new Item_cond_and otherwise. + If check_equality() rejects the predicate, the generic + Item_func::build_equal_items() is used. +*/ +COND *Item_func_eq::build_equal_items(THD *thd, + COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) +{ + COND_EQUAL cond_equal; + cond_equal.upper_levels= inherited; + List<Item> eq_list; + + DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]); + /* + If an equality predicate forms the whole and level, + we call it standalone equality and it's processed here. + E.g.
in the following where condition + WHERE a=5 AND (b=5 or a=c) + (b=5) and (a=c) are standalone equalities. + In general we can't leave alone standalone equalities: + for WHERE a=b AND c=d AND (b=c OR d=5) + b=c is replaced by =(a,b,c,d). + */ + if (Item_func_eq::check_equality(thd, &cond_equal, &eq_list)) + { + Item_equal *item_equal; + int n= cond_equal.current_level.elements + eq_list.elements; + if (n == 0) + return (Item*) Item_true; + else if (n == 1) + { + /* Exactly one item: either a multiple equality or a simple one. */ + if ((item_equal= cond_equal.current_level.pop())) + { + item_equal->fix_fields(thd, NULL); + item_equal->update_used_tables(); + set_if_bigger(thd->lex->current_select->max_equal_elems, + item_equal->n_field_items()); + item_equal->upper_levels= inherited; + if (cond_equal_ref) + *cond_equal_ref= new (thd->mem_root) COND_EQUAL(item_equal, + thd->mem_root); + return item_equal; + } + Item *res= eq_list.pop(); + res->update_used_tables(); + DBUG_ASSERT(res->type() == FUNC_ITEM); + return res; + } + else + { + /* + Here a new AND level must be created. It can happen only + when a row equality is processed as a standalone predicate.
+ NOTE(review): and_cond is used without a NULL check, and the loop + below calls fix_length_and_dec() where the single-item branch above + calls fix_fields() - confirm both are intended. + */ + Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd, eq_list); + and_cond->quick_fix_field(); + List<Item> *cond_args= and_cond->argument_list(); + List_iterator_fast<Item_equal> it(cond_equal.current_level); + while ((item_equal= it++)) + { + if (item_equal->fix_length_and_dec(thd)) + return NULL; + item_equal->update_used_tables(); + set_if_bigger(thd->lex->current_select->max_equal_elems, + item_equal->n_field_items()); + } + and_cond->m_cond_equal.copy(cond_equal); + cond_equal.current_level= and_cond->m_cond_equal.current_level; + cond_args->append((List<Item> *)&cond_equal.current_level); + and_cond->update_used_tables(); + if (cond_equal_ref) + *cond_equal_ref= &and_cond->m_cond_equal; + return and_cond; + } + } + return Item_func::build_equal_items(thd, inherited, link_item_fields, + cond_equal_ref); +} + + +/** + Generic Item_func version: no multiple equality is built here, only + the inherited ones are propagated into the arguments through + propagate_equal_fields(). The item itself is returned (asserted). +*/ +COND *Item_func::build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) +{ + /* + For each field reference in cond, not from equal item predicates, + set a pointer to the multiple equality it belongs to (if there is any) + as soon the field is not of a string type or the field reference is + an argument of a comparison predicate. + */ + COND *cond= propagate_equal_fields(thd, Context_boolean(), inherited); + cond->update_used_tables(); + DBUG_ASSERT(cond == this); + DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]); + return cond; +} + + +/** + Item_equal version: does what Item_func::build_equal_items() does and, + when cond_equal_ref is given, reports this multiple equality to the + caller in a new COND_EQUAL allocated on thd->mem_root. +*/ +COND *Item_equal::build_equal_items(THD *thd, COND_EQUAL *inherited, + bool link_item_fields, + COND_EQUAL **cond_equal_ref) +{ + COND *cond= Item_func::build_equal_items(thd, inherited, link_item_fields, + cond_equal_ref); + if (cond_equal_ref) + *cond_equal_ref= new (thd->mem_root) COND_EQUAL(this, thd->mem_root); + return cond; +} + + +/** + Build multiple equalities for a condition and all on expressions that + inherit these multiple equalities.
+ + The function first applies the cond->build_equal_items() method + to build all multiple equalities for condition cond utilizing equalities + referred to through the parameter inherited. The extended set of + equalities is returned in the structure referred to by the cond_equal_ref + parameter. After this the function calls itself recursively for + all on expressions whose direct references can be found in join_list + and which directly inherit the multiple equalities just built. + + @note + The on expression used in an outer join operation inherits all equalities + from the on expression of the embedding join, if there is any, or + otherwise - from the where condition. + This fact is not obvious, but presumably can be proved. + Consider the following query: + @code + SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a + WHERE t1.a=t2.a; + @endcode + If the on expression in the query inherits =(t1.a,t2.a), then we + can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers + the equality t3.a=t4.a. Although the on expression + t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one + in the query, the latter can be replaced by the former: the new query + will return the same result set as the original one.
+ + Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us + to use t1.a=t3.a AND t3.a=t4.a under the on condition: + @code + SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a + WHERE t1.a=t2.a + @endcode + This query equivalent to: + @code + SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2 + WHERE t1.a=t2.a + @endcode + Similarly the original query can be rewritten to the query: + @code + SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a + WHERE t1.a=t2.a + @endcode + that is equivalent to: + @code + SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1 + WHERE t1.a=t2.a + @endcode + Thus, applying equalities from the where condition we basically + can get more freedom in performing join operations. + Although we don't use this property now, it probably makes sense to use + it in the future. + @param thd Thread handler + @param cond condition to build the multiple equalities for + @param inherited path to all inherited multiple equality items + @param join_list list of join tables to which the condition + refers to + @ignore_on_conds TRUE <-> do not build multiple equalities + for on expressions + @param[out] cond_equal_ref pointer to the structure to place built + equalities in + @param link_equal_items equal fields are to be linked + + @return + pointer to the transformed condition containing multiple equalities +*/ + +/* + NOTE(review): the parameter documentation above does not match the + signature below: the function takes 'join' and reads thd from it, the + ignore_on_conds entry lacks the @param tag, and the last parameter is + called link_equal_fields rather than link_equal_items. +*/ +static COND *build_equal_items(JOIN *join, COND *cond, + COND_EQUAL *inherited, + List<TABLE_LIST> *join_list, + bool ignore_on_conds, + COND_EQUAL **cond_equal_ref, + bool link_equal_fields) +{ + THD *thd= join->thd; + + /* The output is reset first, so it is NULL when cond is NULL. */ + *cond_equal_ref= NULL; + + if (cond) + { + cond= cond->build_equal_items(thd, inherited, link_equal_fields, + cond_equal_ref); + if (*cond_equal_ref) + { + /* + Chain the new level to the inherited ones; the ON expressions + processed below inherit from the new level. + */ + (*cond_equal_ref)->upper_levels= inherited; + inherited= *cond_equal_ref; + } + } + + if (join_list && !ignore_on_conds) + { + TABLE_LIST *table; + List_iterator<TABLE_LIST> li(*join_list); + + while
((table= li++)) + { + if (table->on_expr) + { + List<TABLE_LIST> *nested_join_list= table->nested_join ? + &table->nested_join->join_list : NULL; + /* + We can modify table->on_expr because its old value will + be restored before re-execution of PS/SP. + NOTE(review): link_equal_fields is not passed to the recursive + call; presumably a prototype outside this chunk supplies a + default value for it - confirm. + */ + table->on_expr= build_equal_items(join, table->on_expr, inherited, + nested_join_list, ignore_on_conds, + &table->cond_equal); + if (unlikely(join->thd->trace_started())) + { + /* For a nested join the alias of its first table is traced. */ + const char *table_name; + if (table->nested_join) + table_name= table->nested_join->join_list.head()->alias.str; + else + table_name= table->alias.str; + trace_condition(join->thd, "ON expr", "build_equal_items", + table->on_expr, table_name); + } + } + } + } + + return cond; +} + + +/** + Compare field items by table order in the execution plan. + + If field1 and field2 belong to different tables then + field1 is considered better than field2 if the table containing + field1 is accessed earlier than the table containing field2. + The function finds out which of the two fields is better according + to this criterion. + If field1 and field2 belong to the same table then the result + of comparison depends on whether the fields are parts of + the key that is used to access this table.
+ + @param field1 first field item to compare + @param field2 second field item to compare + @param table_join_idx index to tables determining table order + + @retval + 1 if field1 is better than field2 + @retval + -1 if field2 is better than field1 + @retval + 0 otherwise +*/ + +/* + NOTE(review): the @retval lines above look inverted with respect to the + code below: the body returns -1 when field1 is a constant, and its own + inline comments label cmp==-1 as "f1 is better". +*/ +static int compare_fields_by_table_order(Item *field1, + Item *field2, + void *table_join_idx) +{ + int cmp= 0; + bool outer_ref= 0; + Item *field1_real= field1->real_item(); + Item *field2_real= field2->real_item(); + + /* Constants come first: -1 if field1 is constant, else 1 if field2 is. */ + if (field1->const_item() || field1_real->const_item()) + return -1; + if (field2->const_item() || field2_real->const_item()) + return 1; + Item_field *f1= (Item_field *) field1_real; + Item_field *f2= (Item_field *) field2_real; + /* + Outer references: -1 if only f1 is one, 1 if only f2 is one, + 0 if both are. + */ + if (f1->used_tables() & OUTER_REF_TABLE_BIT) + { + outer_ref= 1; + cmp= -1; + } + if (f2->used_tables() & OUTER_REF_TABLE_BIT) + { + outer_ref= 1; + cmp++; + } + if (outer_ref) + return cmp; + JOIN_TAB **idx= (JOIN_TAB **) table_join_idx; + + JOIN_TAB *tab1= idx[f1->field->table->tablenr]; + JOIN_TAB *tab2= idx[f2->field->table->tablenr]; + + /* + if one of the table is inside a merged SJM nest and another one isn't, + compare SJM bush roots of the tables.
+ */ + if (tab1->bush_root_tab != tab2->bush_root_tab) + { + if (tab1->bush_root_tab) + tab1= tab1->bush_root_tab; + + if (tab2->bush_root_tab) + tab2= tab2->bush_root_tab; + } + + /* + The order is taken from the difference of the JOIN_TAB pointers + (presumably both point into one array laid out in join order - + TODO confirm). + */ + cmp= (int)(tab1 - tab2); + + if (!cmp) + { + /* Fields f1, f2 belong to the same table */ + + JOIN_TAB *tab= idx[f1->field->table->tablenr]; + /* Key used to access the table: the ref key or else the quick select index. */ + uint keyno= MAX_KEY; + if (tab->ref.key_parts) + keyno= tab->ref.key; + else if (tab->select && tab->select->quick) + keyno = tab->select->quick->index; + if (keyno != MAX_KEY) + { + if (f1->field->part_of_key.is_set(keyno)) + cmp= -1; + if (f2->field->part_of_key.is_set(keyno)) + cmp++; + /* + Here: + if both f1, f2 are components of the key keyno then cmp==0, + if only f1 is a component of the key then cmp==-1 (f1 is better), + if only f2 is a component of the key then cmp==1, (f2 is better), + if none of f1,f2 is a component of the key cmp==0. + */ + if (!cmp) + { + /* Tie: the field found first among the key parts is better. */ + KEY *key_info= tab->table->key_info + keyno; + for (uint i= 0; i < key_info->user_defined_key_parts; i++) + { + Field *fld= key_info->key_part[i].field; + if (fld->eq(f1->field)) + { + cmp= -1; // f1 is better + break; + } + if (fld->eq(f2->field)) + { + cmp= 1; // f2 is better + break; + } + } + } + } + /* Last resort: order by the position of the fields in the table. */ + if (!cmp) + cmp= f1->field->field_index-f2->field->field_index; + } + return cmp < 0 ? -1 : (cmp ? 1 : 0); +} + + +/** + Return the nest that embeds the table of a field item if that nest has + sj_mat_info with is_used set, and NULL otherwise. + + The result of item->real_item() is cast to Item_field without a check, + so presumably callers only pass field items - TODO confirm. +*/ +static TABLE_LIST* embedding_sjm(Item *item) +{ + Item_field *item_field= (Item_field *) (item->real_item()); + TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding; + if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used) + return nest; + else + return NULL; +} + +/** + Generate minimal set of simple equalities equivalent to a multiple equality. + + The function retrieves the fields of the multiple equality item + item_equal and for each field f: + - if item_equal contains const it generates the equality f=const_item; + - otherwise, if f is not the first field, generates the equality + f=item_equal->get_first().
+ All generated equalities are added to the cond conjunction. + + @param cond condition to add the generated equality to + @param upper_levels structure to access multiple equality of upper levels + @param item_equal multiple equality to generate simple equality from + + @note + Before generating an equality the function checks that it has not + been generated for multiple equalities of the upper levels. + E.g. for the following where condition + WHERE a=5 AND ((a=b AND b=c) OR c>4) + the upper level AND condition will contain =(5,a), + while the lower level AND condition will contain =(5,a,b,c). + When splitting =(5,a,b,c) into separate equality predicates + we should omit 5=a, as we have it already in the upper level. + The following where condition gives us a more complicated case: + WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ... + Given the tables are accessed in the order t1->t2->t3->t4 for + the selected query execution plan the lower level multiple + equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to + t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be + generated for the upper level. Also t3.c=t4.d will be generated there. + So only t1.a=t3.c should be left in the lower level. + If cond is equal to 0, then no more than one equality is generated + and a pointer to it is returned as the result of the function. + + Equality substitution and semi-join materialization nests: + + In case join order looks like this: + + outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 + + We must not construct equalities like + + outer_tbl1.col = inner_tbl1.col + + because they would get attached to inner_tbl1 and will get evaluated + during materialization phase, when we don't have current value of + outer_tbl1.col. + + Item_equal::get_first() also takes similar measures for dealing with + equality substitution in presence of SJM nests. + + Grep for EqualityPropagationAndSjmNests for a more verbose description.
+ + @return + - The condition with generated simple equalities or + a pointer to the simple generated equality, if success. + - 0, otherwise. +*/ + +Item *eliminate_item_equal(THD *thd, COND *cond, COND_EQUAL *upper_levels, + Item_equal *item_equal) +{ + List<Item> eq_list; + Item_func_eq *eq_item= 0; + /* A constant multiple equality that evaluates to false yields Item_false. */ + if (((Item *) item_equal)->const_item() && !item_equal->val_int()) + return (Item*) Item_false; + Item *item_const= item_equal->get_const(); + Item_equal_fields_iterator it(*item_equal); + Item *head; + TABLE_LIST *current_sjm= NULL; + Item *current_sjm_head= NULL; + + DBUG_ASSERT(!cond || + cond->is_bool_literal() || + (cond->type() == Item::FUNC_ITEM && + ((Item_func *) cond)->functype() == Item_func::EQ_FUNC) || + (cond->type() == Item::COND_ITEM && + ((Item_func *) cond)->functype() == Item_func::COND_AND_FUNC)); + + /* + Pick the "head" item: the constant one or the first in the join order + (if the first in the join order happens to be inside an SJM nest, that's + ok, because this is where the value will be unpacked after + materialization). + */ + if (item_const) + head= item_const; + else + { + TABLE_LIST *emb_nest; + head= item_equal->get_first(NO_PARTICULAR_TAB, NULL); + /* Advance the iterator past the first field (presumably the head just picked). */ + it++; + if ((emb_nest= embedding_sjm(head))) + { + current_sjm= emb_nest; + current_sjm_head= head; + } + } + + Item *field_item; + /* + For each other item, generate "item=head" equality (except the tables that + are within SJ-Materialization nests, for those "head" is defined + differently) + */ + while ((field_item= it++)) + { + Item_equal *upper= field_item->find_item_equal(upper_levels); + Item *item= field_item; + TABLE_LIST *field_sjm= embedding_sjm(field_item); + if (!field_sjm) + { + current_sjm= NULL; + current_sjm_head= NULL; + } + + /* + Check if "field_item=head" equality is already guaranteed to be true + on upper AND-levels.
+ */ + if (upper) + { + TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field); + Item *upper_const= upper->get_const(); + if (item_const && upper_const) + { + /* + Upper item also has "field_item=const". + Don't produce equality if const is equal to item_const. + NOTE(review): func is used without a NULL check, unlike eq_item + further below. + */ + Item_func_eq *func= new (thd->mem_root) Item_func_eq(thd, item_const, upper_const); + func->set_cmp_func(thd); + func->quick_fix_field(); + if (func->val_int()) + item= 0; + } + else + { + /* + Look for an earlier field of item_equal that is in the same SJM + nest as field_item and belongs to the same upper-level multiple + equality. If one is found the loop stops with item != field_item + and no equality is produced below. + */ + Item_equal_fields_iterator li(*item_equal); + while ((item= li++) != field_item) + { + if (embedding_sjm(item) == field_sjm && + item->find_item_equal(upper_levels) == upper) + break; + } + } + if (embedding_sjm(field_item) != native_sjm) + item= NULL; /* Don't produce equality */ + } + + bool produce_equality= MY_TEST(item == field_item); + if (!item_const && field_sjm && field_sjm != current_sjm) + { + /* Entering an SJM nest */ + current_sjm_head= field_item; + if (!field_sjm->sj_mat_info->is_sj_scan) + produce_equality= FALSE; + } + + if (produce_equality) + { + /* + eq_item always holds the most recent equality; the previous one + is moved to eq_list before a new one is created. + */ + if (eq_item && eq_list.push_back(eq_item, thd->mem_root)) + return 0; + + /* + If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality + doesn't include a constant, we should produce equality with the first + of the equal items in this SJM (except for the first element inside the + SJM. For that, we produce the equality with the "head" item). + + In other cases, get the "head" item, which is either first of the + equals on top level, or the constant. + */ + Item *head_item= (!item_const && current_sjm && + current_sjm_head != field_item) ? current_sjm_head: head; + eq_item= new (thd->mem_root) Item_func_eq(thd, + field_item->remove_item_direct_ref(), + head_item->remove_item_direct_ref()); + + if (!eq_item || eq_item->set_cmp_func(thd)) + return 0; + eq_item->quick_fix_field(); + } + current_sjm= field_sjm; + } + + /* + We have produced zero, one, or more pair-wise equalities eq_i.
We want to + return an expression in the form: + + cond AND eq_1 AND eq_2 AND eq_3 AND ... + + 'cond' is a parameter for this function, which may be NULL, an Item_bool(1), + or an Item_func_eq or an Item_cond_and. + + We want to return a well-formed condition: no nested Item_cond_and objects, + or Item_cond_and with a single child: + - if 'cond' is an Item_cond_and, we add eq_i as its tail + - if 'cond' is Item_bool(1), we return eq_i + - otherwise, we create our own Item_cond_and and put 'cond' at the front of + it. + - if we have only one condition to return, we don't create an Item_cond_and + */ + + if (eq_item && eq_list.push_back(eq_item, thd->mem_root)) + return 0; + COND *res= 0; + /* Cases that need no Item_cond_and: nothing generated, or a single equality and no real cond. */ + switch (eq_list.elements) + { + case 0: + res= cond ? cond : (Item*) Item_true; + break; + case 1: + if (!cond || cond->is_bool_literal()) + res= eq_item; + break; + default: + break; + } + if (!res) + { + if (cond) + { + if (cond->type() == Item::COND_ITEM) + { + res= cond; + ((Item_cond *) res)->add_at_end(&eq_list); + } + else if (eq_list.push_front(cond, thd->mem_root)) + return 0; + } + } + if (!res) + res= new (thd->mem_root) Item_cond_and(thd, eq_list); + if (res) + { + res->quick_fix_field(); + res->update_used_tables(); + } + + return res; +} + + +/** + Substitute every field reference in a condition by the best equal field + and eliminate all multiple equality predicates. + + The function retrieves the cond condition and for each encountered + multiple equality predicate it sorts the field references in it + according to the order of tables specified by the table_join_idx + parameter. Then it eliminates the multiple equality predicate by + replacing it with the conjunction of simple equality predicates + equating every field from the multiple equality to the first + field in it, or to the constant, if there is any.
+ After this the function retrieves all other conjuncted + predicates and substitutes every field reference by the field reference + to the first equal field or equal constant if there are any. + + @param context_tab Join tab that 'cond' will be attached to, or + NO_PARTICULAR_TAB. See notes below. + @param cond condition to process + @param cond_equal multiple equalities to take into consideration + @param table_join_idx index to tables determining field preference + @param do_substitution if false: do not do any field substitution + + @note + At first glance a full sort of fields in multiple equality + seems to be an overkill. Yet it's not the case due to possible + new fields in multiple equality item of lower levels. We want + the order in them to comply with the order of upper levels. + + context_tab may be used to specify which join tab `cond` will be + attached to. There are two possible cases: + + 1. context_tab != NO_PARTICULAR_TAB + We're doing substitution for an Item which will be evaluated in the + context of a particular item. For example, if the optimizer does a + ref access on "tbl1.key= expr" then + = equality substitution will be performed on 'expr' + = it is known in advance that 'expr' will be evaluated when + table tbl1 is accessed. + Note that in this kind of substitution we never have to replace Item_equal + objects. For example, for + + t.key= func(col1=col2 AND col2=const) + + we will not build Item_equal or do equality substitution (if we decide to, + this function will need to be fixed to handle it) + + 2. context_tab == NO_PARTICULAR_TAB + We're doing substitution in WHERE/ON condition, which is not yet + attached to any particular join_tab. We will use information about the + chosen join order to make "optimal" substitutions, i.e. those that allow + filtering to be applied as soon as possible. See eliminate_item_equal() and + Item_equal::get_first() for details.
+ + @return + The transformed condition, or NULL in case of error +*/ + +static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab, + COND *cond, + COND_EQUAL *cond_equal, + void *table_join_idx, + bool do_substitution) +{ + Item_equal *item_equal; + COND *org_cond= cond; // Return this in case of fatal error + + if (cond->type() == Item::COND_ITEM) + { + List<Item> *cond_list= ((Item_cond*) cond)->argument_list(); + + bool and_level= ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC; + if (and_level) + { + cond_equal= &((Item_cond_and *) cond)->m_cond_equal; + cond_list->disjoin((List<Item> *) &cond_equal->current_level);/* remove Item_equal objects from the AND. */ + + /* Sort the fields of every multiple equality of this level. */ + List_iterator_fast<Item_equal> it(cond_equal->current_level); + while ((item_equal= it++)) + { + item_equal->sort(&compare_fields_by_table_order, table_join_idx); + } + } + + /* Process the remaining arguments recursively. */ + List_iterator<Item> li(*cond_list); + Item *item; + while ((item= li++)) + { + Item *new_item= substitute_for_best_equal_field(thd, context_tab, + item, cond_equal, + table_join_idx, + do_substitution); + /* + This works OK with PS/SP re-execution as changes are made to + the arguments of AND/OR items only + */ + if (new_item && new_item != item) + li.replace(new_item); + } + + if (and_level) + { + /* + Convert the multiple equalities of this level, except those + marked for deletion, into simple equalities accumulated in eq_cond. + */ + COND *eq_cond= 0; + List_iterator_fast<Item_equal> it(cond_equal->current_level); + bool false_eq_cond= FALSE; + bool all_deleted= true; + while ((item_equal= it++)) + { + if (item_equal->get_extraction_flag() == MARKER_DELETION) + continue; + all_deleted= false; + eq_cond= eliminate_item_equal(thd, eq_cond, cond_equal->upper_levels, + item_equal); + if (!eq_cond) + { + eq_cond= 0; + break; + } + else if (eq_cond->is_bool_literal() && !eq_cond->val_bool()) + { + /* + This occurs when eliminate_item_equal() finds that cond is + always false and substitutes it with a false literal. + Due to this, value of item_equal will be 0, so just return it.
+ */ + cond= eq_cond; + false_eq_cond= TRUE; + break; + } + } + if (eq_cond && !false_eq_cond) + { + /* Insert the generated equalities before all other conditions */ + if (eq_cond->type() == Item::COND_ITEM) + ((Item_cond *) cond)->add_at_head( + ((Item_cond *) eq_cond)->argument_list()); + else + { + if (cond_list->is_empty()) + cond= eq_cond; + else + { + /* Do not add an equality condition if it's always true */ + if (!eq_cond->is_bool_literal() && + cond_list->push_front(eq_cond, thd->mem_root)) + eq_cond= 0; + } + } + } + if (!eq_cond && !all_deleted) + { + /* + We are out of memory doing the transformation. + This is a fatal error now. However we bail out by returning the + original condition that we had before we started the transformation. + */ + cond_list->append((List<Item> *) &cond_equal->current_level); + } + } + } + else if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC) + { + /* + cond itself is a multiple equality. If it is the head of the + current level of its own upper_levels, that level is skipped so + that only the levels above it are consulted. + */ + item_equal= (Item_equal *) cond; + item_equal->sort(&compare_fields_by_table_order, table_join_idx); + cond_equal= item_equal->upper_levels; + if (cond_equal && cond_equal->current_level.head() == item_equal) + cond_equal= cond_equal->upper_levels; + /* + NULL is also returned for a multiple equality marked for deletion; + the recursive call site above keeps the original item when it + gets NULL back. + */ + if (item_equal->get_extraction_flag() == MARKER_DELETION) + return 0; + cond= eliminate_item_equal(thd, 0, cond_equal, item_equal); + return cond ? cond : org_cond; + } + else if (do_substitution) + { + /* + Any other item: walk this and all upper levels and let every + multiple equality substitute fields in cond through + Item::replace_equal_field. + */ + while (cond_equal) + { + List_iterator_fast<Item_equal> it(cond_equal->current_level); + while((item_equal= it++)) + { + REPLACE_EQUAL_FIELD_ARG arg= {item_equal, context_tab}; + if (!(cond= cond->transform(thd, &Item::replace_equal_field, + (uchar *) &arg))) + return 0; + } + cond_equal= cond_equal->upper_levels; + } + } + return cond; +} + + +/** + Check appearance of new constant items in multiple equalities + of a condition after reading a constant table.
+ + The function retrieves the cond condition and for each encountered + multiple equality checks whether new constants have appeared after + reading the constant (single row) table tab. If so it adjusts + the multiple equality appropriately. + + @param cond condition whose multiple equalities are to be checked + @param table constant table that has been read + @param const_key mark key parts as constant +*/ + +/* + NOTE(review): the comment above documents the parameter 'table', but the + parameter is named 'tab'; 'thd' is not documented. +*/ +static void update_const_equal_items(THD *thd, COND *cond, JOIN_TAB *tab, + bool const_key) +{ + /* Nothing to do if cond does not use the table of tab. */ + if (!(cond->used_tables() & tab->table->map)) + return; + + if (cond->type() == Item::COND_ITEM) + { + /* + Recurse into the arguments. const_key is not forwarded: it is + recomputed and is true only for arguments of a top-level AND. + */ + List<Item> *cond_list= ((Item_cond*) cond)->argument_list(); + List_iterator_fast<Item> li(*cond_list); + Item *item; + while ((item= li++)) + update_const_equal_items(thd, item, tab, + cond->is_top_level_item() && + ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC); + } + else if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC) + { + Item_equal *item_equal= (Item_equal *) cond; + bool contained_const= item_equal->get_const() != NULL; + item_equal->update_const(thd); + /* Act only if update_const() gave the equality its first constant. */ + if (!contained_const && item_equal->get_const()) + { + /* Update keys for range analysis */ + Item_equal_fields_iterator it(*item_equal); + while (it++) + { + Field *field= it.get_curr_field(); + JOIN_TAB *stat= field->table->reginfo.join_tab; + /* Keys that start with this field and are usable in the query. */ + key_map possible_keys= field->key_start; + possible_keys.intersect(field->table->keys_in_use_for_query); + stat[0].const_keys.merge(possible_keys); + + /* + For each field in the multiple equality (for which we know that it + is a constant) we have to find its corresponding key part, and set + that key part in const_key_parts.
+ This is done only when const_key is set. + */ + if (!possible_keys.is_clear_all()) + { + TABLE *field_tab= field->table; + KEYUSE *use; + for (use= stat->keyuse; use && use->table == field_tab; use++) + if (const_key && + !use->is_for_hash_join() && possible_keys.is_set(use->key) && + field_tab->key_info[use->key].key_part[use->keypart].field == + field) + field_tab->const_key_parts[use->key]|= use->keypart_map; + } + } + } + } +} + + +/** + Check if + WHERE expr=value AND expr=const + can be rewritten as: + WHERE const=value AND expr=const + + @param target - the target operator whose "expr" argument will be + replaced with "const". + @param target_expr - the target's "expr" which will be replaced with "const". + @param target_value - the target's second argument, it will remain unchanged. + @param source - the equality expression ("=" or "<=>") that + can be used to rewrite the "target" part + (under certain conditions, see the code). + @param source_expr - the source's "expr". It should be exactly equal to + the target's "expr" to make condition rewrite possible. + @param source_const - the source's "const" argument, it will be inserted + into "target" instead of "expr".
+*/ +static bool +can_change_cond_ref_to_const(Item_bool_func2 *target, + Item *target_expr, Item *target_value, + Item_bool_func2 *source, + Item *source_expr, Item *source_const) +{ + /* + Both expressions must be equal, the value must not be the very + constant being propagated, and the comparison type handler of the + target has the final say. + */ + return target_expr->eq(source_expr,0) && + target_value != source_const && + target->compare_type_handler()-> + can_change_cond_ref_to_const(target, target_expr, target_value, + source, source_expr, source_const); +} + + +/* + Change field = field to field = const for each found field = const in the + and_level. + + The condition is walked recursively. In a predicate where one argument + matches 'field' (see can_change_cond_ref_to_const()), that argument is + replaced by a clone of 'value'. If the rewritten predicate is itself + an = or <=> predicate that is not and_father and whose other argument + is not constant, it is marked with MARKER_CHANGE_COND and queued in + save_list so that the caller can propagate it further. +*/ + +static void +change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list, + Item *and_father, Item *cond, + Item_bool_func2 *field_value_owner, + Item *field, Item *value) +{ + if (cond->type() == Item::COND_ITEM) + { + bool and_level= ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item, + field_value_owner, field, value); + return; + } + if (cond->eq_cmp_result() == Item::COND_OK) + return; // Not a boolean function + + Item_bool_func2 *func= (Item_bool_func2*) cond; + Item **args= func->arguments(); + Item *left_item= args[0]; + Item *right_item= args[1]; + Item_func::Functype functype= func->functype(); + + /* First try to replace the right argument, then the left one. */ + if (can_change_cond_ref_to_const(func, right_item, left_item, + field_value_owner, field, value)) + { + Item *tmp=value->clone_item(thd); + if (tmp) + { + tmp->collation.set(right_item->collation); + thd->change_item_tree(args + 1, tmp); + func->update_used_tables(); + if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC) + && and_father != cond && !left_item->const_item()) + { + cond->marker= MARKER_CHANGE_COND; + COND_CMP *tmp2; + /* Will work, even if malloc would fail */ + if ((tmp2= new (thd->mem_root) COND_CMP(and_father, func))) + save_list->push_back(tmp2); + } + /* + LIKE can be optimized for BINARY/VARBINARY/BLOB columns, e.g.: + + from: WHERE
CONCAT(c1)='const1' AND CONCAT(c1) LIKE 'const2' + to: WHERE CONCAT(c1)='const1' AND 'const1' LIKE 'const2' + + So make sure to use set_cmp_func() only for non-LIKE operators. + */ + if (functype != Item_func::LIKE_FUNC) + ((Item_bool_rowready_func2*) func)->set_cmp_func(thd); + } + } + else if (can_change_cond_ref_to_const(func, left_item, right_item, + field_value_owner, field, value)) + { + Item *tmp= value->clone_item(thd); + if (tmp) + { + tmp->collation.set(left_item->collation); + thd->change_item_tree(args, tmp); + value= tmp; + func->update_used_tables(); + if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC) + && and_father != cond && !right_item->const_item()) + { + /* + Swap the arguments so that the constant ends up in args[1] + and the other argument in args[0]. + */ + args[0]= args[1]; // For easy check + thd->change_item_tree(args + 1, value); + cond->marker= MARKER_CHANGE_COND; + COND_CMP *tmp2; + /* Will work, even if malloc would fail */ + if ((tmp2=new (thd->mem_root) COND_CMP(and_father, func))) + save_list->push_back(tmp2); + } + if (functype != Item_func::LIKE_FUNC) + ((Item_bool_rowready_func2*) func)->set_cmp_func(thd); + } + } +} + + +/* + Propagate constants through a condition. + + For an AND/OR item the arguments are processed recursively with a + local list 'save'; for an AND item the predicates collected in 'save' + are then used for further replacement within their AND level. + For an = or <=> predicate inside an AND group where exactly one side + can be evaluated at optimization time and both sides have the same + cmp_type(), the constant is resolved against the other side and + change_cond_ref_to_const() is run over and_father. +*/ +static void +propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list, + COND *and_father, COND *cond) +{ + if (cond->type() == Item::COND_ITEM) + { + bool and_level= ((Item_cond*) cond)->functype() == + Item_func::COND_AND_FUNC; + List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + I_List<COND_CMP> save; + while ((item=li++)) + { + propagate_cond_constants(thd, &save,and_level ?
cond : item, item); + } + if (and_level) + { // Handle other found items + I_List_iterator<COND_CMP> cond_itr(save); + COND_CMP *cond_cmp; + while ((cond_cmp=cond_itr++)) + { + Item **args= cond_cmp->cmp_func->arguments(); + if (!args[0]->const_item()) + change_cond_ref_to_const(thd, &save,cond_cmp->and_level, + cond_cmp->and_level, + cond_cmp->cmp_func, args[0], args[1]); + } + } + } + else if (and_father != cond && cond->marker == MARKER_UNUSED) // In a AND group + { + if (cond->type() == Item::FUNC_ITEM && + (((Item_func*) cond)->functype() == Item_func::EQ_FUNC || + ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC)) + { + /* NOTE(review): the dynamic_cast result is used without a NULL check. */ + Item_bool_func2 *func= dynamic_cast<Item_bool_func2*>(cond); + Item **args= func->arguments(); + bool left_const= args[0]->can_eval_in_optimize(); + bool right_const= args[1]->can_eval_in_optimize(); + if (!(left_const && right_const) && + args[0]->cmp_type() == args[1]->cmp_type()) + { + if (right_const) + { + resolve_const_item(thd, &args[1], args[0]); + func->update_used_tables(); + change_cond_ref_to_const(thd, save_list, and_father, and_father, + func, args[0], args[1]); + } + else if (left_const) + { + resolve_const_item(thd, &args[0], args[1]); + func->update_used_tables(); + change_cond_ref_to_const(thd, save_list, and_father, and_father, + func, args[1], args[0]); + } + } + } + } +} + +/** + Simplify joins replacing outer joins by inner joins whenever it's + possible. + + The function, during a retrieval of join_list, eliminates those + outer joins that can be converted into inner join, possibly nested. + It also moves the on expressions for the converted outer joins + and from inner joins to conds. + The function also calculates some attributes for nested joins: + - used_tables + - not_null_tables + - dep_tables. + - on_expr_dep_tables + The first two attributes are used to test whether an outer join can + be substituted for an inner join. The third attribute represents the + relation 'to be dependent on' for tables.
If table t2 is dependent + on table t1, then in any evaluated execution plan table access to + table t1 must precede access to table t2. This relation is used also + to check whether the query contains invalid cross-references. + The fourth attribute is an auxiliary one and is used to calculate + dep_tables. + As the attribute dep_tables qualifies possible orders of tables in the + execution plan, the dependencies required by the straight join + modifiers are reflected in this attribute as well. + The function also removes all braces that can be removed from the join + expression without changing its meaning. + + @note + An outer join can be replaced by an inner join if the where condition + or the on expression for an embedding nested join contains a conjunctive + predicate rejecting null values for some attribute of the inner tables. + + E.g. in the query: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + @endcode + the predicate t2.b < 5 rejects nulls. + The query is converted first to: + @code + SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + @endcode + then to the equivalent form: + @code + SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a + @endcode + + + Similarly the following query: + @code + SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b + WHERE t2.c < 5 + @endcode + is converted to: + @code + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + + @endcode + + One conversion might trigger another: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a + LEFT JOIN t3 ON t3.b=t2.b + WHERE t3 IS NOT NULL => + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b => + SELECT * FROM t1, t2, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a + @endcode + + The function removes all unnecessary braces from the expression + produced by the conversions. + E.g. 
+ @code + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + @endcode + finally is converted to: + @code + SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + + @endcode + + + It also will remove braces from the following queries: + @code + SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b + SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b. + @endcode + + The benefit of this simplification procedure is that it might return + a query for which the optimizer can evaluate execution plan with more + join orders. With a left join operation the optimizer does not + consider any plan where one of the inner tables is before some of the outer + tables. + + IMPLEMENTATION + The function is implemented by a recursive procedure. On the recursive + ascent all attributes are calculated, all outer joins that can be + converted are replaced and then all unnecessary braces are removed. + As join list contains join tables in the reverse order sequential + elimination of outer joins does not require extra recursive calls. + + SEMI-JOIN NOTES + Remove all semi-joins that are within another semi-join (i.e. 
have + an "ancestor" semi-join nest) + + EXAMPLES + Here is an example of a join query with invalid cross references: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b + @endcode + + @param join reference to the query info + @param join_list list representation of the join to be converted + @param conds conditions to add on expressions for converted joins + @param top true <=> conds is the where condition + @param in_sj TRUE <=> processing semi-join nest's children + @return + - The new condition, if success + - 0, otherwise +*/ + +static COND * +simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top, + bool in_sj) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + TABLE_LIST *prev_table= 0; + List_iterator<TABLE_LIST> li(*join_list); + bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN); + DBUG_ENTER("simplify_joins"); + + /* + Try to simplify join operations from join_list. + The most outer join operation is checked for conversion first. + */ + while ((table= li++)) + { + table_map used_tables; + table_map not_null_tables= (table_map) 0; + + if ((nested_join= table->nested_join)) + { + /* + If the element of join_list is a nested join apply + the procedure to its nested join list first. + */ + if (table->on_expr) + { + Item *expr= table->on_expr; + /* + If an on expression E is attached to the table, + check all null rejected predicates in this expression. + If such a predicate over an attribute belonging to + an inner table of an embedded outer join is found, + the outer join is converted to an inner join and + the corresponding on expression is added to E. 
+ */ + expr= simplify_joins(join, &nested_join->join_list, + expr, FALSE, in_sj || table->sj_on_expr); + + /* Keep on_expr and prep_on_expr in sync with the simplified expression */ + if (!table->prep_on_expr || expr != table->on_expr) + { + DBUG_ASSERT(expr); + + table->on_expr= expr; + table->prep_on_expr= expr->copy_andor_structure(join->thd); + } + } + nested_join->used_tables= (table_map) 0; + nested_join->not_null_tables=(table_map) 0; + conds= simplify_joins(join, &nested_join->join_list, conds, top, + in_sj || table->sj_on_expr); + used_tables= nested_join->used_tables; + not_null_tables= nested_join->not_null_tables; + /* The following two might become unequal after table elimination: */ + nested_join->n_tables= nested_join->join_list.elements; + } + else + { + if (!table->prep_on_expr) + table->prep_on_expr= table->on_expr; + used_tables= table->get_map(); + if (conds) + not_null_tables= conds->not_null_tables(); + } + + if (table->embedding) + { + table->embedding->nested_join->used_tables|= used_tables; + table->embedding->nested_join->not_null_tables|= not_null_tables; + } + + if (!(table->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) || + (used_tables & not_null_tables)) + { + /* + For some of the inner tables there are conjunctive predicates + that reject nulls => the outer join can be replaced by an inner join. + */ + if (table->outer_join && !table->embedding && table->table) + table->table->maybe_null= FALSE; + table->outer_join= 0; + if (!(straight_join || table->straight)) + { + table->dep_tables= 0; + TABLE_LIST *embedding= table->embedding; + while (embedding) + { + if (embedding->nested_join->join_list.head()->outer_join) + { + if (!embedding->sj_subq_pred) + table->dep_tables= embedding->dep_tables; + break; + } + embedding= embedding->embedding; + } + } + if (table->on_expr) + { + /* Add ON expression to the WHERE or upper-level ON condition. 
*/ + if (conds) + { + conds= and_conds(join->thd, conds, table->on_expr); + conds->top_level_item(); + /* conds is always a new item as both cond and on_expr existed */ + DBUG_ASSERT(!conds->fixed()); + conds->fix_fields(join->thd, &conds); + } + else + conds= table->on_expr; + table->prep_on_expr= table->on_expr= 0; + } + } + + /* + Only inner tables of non-convertible outer joins + remain with on_expr. + */ + if (table->on_expr) + { + table_map table_on_expr_used_tables= table->on_expr->used_tables(); + table->dep_tables|= table_on_expr_used_tables; + if (table->embedding) + { + table->dep_tables&= ~table->embedding->nested_join->used_tables; + /* + Embedding table depends on tables used + in embedded on expressions. + */ + table->embedding->on_expr_dep_tables|= table_on_expr_used_tables; + } + else + table->dep_tables&= ~table->get_map(); + } + + if (prev_table) + { + /* The order of tables is reverse: prev_table follows table */ + if (prev_table->straight || straight_join) + prev_table->dep_tables|= used_tables; + if (prev_table->on_expr) + { + prev_table->dep_tables|= table->on_expr_dep_tables; + table_map prev_used_tables= prev_table->nested_join ? + prev_table->nested_join->used_tables : + prev_table->get_map(); + /* + If on expression contains only references to inner tables + we still make the inner tables dependent on the outer tables. + It would be enough to set dependency only on one outer table + for them. Yet this is really a rare case. + Note: + RAND_TABLE_BIT mask should not be counted as it + prevents update of inner table dependences. + For example it might happen if RAND() function + is used in JOIN ON clause. + */ + if (!((prev_table->on_expr->used_tables() & + ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) & + ~prev_used_tables)) + prev_table->dep_tables|= used_tables; + } + } + prev_table= table; + } + + /* + Flatten nested joins that can be flattened. + no ON expression and not a semi-join => can be flattened. 
+ */ + li.rewind(); + while ((table= li++)) + { + nested_join= table->nested_join; + if (table->sj_on_expr && !in_sj) + { + /* + If this is a semi-join that is not contained within another semi-join + leave it intact (otherwise it is flattened) + */ + /* + Make sure that any semi-join appears in + the join->select_lex->sj_nests list only once + */ + List_iterator_fast<TABLE_LIST> sj_it(join->select_lex->sj_nests); + TABLE_LIST *sj_nest; + while ((sj_nest= sj_it++)) + { + if (table == sj_nest) + break; + } + if (sj_nest) + continue; + join->select_lex->sj_nests.push_back(table, join->thd->mem_root); + + /* + Also, walk through semi-join children and mark those that are now + top-level + */ + TABLE_LIST *tbl; + List_iterator<TABLE_LIST> it(nested_join->join_list); + while ((tbl= it++)) + { + if (!tbl->on_expr && tbl->table) + tbl->table->maybe_null= FALSE; + } + } + else if (nested_join && !table->on_expr) + { + TABLE_LIST *tbl; + List_iterator<TABLE_LIST> it(nested_join->join_list); + List<TABLE_LIST> repl_list; + while ((tbl= it++)) + { + tbl->embedding= table->embedding; + if (!tbl->embedding && !tbl->on_expr && tbl->table) + tbl->table->maybe_null= FALSE; + tbl->join_list= table->join_list; + repl_list.push_back(tbl, join->thd->mem_root); + tbl->dep_tables|= table->dep_tables; + } + /* Put the children into join_list in place of the nest itself */ + li.replace(repl_list); + } + } + DBUG_RETURN(conds); +} + + +/** + Assign each nested join structure a bit in nested_join_map. + + Assign each nested join structure (except ones that embed only one element + and so are redundant) a bit in nested_join_map. Only nests that have an ON + expression receive a bit; nests without one (sj-nests) are skipped, but + their children are still processed. + + @param join_list List of tables + @param first_unused Number of first unused bit in nested_join_map before the + call + + @note + This function is called after simplify_joins(), when there are no + redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so + we will not run out of bits in nested_join_map. + + @return + First unused bit in nested_join_map after the call. 
+*/ + +static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list, + uint first_unused) +{ + List_iterator<TABLE_LIST> li(*join_list); + TABLE_LIST *table; + DBUG_ENTER("build_bitmap_for_nested_joins"); + while ((table= li++)) + { + NESTED_JOIN *nested_join; + if ((nested_join= table->nested_join)) + { + /* + It is guaranteed by simplify_joins() function that a nested join + that has only one child represents a single table VIEW (and the child + is an underlying table). We don't assign bits to such nested join + structures because + 1. it is redundant (a "sequence" of one table cannot be interleaved + with anything) + 2. we could run out of bits in nested_join_map otherwise. + */ + if (nested_join->n_tables != 1) + { + /* Don't assign bits to sj-nests */ + if (table->on_expr) + nested_join->nj_map= (nested_join_map) 1 << first_unused++; + first_unused= build_bitmap_for_nested_joins(&nested_join->join_list, + first_unused); + } + } + } + DBUG_RETURN(first_unused); +} + + +/** + Set NESTED_JOIN::counter and n_tables in all nested joins in passed list. + + For all nested joins contained in the passed join_list (including its + children), set: + - nested_join->counter=0 + - nested_join->n_tables= {number of non-degenerate direct children}. + + Non-degenerate means non-const base table or a join nest that has a + non-degenerate child. + + @param join_list List of nested joins to process. It may also contain base + tables which will be ignored. 
+*/ + +static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list) +{ + List_iterator<TABLE_LIST> li(*join_list); + TABLE_LIST *table; + DBUG_ENTER("reset_nj_counters"); + uint n=0; /* Number of non-degenerate direct children; this is the return value */ + while ((table= li++)) + { + NESTED_JOIN *nested_join; + bool is_eliminated_nest= FALSE; + if ((nested_join= table->nested_join)) + { + nested_join->counter= 0; + nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list); + if (!nested_join->n_tables) + is_eliminated_nest= TRUE; + } + /* A base table is counted only if it is neither eliminated nor constant */ + const table_map removed_tables= join->eliminated_tables | + join->const_table_map; + + if ((table->nested_join && !is_eliminated_nest) || + (!table->nested_join && (table->table->map & ~removed_tables))) + n++; + } + DBUG_RETURN(n); +} + + +/** + Check interleaving with inner tables of an outer join for + extension table. + + Check if table next_tab can be added to current partial join order, and + if yes, record that it has been added. + + The function assumes that both current partial join order and its + extension with next_tab are valid wrt table dependencies. + + @verbatim + IMPLEMENTATION + LIMITATIONS ON JOIN ORDER + The nested [outer] joins executioner algorithm imposes these + limitations on join order: + 1. "Outer tables first" - any "outer" table must be before any + corresponding "inner" table. + 2. "No interleaving" - tables inside a nested join must form a + continuous sequence in join order (i.e. the sequence must not be + interrupted by tables that are outside of this nested join). + + #1 is checked elsewhere, this function checks #2 provided that #1 has + been already checked. + + WHY NEED NON-INTERLEAVING + Consider an example: + + select * from t0 join t1 left join (t2 join t3) on cond1 + + The join order "t1 t2 t0 t3" is invalid: + + table t0 is outside of the nested join, so WHERE condition + for t0 is attached directly to t0 (without triggers, and it + may be used to access t0). 
Applying WHERE(t0) to (t2,t0,t3) + record is invalid as we may miss combinations of (t1, t2, t3) + that satisfy condition cond1, and produce a null-complemented + (t1, t2.NULLs, t3.NULLs) row, which should not have been + produced. + + If table t0 is not between t2 and t3, the problem doesn't exist: + If t0 is located after (t2,t3), WHERE(t0) is applied after nested + join processing has finished. + If t0 is located before (t2,t3), predicates like WHERE_cond(t0, t2) + are wrapped into condition triggers, which takes care of correct + nested join processing. + + HOW IT IS IMPLEMENTED + The limitations on join order can be rephrased as follows: for valid + join order one must be able to: + 1. write down the used tables in the join order on one line. + 2. for each nested join, put one '(' and one ')' on the said line + 3. write "LEFT JOIN" and "ON (...)" where appropriate + 4. get a query equivalent to the query we're trying to execute. + + Calls to check_interleaving_with_nj() are equivalent to writing the + above described line from left to right. + + A single check_interleaving_with_nj(A,B) call is equivalent + to writing table B and appropriate brackets on condition that + table A and appropriate brackets is the last what was + written. Graphically the transition is as follows: + + +---- current position + | + ... last_tab ))) | ( next_tab ) )..) | ... + X Y Z | + +- need to move to this + position. + + Notes about the position: + The caller guarantees that there is no more than one X-bracket by + checking "!(remaining_tables & s->dependent)" before calling this + function. X-bracket may have a pair in Y-bracket. + + When "writing" we store/update this auxiliary info about the current + position: + 1. join->cur_embedding_map - bitmap of pairs of brackets (aka nested + joins) we've opened but didn't close. + 2. 
{each NESTED_JOIN structure not simplified away}->counter - number + of this nested join's children that have already been added to + the partial join order. + @endverbatim + + @param next_tab Table we're going to extend the current partial join with + + @retval + FALSE Join order extended, nested joins info about current join + order (see NOTE section) updated. + @retval + TRUE Requested join order extension not allowed. +*/ + +static bool check_interleaving_with_nj(JOIN_TAB *next_tab) +{ + JOIN *join= next_tab->join; + + if (join->cur_embedding_map & ~next_tab->embedding_map) + { + /* + next_tab is outside of the "pair of brackets" we're currently in. + Cannot add it. + */ + return TRUE; + } + + TABLE_LIST *next_emb= next_tab->table->pos_in_table_list->embedding; + /* + Do update counters for "pairs of brackets" that we've left (marked as + X,Y,Z in the above picture) + */ + for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding) + { + if (!next_emb->sj_on_expr) + { + next_emb->nested_join->counter++; + if (next_emb->nested_join->counter == 1) + { + /* + next_emb is the first table inside a nested join we've "entered". In + the picture above, we're looking at the 'X' bracket. Don't exit yet as + X bracket might have Y pair bracket. + */ + join->cur_embedding_map |= next_emb->nested_join->nj_map; + } + + if (next_emb->nested_join->n_tables != + next_emb->nested_join->counter) + break; + + /* + We're currently at Y or Z-bracket as depicted in the above picture. + Mark that we've left it and continue walking up the brackets hierarchy. + */ + join->cur_embedding_map &= ~next_emb->nested_join->nj_map; + } + } + return FALSE; +} + + +/** + Nested joins perspective: Remove the last table from the join order. + + The algorithm is the reciprocal of check_interleaving_with_nj(), hence + parent join nest nodes are updated only when the last table in its child + node is removed. The ASCII graphic below will clarify. 
+ + %A table nesting such as <tt> t1 x [ ( t2 x t3 ) x ( t4 x t5 ) ] </tt>is + represented by the below join nest tree. + + @verbatim + NJ1 + _/ / \ + _/ / NJ2 + _/ / / \ + / / / \ + t1 x [ (t2 x t3) x (t4 x t5) ] + @endverbatim + + At the point in time when check_interleaving_with_nj() adds the table t5 to + the query execution plan, QEP, it also directs the node named NJ2 to mark + the table as covered. NJ2 does so by incrementing its @c counter + member. Since all of NJ2's tables are now covered by the QEP, the algorithm + proceeds up the tree to NJ1, incrementing its counter as well. All join + nests are now completely covered by the QEP. + + restore_prev_nj_state() does the above in reverse. As seen above, the node + NJ1 contains the nodes t2, t3, and NJ2. Its counter being equal to 3 means + that the plan covers t2, t3, and NJ2, @e and that the sub-plan (t4 x t5) + completely covers NJ2. The removal of t5 from the partial plan will first + decrement NJ2's counter to 1. It will then detect that NJ2 went from being + completely to partially covered, and hence the algorithm must continue + upwards to NJ1 and decrement its counter to 2. %A subsequent removal of t4 + will however not influence NJ1 since it did not un-cover the last table in + NJ2. + + SYNOPSIS + restore_prev_nj_state() + last join table to remove, it is assumed to be the last in current + partial join order. + + DESCRIPTION + + Remove the last table from the partial join order and update the nested + joins counters and join->cur_embedding_map. It is ok to call this + function for the first table in join order (for which + check_interleaving_with_nj has not been called) + + @param last join table to remove, it is assumed to be the last in current + partial join order. 
+*/ + +static void restore_prev_nj_state(JOIN_TAB *last) +{ + TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding; + JOIN *join= last->join; + for (;last_emb != NULL && last_emb != join->emb_sjm_nest; + last_emb= last_emb->embedding) + { + if (!last_emb->sj_on_expr) + { + NESTED_JOIN *nest= last_emb->nested_join; + DBUG_ASSERT(nest->counter > 0); + + /* Sample the state before the counter is decremented below */ + bool was_fully_covered= nest->is_fully_covered(); + + join->cur_embedding_map|= nest->nj_map; + + if (--nest->counter == 0) + join->cur_embedding_map&= ~nest->nj_map; + + /* Only a nest that was fully covered propagates the removal upwards */ + if (!was_fully_covered) + break; + } + } +} + + +/* + Compute allowed_top_level_tables - a bitmap of tables one can put into the + join order if the last table in the join prefix is not inside any outer + join nest. + + NESTED_JOIN::direct_children_map - a bitmap of tables ... if the last + table in the join prefix is inside the join nest. + + Note: it looks like a sensible way to do this is a top-down descent on + JOIN::join_list, but apparently that list is missing I_S tables. + e.g. for SHOW TABLES WHERE col IN (SELECT ...) it will just have a + semi-join nest. +*/ + +void JOIN::calc_allowed_top_level_tables(SELECT_LEX *lex) +{ + TABLE_LIST *tl; + List_iterator<TABLE_LIST> ti(lex->leaf_tables); + DBUG_ENTER("JOIN::calc_allowed_top_level_tables"); + DBUG_ASSERT(allowed_top_level_tables == 0); // Should only be called once + + while ((tl= ti++)) + { + table_map map; + TABLE_LIST *embedding= tl->embedding; + + if (tl->table) + map= tl->table->map; + else + { + DBUG_ASSERT(tl->jtbm_subselect); + map= table_map(1) << tl->jtbm_table_no; + } + + if (!(embedding= tl->embedding)) + { + allowed_top_level_tables |= map; + continue; + } + + // Walk out of any semi-join nests + while (embedding && !embedding->on_expr) + { + // semi-join nest or an INSERT-INTO view... + embedding->nested_join->direct_children_map |= map; + embedding= embedding->embedding; + } + + // Ok we are in the parent nested outer join nest. 
+ if (!embedding) + { + allowed_top_level_tables |= map; + continue; + } + embedding->nested_join->direct_children_map |= map; + + // Walk to grand-parent join nest. + embedding= embedding->embedding; + + // Walk out of any semi-join nests + while (embedding && !embedding->on_expr) + { + DBUG_ASSERT(embedding->sj_on_expr); + embedding->nested_join->direct_children_map |= map; + embedding= embedding->embedding; + } + + if (embedding) + { + DBUG_ASSERT(embedding->on_expr); // Impossible, see above + embedding->nested_join->direct_children_map |= map; + } + else + allowed_top_level_tables |= map; + } + DBUG_VOID_RETURN; +} + + +/* + Get the tables that one is allowed to have as the next table in the + current plan +*/ + +table_map JOIN::get_allowed_nj_tables(uint idx) +{ + TABLE_LIST *last_emb; + if (idx > const_tables && + (last_emb= positions[idx-1].table->table->pos_in_table_list->embedding)) + { + for (;last_emb && last_emb != emb_sjm_nest; + last_emb= last_emb->embedding) + { + if (!last_emb->sj_on_expr) + { + NESTED_JOIN *nest= last_emb->nested_join; + if (!nest->is_fully_covered()) + { + // Return tables that are direct members of this join nest + return nest->direct_children_map; + } + } + } + } + // Return bitmap of tables not in any join nest + if (emb_sjm_nest) + return emb_sjm_nest->nested_join->direct_children_map; + return allowed_top_level_tables; +} + + +/* + Change access methods not to use join buffering and adjust costs accordingly + + SYNOPSIS + optimize_wo_join_buffering() + join + first_tab The first tab to do re-optimization for + last_tab The last tab to do re-optimization for + last_remaining_tables Bitmap of tables that are not in the + [0...last_tab] join prefix + first_alt TRUE <=> Use the LooseScan plan for the first_tab + no_jbuf_before Don't allow to use join buffering before this + table + outer_rec_count OUT New output record count + reopt_cost OUT New join prefix cost + + DESCRIPTION + Given a join prefix [0; ... 
first_tab], change the access to the tables + in the [first_tab; last_tab] not to use join buffering. This is needed + because some semi-join strategies cannot be used together with the join + buffering. + In general case the best table order in [first_tab; last_tab] range with + join buffering is different from the best order without join buffering but + we don't try finding a better join order. (TODO ask Igor why did we + choose not to do this in the end. that's actually the difference from the + forking approach) +*/ + +void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, + table_map last_remaining_tables, + bool first_alt, uint no_jbuf_before, + double *outer_rec_count, double *reopt_cost) +{ + double cost, rec_count; + table_map reopt_remaining_tables= last_remaining_tables; + uint i; + THD *thd= join->thd; + Json_writer_temp_disable trace_wo_join_buffering(thd); + + if (first_tab > join->const_tables) + { + cost= join->positions[first_tab - 1].prefix_cost; + rec_count= join->positions[first_tab - 1].prefix_record_count; + } + else + { + cost= 0.0; + rec_count= 1; + } + + *outer_rec_count= rec_count; + for (i= first_tab; i <= last_tab; i++) + reopt_remaining_tables |= join->positions[i].table->table->map; + + /* + best_access_path() optimization depends on the value of + join->cur_sj_inner_tables. Our goal in this function is to do a + re-optimization with disabled join buffering, but no other changes. + In order to achieve this, cur_sj_inner_tables needs to have the same + value it had during the original invocations of best_access_path. + + We know that this function, optimize_wo_join_buffering() is called to + re-optimize semi-join join order range, which allows to conclude that + the "original" value of cur_sj_inner_tables was 0. 
+ */ + table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; + join->cur_sj_inner_tables= 0; + + for (i= first_tab; i <= last_tab; i++) + { + JOIN_TAB *rs= join->positions[i].table; + POSITION pos, loose_scan_pos; + + if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer) + { + /* Find the best access method that would not use join buffering */ + best_access_path(join, rs, reopt_remaining_tables, + join->positions, i, + TRUE, rec_count, + &pos, &loose_scan_pos); + } + else + pos= join->positions[i]; + + if ((i == first_tab && first_alt)) + pos= loose_scan_pos; + + reopt_remaining_tables &= ~rs->table->map; + rec_count= COST_MULT(rec_count, pos.records_read); + cost= COST_ADD(cost, pos.read_time); + cost= COST_ADD(cost, rec_count / TIME_FOR_COMPARE); + //TODO: take into account join condition selectivity here + double pushdown_cond_selectivity= 1.0; + table_map real_table_bit= rs->table->map; + if (join->thd->variables.optimizer_use_condition_selectivity > 1) + { + pushdown_cond_selectivity= table_cond_selectivity(join, i, rs, + reopt_remaining_tables & + ~real_table_bit); + } + double partial_join_cardinality= rec_count * + pushdown_cond_selectivity; + join->positions[i].partial_join_cardinality= partial_join_cardinality; + (*outer_rec_count) *= pushdown_cond_selectivity; + if (!rs->emb_sj_nest) + *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read); + + } + join->cur_sj_inner_tables= save_cur_sj_inner_tables; + + *reopt_cost= cost; +} + + +/* + Optimize a WHERE/HAVING condition. + + For a non-NULL condition the steps are: build multiple equalities with + build_equal_items(), propagate constants with propagate_cond_constants(), + then remove trivial parts with remove_eq_conds(), which is given cond_value. + If the result is an AND item, *cond_equal is pointed at its m_cond_equal. + For a NULL condition *cond_value is set to Item::COND_TRUE and, unless + ignore_on_conds is set, build_equal_items() is still run for join_list. + + @return the resulting condition (NULL if conds was NULL) +*/ + +static COND * +optimize_cond(JOIN *join, COND *conds, + List<TABLE_LIST> *join_list, bool ignore_on_conds, + Item::cond_result *cond_value, COND_EQUAL **cond_equal, + int flags) +{ + THD *thd= join->thd; + DBUG_ENTER("optimize_cond"); + + if (!conds) + { + *cond_value= Item::COND_TRUE; + if (!ignore_on_conds) + build_equal_items(join, NULL, NULL, join_list, ignore_on_conds, + cond_equal); + } + else + { + /* + Build all multiple equality predicates and eliminate equality + predicates that 
can be inferred from these multiple equalities. + For each reference of a field included into a multiple equality + that occurs in a function set a pointer to the multiple equality + predicate. Substitute a constant instead of this field if the + multiple equality contains a constant. + */ + + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_cond(thd, "condition_processing"); + trace_cond.add("condition", join->conds == conds ? "WHERE" : "HAVING") + .add("original_condition", conds); + + Json_writer_array trace_steps(thd, "steps"); + DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY);); + conds= build_equal_items(join, conds, NULL, join_list, + ignore_on_conds, cond_equal, + MY_TEST(flags & OPT_LINK_EQUAL_FIELDS)); + DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY);); + { + Json_writer_object equal_prop_wrapper(thd); + equal_prop_wrapper.add("transformation", "equality_propagation") + .add("resulting_condition", conds); + } + + /* change field = field to field = const for each found field = const */ + propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds); + /* + Remove all instances of item == item + Remove all and-levels where CONST item != CONST item + */ + DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY);); + { + Json_writer_object const_prop_wrapper(thd); + const_prop_wrapper.add("transformation", "constant_propagation") + .add("resulting_condition", conds); + } + conds= conds->remove_eq_conds(thd, cond_value, true); + if (conds && conds->type() == Item::COND_ITEM && + ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC) + *cond_equal= &((Item_cond_and*) conds)->m_cond_equal; + + { + Json_writer_object cond_removal_wrapper(thd); + cond_removal_wrapper.add("transformation", "trivial_condition_removal") + .add("resulting_condition", conds); + } + DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY);); + } + DBUG_RETURN(conds); +} + + +/** + @brief 
+ Propagate multiple equalities to the sub-expressions of a condition + + @param thd thread handle + @param cond the condition where equalities are to be propagated + @param new_equalities the multiple equalities to be propagated + @param inherited path to all inherited multiple equality items + @param[out] is_simplifiable_cond 'cond' may be simplified after the + propagation of the equalities + + @details + The function recursively traverses the tree of the condition 'cond' and + for each of its AND sub-levels of any depth the function merges the multiple + equalities from the list 'new_equalities' into the multiple equalities + attached to the AND item created for this sub-level. + The function also [re]sets references to the equalities formed by the + merges of multiple equalities in all field items occurring in 'cond' + that are encountered in the equalities. + If the result of any merge of multiple equalities is an impossible + condition the function returns TRUE in the parameter is_simplifiable_cond. 
+*/ + +void propagate_new_equalities(THD *thd, Item *cond, + List<Item_equal> *new_equalities, + COND_EQUAL *inherited, + bool *is_simplifiable_cond) +{ + if (cond->type() == Item::COND_ITEM) + { + bool and_level= ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC; + if (and_level) + { + Item_cond_and *cond_and= (Item_cond_and *) cond; + List<Item_equal> *cond_equalities= &cond_and->m_cond_equal.current_level; + cond_and->m_cond_equal.upper_levels= inherited; + if (!cond_equalities->is_empty() && cond_equalities != new_equalities) + { + Item_equal *equal_item; + List_iterator<Item_equal> it(*new_equalities); + while ((equal_item= it++)) + { + equal_item->merge_into_list(thd, cond_equalities, true, true); + } + List_iterator<Item_equal> ei(*cond_equalities); + while ((equal_item= ei++)) + { + if (equal_item->const_item() && !equal_item->val_int()) + { + *is_simplifiable_cond= true; + return; + } + } + } + } + + Item *item; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + while ((item= li++)) + { + COND_EQUAL *new_inherited= and_level && item->type() == Item::COND_ITEM ? 
+ &((Item_cond_and *) cond)->m_cond_equal : + inherited; + propagate_new_equalities(thd, item, new_equalities, new_inherited, + is_simplifiable_cond); + } + } + else if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC) + { + Item_equal *equal_item; + List_iterator<Item_equal> it(*new_equalities); + Item_equal *equality= (Item_equal *) cond; + equality->upper_levels= inherited; + while ((equal_item= it++)) + { + equality->merge_with_check(thd, equal_item, true); + } + if (equality->const_item() && !equality->val_int()) + *is_simplifiable_cond= true; + } + else + { + cond= cond->propagate_equal_fields(thd, + Item::Context_boolean(), inherited); + cond->update_used_tables(); + } +} + +/* + Check if cond_is_datetime_is_null() is true for the condition cond, or + for any of its AND/OR-children +*/ +bool cond_has_datetime_is_null(Item *cond) +{ + if (cond_is_datetime_is_null(cond)) + return true; + + if (cond->type() == Item::COND_ITEM) + { + List<Item> *cond_arg_list= ((Item_cond*) cond)->argument_list(); + List_iterator<Item> li(*cond_arg_list); + Item *item; + while ((item= li++)) + { + if (cond_has_datetime_is_null(item)) + return true; + } + } + return false; +} + +/* + Check if passed condtition has for of + + not_null_date_col IS NULL + + where not_null_date_col has a datte or datetime type +*/ + +bool cond_is_datetime_is_null(Item *cond) +{ + if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC) + { + return ((Item_func_isnull*) cond)->arg_is_datetime_notnull_field(); + } + return false; +} + + +/** + @brief + Evaluate all constant boolean sub-expressions in a condition + + @param thd thread handle + @param cond condition where where to evaluate constant sub-expressions + @param[out] cond_value : the returned value of the condition + (TRUE/FALSE/UNKNOWN: + Item::COND_TRUE/Item::COND_FALSE/Item::COND_OK) + @return + the item that is the result of the substitution of 
all inexpensive constant + boolean sub-expressions into cond, or, + NULL if the condition is constant and is evaluated to FALSE. + + @details + This function looks for all inexpensive constant boolean sub-expressions in + the given condition 'cond' and substitutes them for their values. + For example, the condition 2 > (5 + 1) or a < (10 / 2) + will be transformed to the condition a < (10 / 2). + Note that a constant sub-expression is evaluated only if it is constant and + inexpensive. A sub-expression with an uncorrelated subquery may be evaluated + only if the subquery is considered as inexpensive. + The function does not evaluate a constant sub-expression if it is not on one + of the AND/OR levels of the condition 'cond'. For example, the subquery in the + condition a > (select max(b) from t1 where b > 5) will never be evaluated + by this function. + If a constant boolean sub-expression is evaluated to TRUE then: + - when the sub-expression is a conjunct of an AND formula it is simply + removed from this formula + - when the sub-expression is a disjunct of an OR formula the whole OR + formula is converted to TRUE + If a constant boolean sub-expression is evaluated to FALSE then: + - when the sub-expression is a disjunct of an OR formula it is simply + removed from this formula + - when the sub-expression is a conjunct of an AND formula the whole AND + formula is converted to FALSE + When a disjunct/conjunct is removed from an OR/AND formula it might happen + that there is only one conjunct/disjunct remaining. In this case this + remaining disjunct/conjunct must be merged into underlying AND/OR formula, + because AND/OR levels must alternate in the same way as they alternate + after fix_fields() is called for the original condition. + The specifics of merging a formula f into an AND formula A appear + when A contains multiple equalities and f contains multiple equalities. + In this case the multiple equalities from f and A have to be merged. 
After this the resulting multiple equalities have to be propagated into
  all the AND/OR levels of the formula A (see propagate_new_equalities()).
  The propagation of multiple equalities might result in forming multiple
  equalities that are always FALSE. This, in its turn, might trigger further
  simplification of the condition.

  @note
  EXAMPLE 1:
  SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5 OR 1 != 1);
  First 1 != 1 will be removed from the second conjunct:
  => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5);
  Then (b = 5 AND a = 5) will be merged into the top level condition:
  => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5) AND (a = 5);
  Then (b = 5), (a = 5) will be propagated into the disjuncts of
  (b = 1 OR a = 1):
  => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
                             (a = 1) AND (b = 5) AND (a = 5)) AND
                            (b = 5) AND (a = 5)
  => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
                             (FALSE AND (b = 5))) AND
                             (b = 5) AND (a = 5)
  After this an additional call of remove_eq_conds() converts it
  to FALSE

  EXAMPLE 2:
  SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5 OR 1 != 1);
  => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5);
  => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5) AND (a = 5);
  => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
                             (a = 5) AND (b = 5) AND (a = 5)) AND
                            (b = 5) AND (a = 5)
  => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
                             ((b = 5) AND (a = 5))) AND
                             (b = 5) AND (a = 5)
  After this an additional call of remove_eq_conds() converts it to
  =>  SELECT * FROM t1 WHERE (b = 5) AND (a = 5)
*/


COND *
Item_cond::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                           bool top_level_arg)
{
  bool and_level= functype() == Item_func::COND_AND_FUNC;
  List<Item> *cond_arg_list= argument_list();

  if (and_level)
  {
    /*
      Remove multiple equalities that became always true (e.g. after
      constant row substitution).
      They would be removed later in the function anyway, but the list of
      them cond_equal.current_level also must be adjusted correspondingly.
      So it's easier to do it at one pass through the list of the equalities.
    */
    List<Item_equal> *cond_equalities=
      &((Item_cond_and *) this)->m_cond_equal.current_level;
    /* Cut the equalities off the argument list while they are filtered */
    cond_arg_list->disjoin((List<Item> *) cond_equalities);
    List_iterator<Item_equal> it(*cond_equalities);
    Item_equal *eq_item;
    while ((eq_item= it++))
    {
      if (eq_item->const_item() && eq_item->val_int())
        it.remove();
    }
    /* Put the surviving equalities back at the end of the argument list */
    cond_arg_list->append((List<Item> *) cond_equalities);
  }

  /* Multiple equalities collected from simplified arguments (AND level) */
  List<Item_equal> new_equalities;
  List_iterator<Item> li(*cond_arg_list);
  bool should_fix_fields= 0;
  Item::cond_result tmp_cond_value;
  Item *item;

  /*
    If the list cond_arg_list became empty then it consisted only
    of always true multiple equalities.
  */
  *cond_value= cond_arg_list->elements ? Item::COND_UNDEF : Item::COND_TRUE;

  while ((item=li++))
  {
    /* Simplify the argument first; its verdict comes back in tmp_cond_value */
    Item *new_item= item->remove_eq_conds(thd, &tmp_cond_value, false);
    if (!new_item)
    {
      /* This can happen only when item is converted to TRUE or FALSE */
      li.remove();
    }
    else if (item != new_item)
    {
      /*
        This can happen when:
        - item was an OR formula converted to one disjunct
        - item was an AND formula converted to one conjunct
        In these cases the disjunct/conjunct must be merged into the
        argument list of cond.
      */
      if (new_item->type() == Item::COND_ITEM &&
          item->type() == Item::COND_ITEM)
      {
        DBUG_ASSERT(functype() == ((Item_cond *) new_item)->functype());
        List<Item> *new_item_arg_list=
          ((Item_cond *) new_item)->argument_list();
        if (and_level)
        {
          /*
            If new_item is an AND formula then multiple equalities
            of new_item_arg_list must be merged into multiple equalities
            of cond_arg_list.
          */
          List<Item_equal> *new_item_equalities=
            &((Item_cond_and *) new_item)->m_cond_equal.current_level;
          if (!new_item_equalities->is_empty())
          {
            /*
              Cut the multiple equalities from the new_item_arg_list and
              append them on the list new_equalities. Later the equalities
              from this list will be merged into the multiple equalities
              of cond_arg_list all together.
            */
            new_item_arg_list->disjoin((List<Item> *) new_item_equalities);
            new_equalities.append(new_item_equalities);
          }
        }
        if (new_item_arg_list->is_empty())
          li.remove();
        else
        {
          uint cnt= new_item_arg_list->elements;
          li.replace(*new_item_arg_list);
          /* Make iterator li ignore new items */
          for (cnt--; cnt; cnt--)
            li++;
          should_fix_fields= 1;
        }
      }
      else if (and_level &&
               new_item->type() == Item::FUNC_ITEM &&
               ((Item_func*) new_item)->functype() ==
                Item_func::MULT_EQUAL_FUNC)
      {
        /* The argument collapsed into a multiple equality: merge it later */
        li.remove();
        new_equalities.push_back((Item_equal *) new_item, thd->mem_root);
      }
      else
      {
        if (new_item->type() == Item::COND_ITEM &&
            ((Item_cond*) new_item)->functype() == functype())
        {
          /* Same kind of formula as this one: splice its arguments in */
          List<Item> *new_item_arg_list=
            ((Item_cond *) new_item)->argument_list();
          uint cnt= new_item_arg_list->elements;
          li.replace(*new_item_arg_list);
          /* Make iterator li ignore new items */
          for (cnt--; cnt; cnt--)
            li++;
        }
        else
          li.replace(new_item);
        should_fix_fields= 1;
      }
    }
    /* The first evaluated argument seeds the value of the whole formula */
    if (*cond_value == Item::COND_UNDEF)
      *cond_value= tmp_cond_value;
    switch (tmp_cond_value) {
    case Item::COND_OK:                        // Not TRUE or FALSE
      if (and_level || *cond_value == Item::COND_FALSE)
        *cond_value=tmp_cond_value;
      break;
    case Item::COND_FALSE:
      if (and_level)
      {
        *cond_value= tmp_cond_value;
        return (COND*) 0;                        // Always false
      }
      break;
    case Item::COND_TRUE:
      if (!and_level)
      {
        *cond_value= tmp_cond_value;
        return (COND*) 0;                        // Always true
      }
      break;
    case Item::COND_UNDEF:                        // Impossible
      break; /* purecov: deadcode */
    }
  }
  COND *cond= this;
  if (!new_equalities.is_empty())
  {
    DBUG_ASSERT(and_level);
    /*
      Merge multiple equalities that were cut from the results of
      simplification of OR formulas converted into AND formulas.
      These multiple equalities are to be merged into the
      multiple equalities of cond_arg_list.
    */
    COND_EQUAL *cond_equal= &((Item_cond_and *) this)->m_cond_equal;
    List<Item_equal> *cond_equalities= &cond_equal->current_level;
    cond_arg_list->disjoin((List<Item> *) cond_equalities);
    Item_equal *equality;
    List_iterator_fast<Item_equal> it(new_equalities);
    while ((equality= it++))
    {
      equality->upper_levels= cond_equal->upper_levels;
      equality->merge_into_list(thd, cond_equalities, false, false);
      /*
        Re-check the whole list after every merge: an always false
        equality makes this AND formula false. Note that 'equality' is
        reused as the inner loop variable; the outer loop reassigns it
        from 'it' on its next iteration.
      */
      List_iterator_fast<Item_equal> ei(*cond_equalities);
      while ((equality= ei++))
      {
        if (equality->const_item() && !equality->val_int())
        {
          *cond_value= Item::COND_FALSE;
          return (COND*) 0;
        }
      }
    }
    cond_arg_list->append((List<Item> *) cond_equalities);
    /*
      Propagate the newly formed multiple equalities to
      all the AND/OR levels of cond
    */
    bool is_simplifiable_cond= false;
    propagate_new_equalities(thd, this, cond_equalities,
                             cond_equal->upper_levels,
                             &is_simplifiable_cond);
    /*
      If the above propagation of multiple equalities brings us
      to multiple equalities that are always FALSE then try to
      simplify the condition with remove_eq_conds() again.
    */
    if (is_simplifiable_cond)
    {
      if (!(cond= cond->remove_eq_conds(thd, cond_value, false)))
        return cond;
    }
    should_fix_fields= 1;
  }
  if (should_fix_fields)
    cond->update_used_tables();

  /* Nothing left, or the formula has a constant value: no condition */
  if (!((Item_cond*) cond)->argument_list()->elements ||
      *cond_value != Item::COND_OK)
    return (COND*) 0;
  /* A single remaining argument replaces the formula itself */
  if (((Item_cond*) cond)->argument_list()->elements == 1)
  {                                                // Remove list
    item= ((Item_cond*) cond)->argument_list()->head();
    ((Item_cond*) cond)->argument_list()->empty();
    return item;
  }
  *cond_value= Item::COND_OK;
  return cond;
}


/**
  Default simplification of an item.

  If can_eval_in_optimize() holds, the item is evaluated with
  eval_const_cond(): *cond_value is set to Item::COND_TRUE or
  Item::COND_FALSE accordingly and NULL is returned.
  Otherwise *cond_value is set to Item::COND_OK and the item itself is
  returned. The parameter top_level_arg is not used here.
*/

COND *
Item::remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level_arg)
{
  if (can_eval_in_optimize())
  {
    *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
    return (COND*) 0;
  }
  *cond_value= Item::COND_OK;
  return this;                                        // Point at next and level
}


/**
  Simplification of a two-argument boolean function.

  A predicate for which can_eval_in_optimize() holds is evaluated exactly
  as in Item::remove_eq_conds(). Otherwise, when eq_cmp_result() is not
  Item::COND_OK and both arguments are equal according to eq(), NULL is
  returned with *cond_value left at the value of eq_cmp_result(), provided
  that this value is Item::COND_FALSE, or args[0] cannot be NULL, or the
  function is Item_func::EQUAL_FUNC.
  In all other cases *cond_value is set to Item::COND_OK and the item
  itself is returned. The parameter top_level_arg is not used here.
*/

COND *
Item_bool_func2::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                                 bool top_level_arg)
{
  if (can_eval_in_optimize())
  {
    *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
    return (COND*) 0;
  }
  if ((*cond_value= eq_cmp_result()) != Item::COND_OK)
  {
    if (args[0]->eq(args[1], true))
    {
      if (*cond_value == Item::COND_FALSE ||
          !args[0]->maybe_null() || functype() == Item_func::EQUAL_FUNC)
        return (COND*) 0;                 // Compare of identical items
    }
  }
  *cond_value= Item::COND_OK;
  return this;                                        // Point at next and level
}


/**
  Remove const and eq items.
Return new item, or NULL if no condition + cond_value is set to according: + COND_OK query is possible (field = constant) + COND_TRUE always true ( 1 = 1 ) + COND_FALSE always false ( 1 = 2 ) + + SYNPOSIS + remove_eq_conds() + thd THD environment + cond the condition to handle + cond_value the resulting value of the condition + + NOTES + calls the inner_remove_eq_conds to check all the tree reqursively + + RETURN + *COND with the simplified condition +*/ + +COND * +Item_func_isnull::remove_eq_conds(THD *thd, Item::cond_result *cond_value, + bool top_level_arg) +{ + Item *real_item= args[0]->real_item(); + if (real_item->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field*) real_item)->field; + + if ((field->flags & NOT_NULL_FLAG) && + field->type_handler()->cond_notnull_field_isnull_to_field_eq_zero()) + { + /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */ + /* + See BUG#12594011 + Documentation says that + SELECT datetime_notnull d FROM t1 WHERE d IS NULL + shall return rows where d=='0000-00-00' + + Thus, for DATE and DATETIME columns defined as NOT NULL, + "date_notnull IS NULL" has to be modified to + "date_notnull IS NULL OR date_notnull == 0" (if outer join) + "date_notnull == 0" (otherwise) + + */ + + Item *item0= (Item*) Item_false; + Item *eq_cond= new(thd->mem_root) Item_func_eq(thd, args[0], item0); + if (!eq_cond) + return this; + + COND *cond= this; + if (field->table->pos_in_table_list->is_inner_table_of_outer_join()) + { + // outer join: transform "col IS NULL" to "col IS NULL or col=0" + Item *or_cond= new(thd->mem_root) Item_cond_or(thd, eq_cond, this); + if (!or_cond) + return this; + cond= or_cond; + } + else + { + // not outer join: transform "col IS NULL" to "col=0" + cond= eq_cond; + } + + cond->fix_fields(thd, &cond); + /* + Note: although args[0] is a field, cond can still be a constant + (in case field is a part of a dependent subquery). 
+ + Note: we call cond->Item::remove_eq_conds() non-virtually (statically) + for performance purpose. + A non-qualified call, i.e. just cond->remove_eq_conds(), + would call Item_bool_func2::remove_eq_conds() instead, which would + try to do some extra job to detect if args[0] and args[1] are + equivalent items. We know they are not (we have field=0 here). + */ + return cond->Item::remove_eq_conds(thd, cond_value, false); + } + + /* + Handles this special case for some ODBC applications: + The are requesting the row that was just updated with a auto_increment + value with this construct: + + SELECT * from table_name where auto_increment_column IS NULL + This will be changed to: + SELECT * from table_name where auto_increment_column = LAST_INSERT_ID + + Note, this substitution is done if the NULL test is the only condition! + If the NULL test is a part of a more complex condition, it is not + substituted and is treated normally: + WHERE auto_increment IS NULL AND something_else + */ + + if (top_level_arg) // "auto_increment_column IS NULL" is the only condition + { + if (field->flags & AUTO_INCREMENT_FLAG && !field->table->maybe_null && + (thd->variables.option_bits & OPTION_AUTO_IS_NULL) && + (thd->first_successful_insert_id_in_prev_stmt > 0 && + thd->substitute_null_with_insert_id)) + { + #ifdef HAVE_QUERY_CACHE + query_cache_abort(thd, &thd->query_cache_tls); + #endif + COND *new_cond, *cond= this; + /* If this fails, we will catch it later before executing query */ + if ((new_cond= new (thd->mem_root) Item_func_eq(thd, args[0], + new (thd->mem_root) Item_int(thd, "last_insert_id()", + thd->read_first_successful_insert_id_in_prev_stmt(), + MY_INT64_NUM_DECIMAL_DIGITS)))) + { + cond= new_cond; + /* + Item_func_eq can't be fixed after creation so we do not check + cond->fixed(), also it do not need tables so we use 0 as second + argument. 
+ */ + cond->fix_fields(thd, &cond); + } + /* + IS NULL should be mapped to LAST_INSERT_ID only for first row, so + clear for next row + */ + thd->substitute_null_with_insert_id= FALSE; + + *cond_value= Item::COND_OK; + return cond; + } + } + } + return Item::remove_eq_conds(thd, cond_value, top_level_arg); +} + + +/** + Check if equality can be used in removing components of GROUP BY/DISTINCT + + @param l the left comparison argument (a field if any) + @param r the right comparison argument (a const of any) + + @details + Checks if an equality predicate can be used to take away + DISTINCT/GROUP BY because it is known to be true for exactly one + distinct value (e.g. <expr> == <const>). + Arguments must be compared in the native type of the left argument + and (for strings) in the native collation of the left argument. + Otherwise, for example, + <string_field> = <int_const> may match more than 1 distinct value or + the <string_field>. + + @note We don't need to aggregate l and r collations here, because r - + the constant item - has already been converted to a proper collation + for comparison. We only need to compare this collation with field's collation. + + @retval true can be used + @retval false cannot be used +*/ + +/* + psergey-todo: this returns false for int_column='1234' (here '1234' is a + constant. Need to discuss this with Bar). + + See also Field::test_if_equality_guaranees_uniqueness(const Item *item); +*/ +static bool +test_if_equality_guarantees_uniqueness(Item *l, Item *r) +{ + return (r->const_item() || !(r->used_tables() & ~OUTER_REF_TABLE_BIT)) && + item_cmp_type(l, r) == l->cmp_type() && + (l->cmp_type() != STRING_RESULT || + l->collation.collation == r->collation.collation); +} + + +/* + Return TRUE if i1 and i2 (if any) are equal items, + or if i1 is a wrapper item around the f2 field. 
+*/ + +static bool equal(Item *i1, Item *i2, Field *f2) +{ + DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL)); + + if (i2 != NULL) + return i1->eq(i2, 1); + else if (i1->type() == Item::FIELD_ITEM) + return f2->eq(((Item_field *) i1)->field); + else + return FALSE; +} + + +/** + Test if a field or an item is equal to a constant value in WHERE + + @param cond WHERE clause expression + @param comp_item Item to find in WHERE expression + (if comp_field != NULL) + @param comp_field Field to find in WHERE expression + (if comp_item != NULL) + @param[out] const_item intermediate arg, set to Item pointer to NULL + + @return TRUE if the field is a constant value in WHERE + + @note + comp_item and comp_field parameters are mutually exclusive. +*/ +bool +const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field, + Item **const_item) +{ + DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL)); + + Item *intermediate= NULL; + if (const_item == NULL) + const_item= &intermediate; + + if (cond->type() == Item::COND_ITEM) + { + bool and_level= (((Item_cond*) cond)->functype() + == Item_func::COND_AND_FUNC); + List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + bool res=const_expression_in_where(item, comp_item, comp_field, + const_item); + if (res) // Is a const value + { + if (and_level) + return 1; + } + else if (!and_level) + return 0; + } + return and_level ? 
0 : 1; + } + else if (cond->eq_cmp_result() != Item::COND_OK) + { // boolean compare function + Item_func* func= (Item_func*) cond; + if (func->functype() != Item_func::EQUAL_FUNC && + func->functype() != Item_func::EQ_FUNC) + return 0; + Item *left_item= ((Item_func*) cond)->arguments()[0]; + Item *right_item= ((Item_func*) cond)->arguments()[1]; + if (equal(left_item, comp_item, comp_field)) + { + if (test_if_equality_guarantees_uniqueness (left_item, right_item)) + { + if (*const_item) + return right_item->eq(*const_item, 1); + *const_item=right_item; + return 1; + } + } + else if (equal(right_item, comp_item, comp_field)) + { + if (test_if_equality_guarantees_uniqueness (right_item, left_item)) + { + if (*const_item) + return left_item->eq(*const_item, 1); + *const_item=left_item; + return 1; + } + } + } + return 0; +} + + +/**************************************************************************** + Create internal temporary table +****************************************************************************/ + +Field *Item::create_tmp_field_int(MEM_ROOT *root, TABLE *table, + uint convert_int_length) +{ + const Type_handler *h= &type_handler_slong; + if (max_char_length() > convert_int_length) + h= &type_handler_slonglong; + if (unsigned_flag) + h= h->type_handler_unsigned(); + return h->make_and_init_table_field(root, &name, Record_addr(maybe_null()), + *this, table); +} + +Field *Item::tmp_table_field_from_field_type_maybe_null(MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param, + bool is_explicit_null) +{ + /* + item->type() == CONST_ITEM excluded due to making fields for counter + With help of Item_uint + */ + DBUG_ASSERT(!param->make_copy_field() || type() == CONST_ITEM); + DBUG_ASSERT(!is_result_field()); + Field *result; + if ((result= tmp_table_field_from_field_type(root, table))) + { + if (result && is_explicit_null) + result->is_created_from_null_item= true; + } + return result; +} + + +Field 
*Item_sum::create_tmp_field(MEM_ROOT *root, bool group, TABLE *table) +{ + Field *UNINIT_VAR(new_field); + + switch (cmp_type()) { + case REAL_RESULT: + { + new_field= new (root) + Field_double(max_char_length(), maybe_null(), &name, decimals, TRUE); + break; + } + case INT_RESULT: + case TIME_RESULT: + case DECIMAL_RESULT: + case STRING_RESULT: + new_field= tmp_table_field_from_field_type(root, table); + break; + case ROW_RESULT: + // This case should never be chosen + DBUG_ASSERT(0); + new_field= 0; + break; + } + if (new_field) + new_field->init(table); + return new_field; +} + + +/** + Create a temporary field for Item_field (or its descendant), + either direct or referenced by an Item_ref. + + param->modify_item is set when we create a field for an internal temporary + table. In this case we have to ensure the new field name is identical to + the original field name as the field will info will be sent to the client. + In other cases, the field name is set from orig_item or name if org_item is + not set. +*/ + +Field * +Item_field::create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table, + Item_ref *orig_item, + const Tmp_field_param *param) +{ + DBUG_ASSERT(!is_result_field()); + Field *result; + LEX_CSTRING *new_name= (orig_item ? &orig_item->name : + !param->modify_item() ? &name : + &field->field_name); + + /* + If item have to be able to store NULLs but underlaid field can't do it, + create_tmp_field_from_field() can't be used for tmp field creation. + */ + if (((maybe_null() && in_rollup()) || + (new_table->in_use->create_tmp_table_for_derived && /* for mat. view/dt */ + orig_item && orig_item->maybe_null())) && + !field->maybe_null()) + { + /* + The item the ref points to may have maybe_null flag set while + the ref doesn't have it. This may happen for outer fields + when the outer query decided at some point after name resolution phase + that this field might be null. Take this into account here. + */ + Record_addr rec(orig_item ? 
orig_item->maybe_null() : maybe_null()); + const Type_handler *handler= type_handler()-> + type_handler_for_tmp_table(this); + result= handler->make_and_init_table_field(root, new_name, + rec, *this, new_table); + } + else if (param->table_cant_handle_bit_fields() && + field->type() == MYSQL_TYPE_BIT) + { + const Type_handler *handler= + Type_handler::type_handler_long_or_longlong(max_char_length(), true); + result= handler->make_and_init_table_field(root, new_name, + Record_addr(maybe_null()), + *this, new_table); + } + else + { + bool tmp_maybe_null= param->modify_item() ? maybe_null() : + field->maybe_null(); + result= field->create_tmp_field(root, new_table, tmp_maybe_null); + if (result && ! param->modify_item()) + result->field_name= *new_name; + } + if (result && param->modify_item()) + result_field= result; + return result; +} + + +Field *Item_field::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + DBUG_ASSERT(!is_result_field()); + Field *result; + src->set_field(field); + if (!(result= create_tmp_field_from_item_field(root, table, NULL, param))) + return NULL; + if (!(field->flags & NO_DEFAULT_VALUE_FLAG) && + field->eq_def(result)) + src->set_default_field(field); + return result; +} + + +Field *Item_default_value::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + if (field->default_value || (field->flags & BLOB_FLAG)) + { + /* + We have to use a copy function when using a blob with default value + as the we have to calculate the default value before we can use it. 
+ */ + get_tmp_field_src(src, param); + Field *result= tmp_table_field_from_field_type(root, table); + if (result && param->modify_item()) + result_field= result; + return result; + } + /* + Same code as in Item_field::create_tmp_field_ex, except no default field + handling + */ + src->set_field(field); + return create_tmp_field_from_item_field(root, table, nullptr, param); +} + + +Field *Item_ref::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + Item *item= real_item(); + DBUG_ASSERT(is_result_field()); + if (item->type() == Item::FIELD_ITEM) + { + Field *result; + Item_field *field= (Item_field*) item; + Tmp_field_param prm2(*param); + prm2.set_modify_item(false); + src->set_field(field->field); + if (!(result= field->create_tmp_field_from_item_field(root, table, + this, &prm2))) + return NULL; + if (param->modify_item()) + result_field= result; + return result; + } + return Item_result_field::create_tmp_field_ex(root, table, src, param); +} + + +void Item_result_field::get_tmp_field_src(Tmp_field_src *src, + const Tmp_field_param *param) +{ + if (param->make_copy_field()) + { + DBUG_ASSERT(result_field); + src->set_field(result_field); + } + else + { + src->set_item_result_field(this); // Save for copy_funcs + } +} + + +Field * +Item_result_field::create_tmp_field_ex_from_handler( + MEM_ROOT *root, + TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param, + const Type_handler *h) +{ + /* + Possible Item types: + - Item_cache_wrapper (only for CREATE..SELECT ?) 
+ - Item_func + - Item_subselect + */ + DBUG_ASSERT(fixed()); + DBUG_ASSERT(is_result_field()); + DBUG_ASSERT(type() != NULL_ITEM); + get_tmp_field_src(src, param); + Field *result; + if ((result= h->make_and_init_table_field(root, &name, + Record_addr(maybe_null()), + *this, table)) && + param->modify_item()) + result_field= result; + return result; +} + + +Field *Item_func_sp::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + Field *result; + get_tmp_field_src(src, param); + if ((result= sp_result_field->create_tmp_field(root, table))) + { + result->field_name= name; + if (param->modify_item()) + result_field= result; + } + return result; +} + + +static bool make_json_valid_expr(TABLE *table, Field *field) +{ + THD *thd= table->in_use; + Query_arena backup_arena; + Item *expr, *item_field; + + if (!table->expr_arena && table->init_expr_arena(thd->mem_root)) + return 1; + + thd->set_n_backup_active_arena(table->expr_arena, &backup_arena); + if ((item_field= new (thd->mem_root) Item_field(thd, field)) && + (expr= new (thd->mem_root) Item_func_json_valid(thd, item_field))) + field->check_constraint= add_virtual_expression(thd, expr); + thd->restore_active_arena(table->expr_arena, &backup_arena); + return field->check_constraint == NULL; +} + + +/** + Create field for temporary table. + + @param table Temporary table + @param item Item to create a field for + @param type Type of item (normally item->type) + @param copy_func If set and item is a function, store copy of item + in this array + @param from_field if field will be created using other field as example, + pointer example field will be written here + @param default_field If field has a default value field, store it here + @param group 1 if we are going to do a relative group by on result + @param modify_item 1 if item->result_field should point to new item. 
+ This is relevent for how fill_record() is going to + work: + If modify_item is 1 then fill_record() will update + the record in the original table. + If modify_item is 0 then fill_record() will update + the temporary table + @param table_cant_handle_bit_fields + Set to 1 if the temporary table cannot handle bit + fields. Only set for heap tables when the bit field + is part of an index. + @param make_copy_field + Set when using with rollup when we want to have + an exact copy of the field. + @retval + 0 on error + @retval + new_created field + Create a temporary field for Item_field (or its descendant), + either direct or referenced by an Item_ref. +*/ +Field *create_tmp_field(TABLE *table, Item *item, + Item ***copy_func, Field **from_field, + Field **default_field, + bool group, bool modify_item, + bool table_cant_handle_bit_fields, + bool make_copy_field) +{ + Tmp_field_src src; + Tmp_field_param prm(group, modify_item, table_cant_handle_bit_fields, + make_copy_field); + Field *result= item->create_tmp_field_ex(table->in_use->mem_root, + table, &src, &prm); + if (is_json_type(item) && make_json_valid_expr(table, result)) + result= NULL; + + *from_field= src.field(); + *default_field= src.default_field(); + if (src.item_result_field()) + *((*copy_func)++)= src.item_result_field(); + return result; +} + +/* + Set up column usage bitmaps for a temporary table + + IMPLEMENTATION + For temporary tables, we need one bitmap with all columns set and + a tmp_set bitmap to be used by things like filesort. 
+*/ + +void +setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count) +{ + uint bitmap_size= bitmap_buffer_size(field_count); + + DBUG_ASSERT(table->s->virtual_fields == 0); + + my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count); + bitmaps+= bitmap_size; + my_bitmap_init(&table->tmp_set, + (my_bitmap_map*) bitmaps, field_count); + bitmaps+= bitmap_size; + my_bitmap_init(&table->eq_join_set, + (my_bitmap_map*) bitmaps, field_count); + bitmaps+= bitmap_size; + my_bitmap_init(&table->cond_set, + (my_bitmap_map*) bitmaps, field_count); + bitmaps+= bitmap_size; + my_bitmap_init(&table->has_value_set, + (my_bitmap_map*) bitmaps, field_count); + /* write_set and all_set are copies of read_set */ + table->def_write_set= table->def_read_set; + table->s->all_set= table->def_read_set; + bitmap_set_all(&table->s->all_set); + table->default_column_bitmaps(); +} + + +Create_tmp_table::Create_tmp_table(ORDER *group, bool distinct, + bool save_sum_fields, + ulonglong select_options, + ha_rows rows_limit) + :m_alloced_field_count(0), + m_using_unique_constraint(false), + m_temp_pool_slot(MY_BIT_NONE), + m_group(group), + m_distinct(distinct), + m_save_sum_fields(save_sum_fields), + m_with_cycle(false), + m_select_options(select_options), + m_rows_limit(rows_limit), + m_group_null_items(0), + current_counter(other) +{ + m_field_count[Create_tmp_table::distinct]= 0; + m_field_count[Create_tmp_table::other]= 0; + m_null_count[Create_tmp_table::distinct]= 0; + m_null_count[Create_tmp_table::other]= 0; + m_blobs_count[Create_tmp_table::distinct]= 0; + m_blobs_count[Create_tmp_table::other]= 0; + m_uneven_bit[Create_tmp_table::distinct]= 0; + m_uneven_bit[Create_tmp_table::other]= 0; +} + + +void Create_tmp_table::add_field(TABLE *table, Field *field, uint fieldnr, + bool force_not_null_cols) +{ + DBUG_ASSERT(!field->field_name.str || + strlen(field->field_name.str) == field->field_name.length); + + if (force_not_null_cols) + { + 
field->flags|= NOT_NULL_FLAG; + field->null_ptr= NULL; + } + + if (!(field->flags & NOT_NULL_FLAG)) + m_null_count[current_counter]++; + + table->s->reclength+= field->pack_length(); + + // Assign it here, before update_data_type_statistics() changes m_blob_count + if (field->flags & BLOB_FLAG) + { + table->s->blob_field[m_blob_count]= fieldnr; + m_blobs_count[current_counter]++; + } + + table->field[fieldnr]= field; + field->field_index= fieldnr; + + field->update_data_type_statistics(this); +} + + +/** + Create a temp table according to a field list. + + Given field pointers are changed to point at tmp_table for + send_result_set_metadata. The table object is self contained: it's + allocated in its own memory root, as well as Field objects + created for table columns. + This function will replace Item_sum items in 'fields' list with + corresponding Item_field items, pointing at the fields in the + temporary table, unless this was prohibited by TRUE + value of argument save_sum_fields. The Item_field objects + are created in THD memory root. + + @param thd thread handle + @param param a description used as input to create the table + @param fields list of items that will be used to define + column types of the table (also see NOTES) + @param group Create an unique key over all group by fields. + This is used to retrive the row during + end_write_group() and update them. + @param distinct should table rows be distinct + @param save_sum_fields see NOTES + @param select_options Optiions for how the select is run. + See sql_priv.h for a list of options. + @param rows_limit Maximum number of rows to insert into the + temporary table + @param table_alias possible name of the temporary table that can + be used for name resolving; can be "". 
+ @param do_not_open only create the TABLE object, do not + open the table in the engine + @param keep_row_order rows need to be read in the order they were + inserted, the engine should preserve this order +*/ + +TABLE *Create_tmp_table::start(THD *thd, + TMP_TABLE_PARAM *param, + const LEX_CSTRING *table_alias) +{ + MEM_ROOT *mem_root_save, own_root; + TABLE *table; + TABLE_SHARE *share; + uint copy_func_count= param->func_count; + char *tmpname,path[FN_REFLEN]; + Field **reg_field; + uint *blob_field; + key_part_map *const_key_parts; + /* Treat sum functions as normal ones when loose index scan is used. */ + m_save_sum_fields|= param->precomputed_group_by; + DBUG_ENTER("Create_tmp_table::start"); + DBUG_PRINT("enter", + ("table_alias: '%s' distinct: %d save_sum_fields: %d " + "rows_limit: %lu group: %d", table_alias->str, + (int) m_distinct, (int) m_save_sum_fields, + (ulong) m_rows_limit, MY_TEST(m_group))); + + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) + m_temp_pool_slot = temp_pool_set_next(); + + if (m_temp_pool_slot != MY_BIT_NONE) // we got a slot + sprintf(path, "%s-%s-%lx-%i", tmp_file_prefix, param->tmp_name, + current_pid, m_temp_pool_slot); + else + { + /* if we run out of slots or we are not using tempool */ + sprintf(path, "%s-%s-%lx-%llx-%x", tmp_file_prefix, param->tmp_name, + current_pid, thd->thread_id, thd->tmp_table++); + } + + /* + No need to change table name to lower case as we are only creating + MyISAM, Aria or HEAP tables here + */ + fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + if (m_group) + { + ORDER **prev= &m_group; + if (!param->quick_group) + m_group= 0; // Can't use group key + else for (ORDER *tmp= m_group ; tmp ; tmp= tmp->next) + { + /* Exclude found constant from the list */ + if ((*tmp->item)->const_item()) + { + *prev= tmp->next; + param->group_parts--; + continue; + } + else + prev= &(tmp->next); + /* + marker == 4 means two things: + - store NULLs in the key, and + - 
convert BIT fields to 64-bit long, needed because MEMORY tables + can't index BIT fields. + */ + (*tmp->item)->marker= MARKER_NULL_KEY; // Store null in key + if ((*tmp->item)->too_big_for_varchar()) + m_using_unique_constraint= true; + } + if (param->group_length >= MAX_BLOB_WIDTH) + m_using_unique_constraint= true; + if (m_group) + m_distinct= 0; // Can't use distinct + } + + m_alloced_field_count= param->field_count+param->func_count+param->sum_func_count; + DBUG_ASSERT(m_alloced_field_count); + const uint field_count= m_alloced_field_count; + + /* + When loose index scan is employed as access method, it already + computes all groups and the result of all aggregate functions. We + make space for the items of the aggregate function in the list of + functions TMP_TABLE_PARAM::items_to_copy, so that the values of + these items are stored in the temporary table. + */ + if (param->precomputed_group_by) + copy_func_count+= param->sum_func_count; + param->copy_func_count= copy_func_count; + + init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(MY_THREAD_SPECIFIC)); + + if (!multi_alloc_root(&own_root, + &table, sizeof(*table), + &share, sizeof(*share), + ®_field, sizeof(Field*) * (field_count+1), + &m_default_field, sizeof(Field*) * (field_count), + &blob_field, sizeof(uint)*(field_count+1), + &m_from_field, sizeof(Field*)*field_count, + ¶m->items_to_copy, + sizeof(param->items_to_copy[0])*(copy_func_count+1), + ¶m->keyinfo, sizeof(*param->keyinfo), + &m_key_part_info, + sizeof(*m_key_part_info)*(param->group_parts+1), + ¶m->start_recinfo, + sizeof(*param->recinfo)*(field_count*2+4), + &tmpname, (uint) strlen(path)+1, + &m_group_buff, (m_group && ! m_using_unique_constraint ? 
+ param->group_length : 0), + &m_bitmaps, bitmap_buffer_size(field_count)*6, + &const_key_parts, sizeof(*const_key_parts), + NullS)) + { + DBUG_RETURN(NULL); /* purecov: inspected */ + } + /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ + if (!(param->copy_field= new (thd->mem_root) Copy_field[field_count])) + { + free_root(&own_root, MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NULL); /* purecov: inspected */ + } + strmov(tmpname, path); + /* make table according to fields */ + + bzero((char*) table,sizeof(*table)); + bzero((char*) reg_field, sizeof(Field*) * (field_count+1)); + bzero((char*) m_default_field, sizeof(Field*) * (field_count)); + bzero((char*) m_from_field, sizeof(Field*) * field_count); + /* const_key_parts is used in sort_and_filter_keyuse */ + bzero((char*) const_key_parts, sizeof(*const_key_parts)); + + table->mem_root= own_root; + mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + table->field=reg_field; + table->const_key_parts= const_key_parts; + table->alias.set(table_alias->str, table_alias->length, table_alias_charset); + + table->reginfo.lock_type=TL_WRITE; /* Will be updated */ + table->map=1; + table->temp_pool_slot= m_temp_pool_slot; + table->copy_blobs= 1; + table->in_use= thd; + table->no_rows_with_nulls= param->force_not_null_cols; + table->expr_arena= thd; + + table->s= share; + init_tmp_table_share(thd, share, "", 0, "(temporary)", tmpname); + share->blob_field= blob_field; + share->table_charset= param->table_charset; + share->primary_key= MAX_KEY; // Indicate no primary key + if (param->schema_table) + share->db= INFORMATION_SCHEMA_NAME; + + param->using_outer_summary_function= 0; + thd->mem_root= mem_root_save; + DBUG_RETURN(table); +} + + +bool Create_tmp_table::add_fields(THD *thd, + TABLE *table, + TMP_TABLE_PARAM *param, + List<Item> &fields) +{ + DBUG_ENTER("Create_tmp_table::add_fields"); + DBUG_ASSERT(table); + DBUG_ASSERT(table->field); + DBUG_ASSERT(table->s->blob_field); + 
DBUG_ASSERT(table->s->reclength == 0); + DBUG_ASSERT(table->s->fields == 0); + DBUG_ASSERT(table->s->blob_fields == 0); + + const bool not_all_columns= !(m_select_options & TMP_TABLE_ALL_COLUMNS); + bool distinct_record_structure= m_distinct; + uint fieldnr= 0; + TABLE_SHARE *share= table->s; + Item **copy_func= param->items_to_copy; + + MEM_ROOT *mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + List_iterator_fast<Item> li(fields); + Item *item; + Field **tmp_from_field= m_from_field; + while (!m_with_cycle && (item= li++)) + if (item->is_in_with_cycle()) + { + m_with_cycle= true; + /* + Following distinct_record_structure is (m_distinct || m_with_cycle) + + Note: distinct_record_structure can be true even if m_distinct is + false, for example for incr_table in recursive CTE + (see select_union_recursive::create_result_table) + */ + distinct_record_structure= true; + } + li.rewind(); + while ((item=li++)) + { + uint uneven_delta; + current_counter= (((param->hidden_field_count < (fieldnr + 1)) && + distinct_record_structure && + (!m_with_cycle || item->is_in_with_cycle())) ? + distinct : + other); + Item::Type type= item->type(); + if (type == Item::COPY_STR_ITEM) + { + item= ((Item_copy *)item)->get_item(); + type= item->type(); + } + if (not_all_columns) + { + if (item->with_sum_func() && type != Item::SUM_FUNC_ITEM) + { + if (item->used_tables() & OUTER_REF_TABLE_BIT) + item->update_used_tables(); + if ((item->real_type() == Item::SUBSELECT_ITEM) || + (item->used_tables() & ~OUTER_REF_TABLE_BIT)) + { + /* + Mark that the we have ignored an item that refers to a summary + function. We need to know this if someone is going to use + DISTINCT on the result. 
+ */ + param->using_outer_summary_function=1; + continue; + } + } + if (item->const_item() && + param->hidden_field_count < (fieldnr + 1)) + continue; // We don't have to store this + } + if (type == Item::SUM_FUNC_ITEM && !m_group && !m_save_sum_fields) + { /* Can't calc group yet */ + Item_sum *sum_item= (Item_sum *) item; + sum_item->result_field=0; + for (uint i= 0 ; i < sum_item->get_arg_count() ; i++) + { + Item *arg= sum_item->get_arg(i); + if (!arg->const_item()) + { + Item *tmp_item; + Field *new_field= + create_tmp_field(table, arg, ©_func, + tmp_from_field, &m_default_field[fieldnr], + m_group != 0, not_all_columns, + distinct_record_structure , false); + if (!new_field) + goto err; // Should be OOM + tmp_from_field++; + + thd->mem_root= mem_root_save; + if (!(tmp_item= new (thd->mem_root) + Item_field(thd, new_field))) + goto err; + ((Item_field*) tmp_item)->set_refers_to_temp_table(); + arg= sum_item->set_arg(i, thd, tmp_item); + thd->mem_root= &table->mem_root; + + uneven_delta= m_uneven_bit_length; + add_field(table, new_field, fieldnr++, param->force_not_null_cols); + m_field_count[current_counter]++; + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + + if (!(new_field->flags & NOT_NULL_FLAG)) + { + /* + new_field->maybe_null() is still false, it will be + changed below. But we have to setup Item_field correctly + */ + arg->set_maybe_null(); + } + if (current_counter == distinct) + new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; + } + } + } + else + { + /* + The last parameter to create_tmp_field_ex() is a bit tricky: + + We need to set it to 0 in union, to get fill_record() to modify the + temporary table. + We need to set it to 1 on multi-table-update and in select to + write rows to the temporary table. + We here distinguish between UNION and multi-table-updates by the fact + that in the later case group is set to the row pointer. 
+ + The test for item->marker == MARKER_NULL_KEY is ensure we + don't create a group-by key over a bit field as heap tables + can't handle that. + */ + DBUG_ASSERT(!param->schema_table); + Field *new_field= + create_tmp_field(table, item, ©_func, + tmp_from_field, &m_default_field[fieldnr], + m_group != 0, + !param->force_copy_fields && + (not_all_columns || m_group !=0), + /* + If item->marker == MARKER_NULL_KEY then we + force create_tmp_field to create a 64-bit + longs for BIT fields because HEAP tables + can't index BIT fields directly. We do the + same for distinct, as we want the distinct + index to be usable in this case too. + */ + item->marker == MARKER_NULL_KEY || + param->bit_fields_as_long, + param->force_copy_fields); + if (unlikely(!new_field)) + { + if (unlikely(thd->is_fatal_error)) + goto err; // Got OOM + continue; // Some kind of const item + } + if (type == Item::SUM_FUNC_ITEM) + { + Item_sum *agg_item= (Item_sum *) item; + /* + Update the result field only if it has never been set, or if the + created temporary table is not to be used for subquery + materialization. + + The reason is that for subqueries that require + materialization as part of their plan, we create the + 'external' temporary table needed for IN execution, after + the 'internal' temporary table needed for grouping. Since + both the external and the internal temporary tables are + created for the same list of SELECT fields of the subquery, + setting 'result_field' for each invocation of + create_tmp_table overrides the previous value of + 'result_field'. + + The condition below prevents the creation of the external + temp table to override the 'result_field' that was set for + the internal temp table. 
+ */ + if (!agg_item->result_field || !param->materialized_subquery) + agg_item->result_field= new_field; + } + tmp_from_field++; + + uneven_delta= m_uneven_bit_length; + add_field(table, new_field, fieldnr++, param->force_not_null_cols); + m_field_count[current_counter]++; + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + + if (item->marker == MARKER_NULL_KEY && item->maybe_null()) + { + m_group_null_items++; + new_field->flags|= GROUP_FLAG; + } + if (current_counter == distinct) + new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; + } + } + + DBUG_ASSERT(fieldnr == m_field_count[other] + m_field_count[distinct]); + DBUG_ASSERT(m_blob_count == m_blobs_count[other] + m_blobs_count[distinct]); + share->fields= fieldnr; + share->blob_fields= m_blob_count; + table->field[fieldnr]= 0; // End marker + share->blob_field[m_blob_count]= 0; // End marker + copy_func[0]= 0; // End marker + param->func_count= (uint) (copy_func - param->items_to_copy); + DBUG_ASSERT(param->func_count <= param->copy_func_count); + + share->column_bitmap_size= bitmap_buffer_size(share->fields); + + thd->mem_root= mem_root_save; + DBUG_RETURN(false); + +err: + thd->mem_root= mem_root_save; + DBUG_RETURN(true); +} + + +bool Create_tmp_table::choose_engine(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param) +{ + TABLE_SHARE *share= table->s; + DBUG_ENTER("Create_tmp_table::choose_engine"); + /* + If result table is small; use a heap, otherwise TMP_TABLE_HTON (Aria) + In the future we should try making storage engine selection more dynamic + */ + + if (share->blob_fields || m_using_unique_constraint || + (thd->variables.big_tables && + !(m_select_options & SELECT_SMALL_RESULT)) || + (m_select_options & TMP_TABLE_FORCE_MYISAM) || + thd->variables.tmp_memory_table_size == 0) + { + share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + if (m_group && + (param->group_parts > table->file->max_key_parts() || + 
param->group_length > table->file->max_key_length())) + m_using_unique_constraint= true; + } + else + { + share->db_plugin= ha_lock_engine(0, heap_hton); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + } + DBUG_RETURN(!table->file); +} + + +bool Create_tmp_table::finalize(THD *thd, + TABLE *table, + TMP_TABLE_PARAM *param, + bool do_not_open, bool keep_row_order) +{ + DBUG_ENTER("Create_tmp_table::finalize"); + DBUG_ASSERT(table); + + uint null_pack_length[2]; + uint null_pack_base[2]; + uint null_counter[2]= {0, 0}; + uint whole_null_pack_length; + bool use_packed_rows= false; + bool save_abort_on_warning; + uchar *pos; + uchar *null_flags; + KEY *keyinfo; + TMP_ENGINE_COLUMNDEF *recinfo; + TABLE_SHARE *share= table->s; + Copy_field *copy= param->copy_field; + MEM_ROOT *mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + DBUG_ASSERT(m_alloced_field_count >= share->fields); + DBUG_ASSERT(m_alloced_field_count >= share->blob_fields); + + if (choose_engine(thd, table, param)) + goto err; + + if (table->file->set_ha_share_ref(&share->ha_share)) + { + delete table->file; + table->file= 0; + goto err; + } + table->file->set_table(table); + + if (!m_using_unique_constraint) + share->reclength+= m_group_null_items; // null flag is stored separately + + if (share->blob_fields == 0) + { + /* We need to ensure that first byte is not 0 for the delete link */ + if (m_field_count[other]) + m_null_count[other]++; + else + m_null_count[distinct]++; + } + + null_pack_length[other]= (m_null_count[other] + 7 + + m_uneven_bit[other]) / 8; + null_pack_base[other]= 0; + null_pack_length[distinct]= (m_null_count[distinct] + 7 + + m_uneven_bit[distinct]) / 8; + null_pack_base[distinct]= null_pack_length[other]; + whole_null_pack_length= null_pack_length[other] + + null_pack_length[distinct]; + share->reclength+= whole_null_pack_length; + if (!share->reclength) + share->reclength= 1; // Dummy select + share->stored_rec_length= 
share->reclength; + /* Use packed rows if there is blobs or a lot of space to gain */ + if (share->blob_fields || + (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS && + (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS || + string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS))) + use_packed_rows= 1; + + { + uint alloc_length= ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1); + share->rec_buff_length= alloc_length; + if (!(table->record[0]= (uchar*) + alloc_root(&table->mem_root, alloc_length*3))) + goto err; + table->record[1]= table->record[0]+alloc_length; + share->default_values= table->record[1]+alloc_length; + } + + setup_tmp_table_column_bitmaps(table, m_bitmaps, table->s->fields); + + recinfo=param->start_recinfo; + null_flags=(uchar*) table->record[0]; + pos=table->record[0]+ whole_null_pack_length; + if (whole_null_pack_length) + { + bzero((uchar*) recinfo,sizeof(*recinfo)); + recinfo->type=FIELD_NORMAL; + recinfo->length= whole_null_pack_length; + recinfo++; + bfill(null_flags, whole_null_pack_length, 255); // Set null fields + + table->null_flags= (uchar*) table->record[0]; + share->null_fields= m_null_count[other] + m_null_count[distinct]; + share->null_bytes= share->null_bytes_for_compare= whole_null_pack_length; + } + + if (share->blob_fields == 0) + { + null_counter[(m_field_count[other] ? other : distinct)]++; + } + + /* Protect against warnings in field_conv() in the next loop*/ + save_abort_on_warning= thd->abort_on_warning; + thd->abort_on_warning= 0; + + for (uint i= 0; i < share->fields; i++, recinfo++) + { + Field *field= table->field[i]; + uint length; + bzero((uchar*) recinfo,sizeof(*recinfo)); + + current_counter= ((field->flags & FIELD_PART_OF_TMP_UNIQUE) ? 
+ distinct : + other); + + if (!(field->flags & NOT_NULL_FLAG)) + { + recinfo->null_bit= (uint8)1 << (null_counter[current_counter] & 7); + recinfo->null_pos= (null_pack_base[current_counter] + + null_counter[current_counter]/8); + field->move_field(pos, null_flags + recinfo->null_pos, recinfo->null_bit); + null_counter[current_counter]++; + } + else + field->move_field(pos,(uchar*) 0,0); + if (field->type() == MYSQL_TYPE_BIT) + { + /* We have to reserve place for extra bits among null bits */ + ((Field_bit*) field)->set_bit_ptr(null_flags + + null_pack_base[current_counter] + + null_counter[current_counter]/8, + null_counter[current_counter] & 7); + null_counter[current_counter]+= (field->field_length & 7); + } + field->reset(); + + /* + Test if there is a default field value. The test for ->ptr is to skip + 'offset' fields generated by initialize_tables + */ + if (m_default_field[i] && m_default_field[i]->ptr) + { + /* + default_field[i] is set only in the cases when 'field' can + inherit the default value that is defined for the field referred + by the Item_field object from which 'field' has been created. + */ + Field *orig_field= m_default_field[i]; + /* Get the value from default_values */ + if (orig_field->is_null_in_record(orig_field->table->s->default_values)) + field->set_null(); + else + { + /* + Copy default value. We have to use field_conv() for copy, instead of + memcpy(), because bit_fields may be stored differently. 
+ But otherwise we copy as is, in particular, ignore NO_ZERO_DATE, etc + */ + Use_relaxed_field_copy urfc(thd); + my_ptrdiff_t ptr_diff= (orig_field->table->s->default_values - + orig_field->table->record[0]); + field->set_notnull(); + orig_field->move_field_offset(ptr_diff); + field_conv(field, orig_field); + orig_field->move_field_offset(-ptr_diff); + } + } + + if (m_from_field[i]) + { /* Not a table Item */ + copy->set(field, m_from_field[i], m_save_sum_fields); + copy++; + } + length=field->pack_length_in_rec(); + pos+= length; + + /* Make entry for create table */ + recinfo->length=length; + recinfo->type= field->tmp_engine_column_type(use_packed_rows); + + // fix table name in field entry + field->set_table_name(&table->alias); + } + /* Handle group_null_items */ + bzero(pos, table->s->reclength - (pos - table->record[0])); + MEM_CHECK_DEFINED(table->record[0], table->s->reclength); + + thd->abort_on_warning= save_abort_on_warning; + param->copy_field_end= copy; + param->recinfo= recinfo; // Pointer to after last field + store_record(table,s->default_values); // Make empty default record + + if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0) // No limit + share->max_rows= ~(ha_rows) 0; + else + share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ? + MY_MIN(thd->variables.tmp_memory_table_size, + thd->variables.max_heap_table_size) : + thd->variables.tmp_disk_table_size) / + share->reclength); + set_if_bigger(share->max_rows,1); // For dummy start options + /* + Push the LIMIT clause to the temporary table creation, so that we + materialize only up to 'rows_limit' records instead of all result records. 
+ */ + set_if_smaller(share->max_rows, m_rows_limit); + param->end_write_records= m_rows_limit; + + keyinfo= param->keyinfo; + + if (m_group) + { + DBUG_PRINT("info",("Creating group key in temporary table")); + table->group= m_group; /* Table is grouped by key */ + param->group_buff= m_group_buff; + share->keys=1; + share->uniques= MY_TEST(m_using_unique_constraint); + table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); + keyinfo->key_part= m_key_part_info; + keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->usable_key_parts=keyinfo->user_defined_key_parts= param->group_parts; + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->key_length=0; + keyinfo->rec_per_key=NULL; + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->is_statistics_from_stat_tables= FALSE; + keyinfo->name= group_key; + ORDER *cur_group= m_group; + for (; cur_group ; cur_group= cur_group->next, m_key_part_info++) + { + Field *field=(*cur_group->item)->get_tmp_table_field(); + DBUG_ASSERT(field->table == table); + bool maybe_null=(*cur_group->item)->maybe_null(); + m_key_part_info->null_bit=0; + m_key_part_info->field= field; + m_key_part_info->fieldnr= field->field_index + 1; + if (cur_group == m_group) + field->key_start.set_bit(0); + m_key_part_info->offset= field->offset(table->record[0]); + m_key_part_info->length= (uint16) field->key_length(); + m_key_part_info->type= (uint8) field->key_type(); + m_key_part_info->key_type = + ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? 
+ 0 : FIELDFLAG_BINARY; + m_key_part_info->key_part_flag= 0; + if (!m_using_unique_constraint) + { + cur_group->buff=(char*) m_group_buff; + + if (maybe_null && !field->null_bit) + { + /* + This can only happen in the unusual case where an outer join + table was found to be not-nullable by the optimizer and we + the item can't really be null. + We solve this by marking the item as !maybe_null to ensure + that the key,field and item definition match. + */ + maybe_null= 0; + (*cur_group->item)->base_flags&= ~item_base_t::MAYBE_NULL; + } + + if (!(cur_group->field= field->new_key_field(thd->mem_root,table, + m_group_buff + + MY_TEST(maybe_null), + m_key_part_info->length, + field->null_ptr, + field->null_bit))) + goto err; /* purecov: inspected */ + + if (maybe_null) + { + /* + To be able to group on NULL, we reserved place in group_buff + for the NULL flag just before the column. (see above). + The field data is after this flag. + The NULL flag is updated in 'end_update()' and 'end_write()' + */ + keyinfo->flags|= HA_NULL_ARE_EQUAL; // def. that NULL == NULL + m_key_part_info->null_bit=field->null_bit; + m_key_part_info->null_offset= (uint) (field->null_ptr - + (uchar*) table->record[0]); + cur_group->buff++; // Pointer to field data + m_group_buff++; // Skipp null flag + } + m_group_buff+= cur_group->field->pack_length(); + } + keyinfo->key_length+= m_key_part_info->length; + } + /* + Ensure we didn't overrun the group buffer. The < is only true when + some maybe_null fields was changed to be not null fields. + */ + DBUG_ASSERT(m_using_unique_constraint || + m_group_buff <= param->group_buff + param->group_length); + } + + if (m_distinct && (share->fields != param->hidden_field_count || + m_with_cycle)) + { + uint i; + Field **reg_field; + /* + Create an unique key or an unique constraint over all columns + that should be in the result. 
In the temporary table, there are + 'param->hidden_field_count' extra columns, whose null bits are stored + in the first 'hidden_null_pack_length' bytes of the row. + */ + DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count)); + + if (m_blobs_count[distinct]) + { + /* + Special mode for index creation in MyISAM used to support unique + indexes on blobs with arbitrary length. Such indexes cannot be + used for lookups. + */ + share->uniques= 1; + } + keyinfo->user_defined_key_parts= m_field_count[distinct] + + (share->uniques ? MY_TEST(null_pack_length[distinct]) : 0); + keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; + keyinfo->usable_key_parts= keyinfo->user_defined_key_parts; + table->distinct= 1; + share->keys= 1; + if (!(m_key_part_info= (KEY_PART_INFO*) + alloc_root(&table->mem_root, + keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO)))) + goto err; + bzero((void*) m_key_part_info, keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); + table->key_info= table->s->key_info= keyinfo; + keyinfo->key_part= m_key_part_info; + keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY; + keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->key_length= 0; // Will compute the sum of the parts below. + keyinfo->name= distinct_key; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->is_statistics_from_stat_tables= FALSE; + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; + + /* + Needed by non-merged semi-joins: SJ-Materialized table must have a valid + rec_per_key array, because it participates in join optimization. Since + the table has no data, the only statistics we can provide is "unknown", + i.e. zero values. + + (For table record count, we calculate and set JOIN_TAB::found_records, + see get_delayed_table_estimates()). 
+ */ + size_t rpk_size= keyinfo->user_defined_key_parts * sizeof(keyinfo->rec_per_key[0]); + if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root, + rpk_size))) + goto err; + bzero(keyinfo->rec_per_key, rpk_size); + + /* + Create an extra field to hold NULL bits so that unique indexes on + blobs can distinguish NULL from 0. This extra field is not needed + when we do not use UNIQUE indexes for blobs. + */ + if (null_pack_length[distinct] && share->uniques) + { + m_key_part_info->null_bit=0; + m_key_part_info->offset= null_pack_base[distinct]; + m_key_part_info->length= null_pack_length[distinct]; + m_key_part_info->field= new Field_string(table->record[0], + (uint32) m_key_part_info->length, + (uchar*) 0, + (uint) 0, + Field::NONE, + &null_clex_str, &my_charset_bin); + if (!m_key_part_info->field) + goto err; + m_key_part_info->field->init(table); + m_key_part_info->key_type=FIELDFLAG_BINARY; + m_key_part_info->type= HA_KEYTYPE_BINARY; + m_key_part_info->fieldnr= m_key_part_info->field->field_index + 1; + m_key_part_info++; + } + /* Create a distinct key over the columns we are going to return */ + for (i= param->hidden_field_count, reg_field= table->field + i ; + i < share->fields; + i++, reg_field++) + { + if (!((*reg_field)->flags & FIELD_PART_OF_TMP_UNIQUE)) + continue; + m_key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (m_key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + m_key_part_info->null_bit= (*reg_field)->null_bit; + m_key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); + + m_key_part_info->offset= (*reg_field)->offset(table->record[0]); + m_key_part_info->length= (uint16) (*reg_field)->pack_length(); + m_key_part_info->fieldnr= (*reg_field)->field_index + 1; + /* TODO: + The below method of computing the key format length of the + key part is a copy/paste from opt_range.cc, and table.cc. + This should be factored out, e.g. as a method of Field. 
+ In addition it is not clear if any of the Field::*_length + methods is supposed to compute the same length. If so, it + might be reused. + */ + m_key_part_info->store_length= m_key_part_info->length; + + if ((*reg_field)->real_maybe_null()) + { + m_key_part_info->store_length+= HA_KEY_NULL_LENGTH; + m_key_part_info->key_part_flag |= HA_NULL_PART; + } + m_key_part_info->key_part_flag|= (*reg_field)->key_part_flag(); + m_key_part_info->store_length+= (*reg_field)->key_part_length_bytes(); + keyinfo->key_length+= m_key_part_info->store_length; + + m_key_part_info->type= (uint8) (*reg_field)->key_type(); + m_key_part_info->key_type = + ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? + 0 : FIELDFLAG_BINARY; + + m_key_part_info++; + } + } + + if (unlikely(thd->is_fatal_error)) // If end of memory + goto err; /* purecov: inspected */ + share->db_record_offset= 1; + table->used_for_duplicate_elimination= (param->sum_func_count == 0 && + (table->group || table->distinct)); + table->keep_row_order= keep_row_order; + + if (!do_not_open) + { + if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo, + ¶m->recinfo, m_select_options)) + goto err; + } + + /* record[0] and share->default_values should now have been set up */ + MEM_CHECK_DEFINED(table->record[0], table->s->reclength); + MEM_CHECK_DEFINED(share->default_values, table->s->reclength); + + empty_record(table); + table->status= STATUS_NO_RECORD; + thd->mem_root= mem_root_save; + + DBUG_RETURN(false); + +err: + thd->mem_root= mem_root_save; + DBUG_RETURN(true); /* purecov: inspected */ +} + + +bool Create_tmp_table::add_schema_fields(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param, + const ST_SCHEMA_TABLE &schema_table) +{ + DBUG_ENTER("Create_tmp_table::add_schema_fields"); + DBUG_ASSERT(table); + DBUG_ASSERT(table->field); + 
DBUG_ASSERT(table->s->blob_field); + DBUG_ASSERT(table->s->reclength == 0); + DBUG_ASSERT(table->s->fields == 0); + DBUG_ASSERT(table->s->blob_fields == 0); + + TABLE_SHARE *share= table->s; + ST_FIELD_INFO *defs= schema_table.fields_info; + uint fieldnr; + MEM_ROOT *mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + for (fieldnr= 0; !defs[fieldnr].end_marker(); fieldnr++) + { + const ST_FIELD_INFO &def= defs[fieldnr]; + Record_addr addr(def.nullable()); + const Type_handler *h= def.type_handler(); + Field *field= h->make_schema_field(&table->mem_root, table, addr, def); + if (!field) + { + thd->mem_root= mem_root_save; + DBUG_RETURN(true); // EOM + } + field->init(table); + field->flags|= NO_DEFAULT_VALUE_FLAG; + add_field(table, field, fieldnr, param->force_not_null_cols); + } + + share->fields= fieldnr; + share->blob_fields= m_blob_count; + table->field[fieldnr]= 0; // End marker + share->blob_field[m_blob_count]= 0; // End marker + param->func_count= 0; + share->column_bitmap_size= bitmap_buffer_size(share->fields); + + thd->mem_root= mem_root_save; + DBUG_RETURN(false); +} + + +void Create_tmp_table::cleanup_on_failure(THD *thd, TABLE *table) +{ + if (table) + free_tmp_table(thd, table); + if (m_temp_pool_slot != MY_BIT_NONE) + temp_pool_clear_bit(m_temp_pool_slot); +} + + +TABLE *create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, + ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, + const LEX_CSTRING *table_alias, bool do_not_open, + bool keep_row_order) +{ + TABLE *table; + Create_tmp_table maker(group, distinct, save_sum_fields, select_options, + rows_limit); + if (!(table= maker.start(thd, param, table_alias)) || + maker.add_fields(thd, table, param, fields) || + maker.finalize(thd, table, param, do_not_open, keep_row_order)) + { + maker.cleanup_on_failure(thd, table); + return NULL; + } + return table; +} + + +TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM 
*param, + const ST_SCHEMA_TABLE &schema_table, + longlong select_options, + const LEX_CSTRING &table_alias, + bool do_not_open, bool keep_row_order) +{ + TABLE *table; + Create_tmp_table maker((ORDER *) NULL, false, false, + select_options, HA_POS_ERROR); + if (!(table= maker.start(thd, param, &table_alias)) || + maker.add_schema_fields(thd, table, param, schema_table) || + maker.finalize(thd, table, param, do_not_open, keep_row_order)) + { + maker.cleanup_on_failure(thd, table); + return NULL; + } + return table; +} + + +/****************************************************************************/ + +void *Virtual_tmp_table::operator new(size_t size, THD *thd) throw() +{ + return (Virtual_tmp_table *) alloc_root(thd->mem_root, size); +} + + +bool Virtual_tmp_table::init(uint field_count) +{ + uint *blob_field; + uchar *bitmaps; + DBUG_ENTER("Virtual_tmp_table::init"); + if (!multi_alloc_root(in_use->mem_root, + &s, sizeof(*s), + &field, (field_count + 1) * sizeof(Field*), + &blob_field, (field_count + 1) * sizeof(uint), + &bitmaps, bitmap_buffer_size(field_count) * 6, + NullS)) + DBUG_RETURN(true); + s->reset(); + s->blob_field= blob_field; + setup_tmp_table_column_bitmaps(this, bitmaps, field_count); + m_alloced_field_count= field_count; + DBUG_RETURN(false); +}; + + +bool Virtual_tmp_table::add(List<Spvar_definition> &field_list) +{ + /* Create all fields and calculate the total length of record */ + Spvar_definition *cdef; /* column definition */ + List_iterator_fast<Spvar_definition> it(field_list); + DBUG_ENTER("Virtual_tmp_table::add"); + while ((cdef= it++)) + { + Field *tmp; + Record_addr addr(f_maybe_null(cdef->pack_flag)); + if (!(tmp= cdef->make_field(s, in_use->mem_root, &addr, &cdef->field_name))) + DBUG_RETURN(true); + add(tmp); + } + DBUG_RETURN(false); +} + + +void Virtual_tmp_table::setup_field_pointers() +{ + uchar *null_pos= record[0]; + uchar *field_pos= null_pos + s->null_bytes; + uint null_bit= 1; + + for (Field **cur_ptr= field; *cur_ptr; 
++cur_ptr) + { + Field *cur_field= *cur_ptr; + if ((cur_field->flags & NOT_NULL_FLAG)) + cur_field->move_field(field_pos); + else + { + cur_field->move_field(field_pos, (uchar*) null_pos, null_bit); + null_bit<<= 1; + if (null_bit == (uint)1 << 8) + { + ++null_pos; + null_bit= 1; + } + } + if (cur_field->type() == MYSQL_TYPE_BIT && + cur_field->key_type() == HA_KEYTYPE_BIT) + { + /* This is a Field_bit since key_type is HA_KEYTYPE_BIT */ + static_cast<Field_bit*>(cur_field)->set_bit_ptr(null_pos, null_bit); + null_bit+= cur_field->field_length & 7; + if (null_bit > 7) + { + null_pos++; + null_bit-= 8; + } + } + cur_field->reset(); + field_pos+= cur_field->pack_length(); + } +} + + +bool Virtual_tmp_table::open() +{ + // Make sure that we added all the fields we planned to: + DBUG_ASSERT(s->fields == m_alloced_field_count); + field[s->fields]= NULL; // mark the end of the list + s->blob_field[s->blob_fields]= 0; // mark the end of the list + + uint null_pack_length= (s->null_fields + 7) / 8; // NULL-bit array length + s->reclength+= null_pack_length; + s->rec_buff_length= ALIGN_SIZE(s->reclength + 1); + if (!(record[0]= (uchar*) in_use->alloc(s->rec_buff_length))) + return true; + if (null_pack_length) + { + null_flags= (uchar*) record[0]; + s->null_bytes= s->null_bytes_for_compare= null_pack_length; + } + setup_field_pointers(); + return false; +} + + +bool Virtual_tmp_table::sp_find_field_by_name(uint *idx, + const LEX_CSTRING &name) const +{ + Field *f; + for (uint i= 0; (f= field[i]); i++) + { + // Use the same comparison style with sp_context::find_variable() + if (!system_charset_info->strnncoll(f->field_name.str, f->field_name.length, + name.str, name.length)) + { + *idx= i; + return false; + } + } + return true; +} + + +bool +Virtual_tmp_table::sp_find_field_by_name_or_error(uint *idx, + const LEX_CSTRING &var_name, + const LEX_CSTRING &field_name) + const +{ + if (sp_find_field_by_name(idx, field_name)) + { + my_error(ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD, 
MYF(0), + var_name.str, field_name.str); + return true; + } + return false; +} + + +bool Virtual_tmp_table::sp_set_all_fields_from_item_list(THD *thd, + List<Item> &items) +{ + DBUG_ASSERT(s->fields == items.elements); + List_iterator<Item> it(items); + Item *item; + for (uint i= 0 ; (item= it++) ; i++) + { + if (field[i]->sp_prepare_and_store_item(thd, &item)) + return true; + } + return false; +} + + +bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value) +{ + DBUG_ASSERT(value->fixed()); + DBUG_ASSERT(value->cols() == s->fields); + for (uint i= 0; i < value->cols(); i++) + { + if (field[i]->sp_prepare_and_store_item(thd, value->addr(i))) + return true; + } + return false; +} + + +bool open_tmp_table(TABLE *table) +{ + int error; + if (unlikely((error= table->file->ha_open(table, table->s->path.str, O_RDWR, + HA_OPEN_TMP_TABLE | + HA_OPEN_INTERNAL_TABLE)))) + { + table->file->print_error(error, MYF(0)); /* purecov: inspected */ + table->db_stat= 0; + return 1; + } + table->db_stat= HA_OPEN_KEYFILE; + (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */ + if (!table->is_created()) + { + table->set_created(); + table->in_use->inc_status_created_tmp_tables(); + } + + return 0; +} + + +#ifdef USE_ARIA_FOR_TMP_TABLES +/* + Create internal (MyISAM or Maria) temporary table + + SYNOPSIS + create_internal_tmp_table() + table Table object that descrimes the table to be created + keyinfo Description of the index (there is always one index) + start_recinfo engine's column descriptions + recinfo INOUT End of engine's column descriptions + options Option bits + + DESCRIPTION + Create an internal emporary table according to passed description. The is + assumed to have one unique index or constraint. + + The passed array or TMP_ENGINE_COLUMNDEF structures must have this form: + + 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte + when there are many nullable columns) + 2. Table columns + 3. 
One free TMP_ENGINE_COLUMNDEF element (*recinfo points here) + + This function may use the free element to create hash column for unique + constraint. + + RETURN + FALSE - OK + TRUE - Error +*/ + + +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options) +{ + int error; + MARIA_KEYDEF keydef; + MARIA_UNIQUEDEF uniquedef; + TABLE_SHARE *share= table->s; + MARIA_CREATE_INFO create_info; + DBUG_ENTER("create_internal_tmp_table"); + + if (share->keys) + { // Get keys for ni_create + bool using_unique_constraint=0; + HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root, + sizeof(*seg) * keyinfo->user_defined_key_parts); + if (!seg) + goto err; + + bzero(seg, sizeof(*seg) * keyinfo->user_defined_key_parts); + /* + Note that a similar check is performed during + subquery_types_allow_materialization. See MDEV-7122 for more details as + to why. Whenever this changes, it must be updated there as well, for + all tmp_table engines. 
+ */ + if (keyinfo->key_length > table->file->max_key_length() || + keyinfo->user_defined_key_parts > table->file->max_key_parts() || + share->uniques) + { + if (!share->uniques && !(keyinfo->flags & HA_NOSAME)) + { + my_error(ER_INTERNAL_ERROR, MYF(0), + "Using too big key for internal temp tables"); + DBUG_RETURN(1); + } + + /* Can't create a key; Make a unique constraint instead of a key */ + share->keys= 0; + share->uniques= 1; + using_unique_constraint=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + uniquedef.keysegs=keyinfo->user_defined_key_parts; + uniquedef.seg=seg; + uniquedef.null_are_equal=1; + + /* Create extra column for hash value */ + bzero((uchar*) *recinfo,sizeof(**recinfo)); + (*recinfo)->type= FIELD_CHECK; + (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH; + (*recinfo)++; + + /* Avoid warnings from valgrind */ + bzero(table->record[0]+ share->reclength, MARIA_UNIQUE_HASH_LENGTH); + bzero(share->default_values+ share->reclength, MARIA_UNIQUE_HASH_LENGTH); + share->reclength+= MARIA_UNIQUE_HASH_LENGTH; + } + else + { + /* Create a key */ + bzero((char*) &keydef,sizeof(keydef)); + keydef.flag= keyinfo->flags & HA_NOSAME; + keydef.keysegs= keyinfo->user_defined_key_parts; + keydef.seg= seg; + } + for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++) + { + Field *field=keyinfo->key_part[i].field; + seg->flag= 0; + seg->language= field->charset()->number; + seg->length= keyinfo->key_part[i].length; + seg->start= keyinfo->key_part[i].offset; + if (field->flags & BLOB_FLAG) + { + seg->type= + ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? 
+ HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); + seg->bit_start= (uint8)(field->pack_length() - + portable_sizeof_char_ptr); + seg->flag= HA_BLOB_PART; + seg->length=0; // Whole blob in unique constraint + } + else + { + seg->type= keyinfo->key_part[i].type; + /* Tell handler if it can do suffic space compression */ + if (field->real_type() == MYSQL_TYPE_STRING && + keyinfo->key_part[i].length > 32) + seg->flag|= HA_SPACE_PACK; + } + if (!(field->flags & NOT_NULL_FLAG)) + { + seg->null_bit= field->null_bit; + seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]); + /* + We are using a GROUP BY on something that contains NULL + In this case we have to tell Aria that two NULL should + on INSERT be regarded at the same value + */ + if (!using_unique_constraint) + keydef.flag|= HA_NULL_ARE_EQUAL; + } + } + } + bzero((char*) &create_info,sizeof(create_info)); + create_info.data_file_length= table->in_use->variables.tmp_disk_table_size; + + /* + The logic for choosing the record format: + The STATIC_RECORD format is the fastest one, because it's so simple, + so we use this by default for short rows. + BLOCK_RECORD caches both row and data, so this is generally faster than + DYNAMIC_RECORD. The one exception is when we write to tmp table and + want to use keys for duplicate elimination as with BLOCK RECORD + we first write the row, then check for key conflicts and then we have to + delete the row. The cases when this can happen is when there is + a group by and no sum functions or if distinct is used. + */ + { + enum data_file_type file_type= table->no_rows ? NO_RECORD : + (share->reclength < 64 && !share->blob_fields ? STATIC_RECORD : + table->used_for_duplicate_elimination ? DYNAMIC_RECORD : BLOCK_RECORD); + uint create_flags= HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE | + (table->keep_row_order ? 
HA_PRESERVE_INSERT_ORDER : 0); + + if (file_type != NO_RECORD && encrypt_tmp_disk_tables) + { + /* encryption is only supported for BLOCK_RECORD */ + file_type= BLOCK_RECORD; + if (table->used_for_duplicate_elimination) + { + /* + sql-layer expect the last column to be stored/restored also + when it's null. + + This is probably a bug (that sql-layer doesn't annotate + the column as not-null) but both heap, aria-static, aria-dynamic and + myisam has this property. aria-block_record does not since it + does not store null-columns at all. + Emulate behaviour by making column not-nullable when creating the + table. + */ + uint cols= (uint)(*recinfo-start_recinfo); + start_recinfo[cols-1].null_bit= 0; + } + } + + if (unlikely((error= maria_create(share->path.str, file_type, share->keys, + &keydef, (uint) (*recinfo-start_recinfo), + start_recinfo, share->uniques, &uniquedef, + &create_info, create_flags)))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + table->db_stat=0; + goto err; + } + } + + table->in_use->inc_status_created_tmp_disk_tables(); + table->in_use->inc_status_created_tmp_tables(); + share->db_record_offset= 1; + table->set_created(); + DBUG_RETURN(0); + err: + DBUG_RETURN(1); +} + +#else + +/* + Create internal (MyISAM or Maria) temporary table + + SYNOPSIS + create_internal_tmp_table() + table Table object that descrimes the table to be created + keyinfo Description of the index (there is always one index) + start_recinfo engine's column descriptions + recinfo INOUT End of engine's column descriptions + options Option bits + + DESCRIPTION + Create an internal emporary table according to passed description. The is + assumed to have one unique index or constraint. + + The passed array or TMP_ENGINE_COLUMNDEF structures must have this form: + + 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte + when there are many nullable columns) + 2. Table columns + 3. 
One free TMP_ENGINE_COLUMNDEF element (*recinfo points here) + + This function may use the free element to create hash column for unique + constraint. + + RETURN + FALSE - OK + TRUE - Error +*/ + +/* Create internal MyISAM temporary table */ + +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options) +{ + int error; + MI_KEYDEF keydef; + MI_UNIQUEDEF uniquedef; + TABLE_SHARE *share= table->s; + DBUG_ENTER("create_internal_tmp_table"); + + if (share->keys) + { // Get keys for ni_create + bool using_unique_constraint=0; + HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root, + sizeof(*seg) * keyinfo->user_defined_key_parts); + if (!seg) + goto err; + + bzero(seg, sizeof(*seg) * keyinfo->user_defined_key_parts); + /* + Note that a similar check is performed during + subquery_types_allow_materialization. See MDEV-7122 for more details as + to why. Whenever this changes, it must be updated there as well, for + all tmp_table engines. 
+ */ + if (keyinfo->key_length > table->file->max_key_length() || + keyinfo->user_defined_key_parts > table->file->max_key_parts() || + share->uniques) + { + /* Can't create a key; Make a unique constraint instead of a key */ + share->keys= 0; + share->uniques= 1; + using_unique_constraint=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + uniquedef.keysegs=keyinfo->user_defined_key_parts; + uniquedef.seg=seg; + uniquedef.null_are_equal=1; + + /* Create extra column for hash value */ + bzero((uchar*) *recinfo,sizeof(**recinfo)); + (*recinfo)->type= FIELD_CHECK; + (*recinfo)->length=MI_UNIQUE_HASH_LENGTH; + (*recinfo)++; + /* Avoid warnings from valgrind */ + bzero(table->record[0]+ share->reclength, MI_UNIQUE_HASH_LENGTH); + bzero(share->default_values+ share->reclength, MI_UNIQUE_HASH_LENGTH); + share->reclength+= MI_UNIQUE_HASH_LENGTH; + } + else + { + /* Create an unique key */ + bzero((char*) &keydef,sizeof(keydef)); + keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY | + HA_PACK_KEY); + keydef.keysegs= keyinfo->user_defined_key_parts; + keydef.seg= seg; + } + for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++) + { + Field *field=keyinfo->key_part[i].field; + seg->flag= 0; + seg->language= field->charset()->number; + seg->length= keyinfo->key_part[i].length; + seg->start= keyinfo->key_part[i].offset; + if (field->flags & BLOB_FLAG) + { + seg->type= + ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? 
+ HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); + seg->bit_start= (uint8)(field->pack_length() - portable_sizeof_char_ptr); + seg->flag= HA_BLOB_PART; + seg->length=0; // Whole blob in unique constraint + } + else + { + seg->type= keyinfo->key_part[i].type; + /* Tell handler if it can do suffic space compression */ + if (field->real_type() == MYSQL_TYPE_STRING && + keyinfo->key_part[i].length > 4) + seg->flag|= HA_SPACE_PACK; + } + if (!(field->flags & NOT_NULL_FLAG)) + { + seg->null_bit= field->null_bit; + seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]); + /* + We are using a GROUP BY on something that contains NULL + In this case we have to tell MyISAM that two NULL should + on INSERT be regarded at the same value + */ + if (!using_unique_constraint) + keydef.flag|= HA_NULL_ARE_EQUAL; + } + } + } + MI_CREATE_INFO create_info; + bzero((char*) &create_info,sizeof(create_info)); + create_info.data_file_length= table->in_use->variables.tmp_disk_table_size; + + if (unlikely((error= mi_create(share->path.str, share->keys, &keydef, + (uint) (*recinfo-start_recinfo), + start_recinfo, + share->uniques, &uniquedef, + &create_info, + HA_CREATE_TMP_TABLE | + HA_CREATE_INTERNAL_TABLE | + ((share->db_create_options & + HA_OPTION_PACK_RECORD) ? 
+ HA_PACK_RECORD : 0) + )))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + table->db_stat=0; + goto err; + } + table->in_use->inc_status_created_tmp_disk_tables(); + table->in_use->inc_status_created_tmp_tables(); + share->db_record_offset= 1; + table->set_created(); + DBUG_RETURN(0); + err: + DBUG_RETURN(1); +} + +#endif /* USE_ARIA_FOR_TMP_TABLES */ + + +/* + If a HEAP table gets full, create a internal table in MyISAM or Maria + and copy all rows to this +*/ + + +bool +create_internal_tmp_table_from_heap(THD *thd, TABLE *table, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + int error, + bool ignore_last_dupp_key_error, + bool *is_duplicate) +{ + TABLE new_table; + TABLE_SHARE share; + const char *save_proc_info; + int write_err= 0; + DBUG_ENTER("create_internal_tmp_table_from_heap"); + if (is_duplicate) + *is_duplicate= FALSE; + + if (table->s->db_type() != heap_hton || error != HA_ERR_RECORD_FILE_FULL) + { + /* + We don't want this error to be converted to a warning, e.g. in case of + INSERT IGNORE ... SELECT. 
+ */ + table->file->print_error(error, MYF(ME_FATAL)); + DBUG_RETURN(1); + } + new_table= *table; + share= *table->s; + new_table.s= &share; + new_table.s->db_plugin= ha_lock_engine(thd, TMP_ENGINE_HTON); + if (unlikely(!(new_table.file= get_new_handler(&share, &new_table.mem_root, + TMP_ENGINE_HTON)))) + DBUG_RETURN(1); // End of memory + + if (unlikely(new_table.file->set_ha_share_ref(&share.ha_share))) + { + delete new_table.file; + DBUG_RETURN(1); + } + + save_proc_info=thd->proc_info; + THD_STAGE_INFO(thd, stage_converting_heap_to_myisam); + + new_table.no_rows= table->no_rows; + if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo, + recinfo, + thd->lex->first_select_lex()->options | + thd->variables.option_bits)) + goto err2; + if (open_tmp_table(&new_table)) + goto err1; + if (table->file->indexes_are_disabled()) + new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL); + table->file->ha_index_or_rnd_end(); + if (table->file->ha_rnd_init_with_error(1)) + DBUG_RETURN(1); + if (new_table.no_rows) + new_table.file->extra(HA_EXTRA_NO_ROWS); + else + { + /* update table->file->stats.records */ + table->file->info(HA_STATUS_VARIABLE); + new_table.file->ha_start_bulk_insert(table->file->stats.records); + } + + /* + copy all old rows from heap table to MyISAM table + This is the only code that uses record[1] to read/write but this + is safe as this is a temporary MyISAM table without timestamp/autoincrement + or partitioning. 
+ */ + while (!table->file->ha_rnd_next(new_table.record[1])) + { + write_err= new_table.file->ha_write_tmp_row(new_table.record[1]); + DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;); + if (write_err) + goto err; + if (unlikely(thd->check_killed())) + goto err_killed; + } + if (!new_table.no_rows && new_table.file->ha_end_bulk_insert()) + goto err; + /* copy row that filled HEAP table */ + if (unlikely((write_err=new_table.file->ha_write_tmp_row(table->record[0])))) + { + if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) || + !ignore_last_dupp_key_error) + goto err; + if (is_duplicate) + *is_duplicate= TRUE; + } + else + { + if (is_duplicate) + *is_duplicate= FALSE; + } + + /* remove heap table and change to use myisam table */ + (void) table->file->ha_rnd_end(); + (void) table->file->ha_close(); // This deletes the table ! + delete table->file; + table->file=0; + plugin_unlock(0, table->s->db_plugin); + share.db_plugin= my_plugin_lock(0, share.db_plugin); + new_table.s= table->s; // Keep old share + *table= new_table; + *table->s= share; + + table->file->change_table_ptr(table, table->s); + table->use_all_columns(); + if (save_proc_info) + thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ? 
+ "Copying to tmp table on disk" : save_proc_info)); + DBUG_RETURN(0); + + err: + DBUG_PRINT("error",("Got error: %d",write_err)); + table->file->print_error(write_err, MYF(0)); +err_killed: + (void) table->file->ha_rnd_end(); + (void) new_table.file->ha_close(); + err1: + TMP_ENGINE_HTON->drop_table(TMP_ENGINE_HTON, new_table.s->path.str); + err2: + delete new_table.file; + thd_proc_info(thd, save_proc_info); + table->mem_root= new_table.mem_root; + DBUG_RETURN(1); +} + + +void +free_tmp_table(THD *thd, TABLE *entry) +{ + MEM_ROOT own_root= entry->mem_root; + const char *save_proc_info; + DBUG_ENTER("free_tmp_table"); + DBUG_PRINT("enter",("table: %s alias: %s",entry->s->table_name.str, + entry->alias.c_ptr())); + + save_proc_info=thd->proc_info; + THD_STAGE_INFO(thd, stage_removing_tmp_table); + + if (entry->file && entry->is_created()) + { + if (entry->db_stat) + { + /* The table was properly opened in open_tmp_table() */ + entry->file->ha_index_or_rnd_end(); + entry->file->info(HA_STATUS_VARIABLE); + thd->tmp_tables_size+= (entry->file->stats.data_file_length + + entry->file->stats.index_file_length); + } + entry->file->ha_drop_table(entry->s->path.str); + delete entry->file; + entry->file= NULL; + entry->reset_created(); + } + + /* free blobs */ + for (Field **ptr=entry->field ; *ptr ; ptr++) + (*ptr)->free(); + + if (entry->temp_pool_slot != MY_BIT_NONE) + temp_pool_clear_bit(entry->temp_pool_slot); + + plugin_unlock(0, entry->s->db_plugin); + entry->alias.free(); + + if (entry->pos_in_table_list && entry->pos_in_table_list->table) + { + DBUG_ASSERT(entry->pos_in_table_list->table == entry); + entry->pos_in_table_list->table= NULL; + } + + free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ + thd_proc_info(thd, save_proc_info); + + DBUG_VOID_RETURN; +} + + +/** + @brief + Set write_func of AGGR_OP object + + @param join_tab JOIN_TAB of the corresponding tmp table + + @details + Function sets up write_func according to how AGGR_OP object 
that
  is attached to the given join_tab will be used in the query.
*/

void set_postjoin_aggr_write_func(JOIN_TAB *tab)
{
  JOIN *join= tab->join;
  TABLE *table= tab->table;
  AGGR_OP *aggr= tab->aggr;
  TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param;

  DBUG_ASSERT(table && aggr);

  if (table->group && tmp_tbl->sum_func_count &&
      !tmp_tbl->precomputed_group_by)
  {
    /*
      Note for MyISAM tmp tables: if uniques is true keys won't be
      created.
    */
    if (table->s->keys && !table->s->uniques)
    {
      DBUG_PRINT("info",("Using end_update"));
      aggr->set_write_func(end_update);
    }
    else
    {
      DBUG_PRINT("info",("Using end_unique_update"));
      aggr->set_write_func(end_unique_update);
    }
  }
  else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
           !join->sort_and_group_aggr_tab && join->tables_list &&
           join->top_join_tab_count)
  {
    DBUG_PRINT("info",("Using end_write_group"));
    aggr->set_write_func(end_write_group);
    /* Remember which tab does the grouping; the check above reads this */
    join->sort_and_group_aggr_tab= tab;
  }
  else
  {
    DBUG_PRINT("info",("Using end_write"));
    aggr->set_write_func(end_write);
    if (tmp_tbl->precomputed_group_by)
    {
      /*
        A preceding call to create_tmp_table in the case when loose
        index scan is used guarantees that
        TMP_TABLE_PARAM::items_to_copy has enough space for the group
        by functions. It is OK here to use memcpy since we copy
        Item_sum pointers into an array of Item pointers.
      */
      memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
             join->sum_funcs,
             sizeof(Item*)*tmp_tbl->sum_func_count);
      /* Keep the array NULL-terminated */
      tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
    }
  }
}


/**
  @details
  Rows produced by a join sweep may end up in a temporary table or be sent
  to a client. Set the function of the nested loop join algorithm which
  handles final fully constructed and matched records.

  @param join   join to setup the function for.

  @return
    end_select function to use. This function can't fail.
+*/ + +Next_select_func setup_end_select_func(JOIN *join) +{ + TMP_TABLE_PARAM *tmp_tbl= &join->tmp_table_param; + + /* + Choose method for presenting result to user. Use end_send_group + if the query requires grouping (has a GROUP BY clause and/or one or + more aggregate functions). Use end_send if the query should not + be grouped. + */ + if (join->sort_and_group && !tmp_tbl->precomputed_group_by) + { + DBUG_PRINT("info",("Using end_send_group")); + return end_send_group; + } + DBUG_PRINT("info",("Using end_send")); + return end_send; +} + + +/** + Make a join of all tables and write it on socket or to table. + + @retval + 0 if ok + @retval + 1 if error is sent + @retval + -1 if error should be sent +*/ + +static int +do_select(JOIN *join, Procedure *procedure) +{ + int rc= 0; + enum_nested_loop_state error= NESTED_LOOP_OK; + DBUG_ENTER("do_select"); + + if (join->pushdown_query) + { + /* Select fields are in the temporary table */ + join->fields= &join->tmp_fields_list1; + /* Setup HAVING to work with fields in temporary table */ + join->set_items_ref_array(join->items1); + /* The storage engine will take care of the group by query result */ + int res= join->pushdown_query->execute(join); + + if (res) + DBUG_RETURN(res); + + if (join->pushdown_query->store_data_in_temp_table) + { + JOIN_TAB *last_tab= join->join_tab + join->exec_join_tab_cnt(); + last_tab->next_select= end_send; + + enum_nested_loop_state state= last_tab->aggr->end_send(); + if (state >= NESTED_LOOP_OK) + state= sub_select(join, last_tab, true); + + if (state < NESTED_LOOP_OK) + res= 1; + + if (join->result->send_eof()) + res= 1; + } + DBUG_RETURN(res); + } + + join->procedure= procedure; + join->duplicate_rows= join->send_records=0; + if (join->only_const_tables() && !join->need_tmp) + { + Next_select_func end_select= setup_end_select_func(join); + + /* + HAVING will be checked after processing aggregate functions, + But WHERE should checked here (we alredy have read tables). 
+ Notice that make_join_select() splits all conditions in this case + into two groups exec_const_cond and outer_ref_cond. + If join->table_count == join->const_tables then it is + sufficient to check only the condition pseudo_bits_cond. + */ + DBUG_ASSERT(join->outer_ref_cond == NULL); + if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int()) + { + // HAVING will be checked by end_select + error= (*end_select)(join, 0, 0); + if (error >= NESTED_LOOP_OK) + error= (*end_select)(join, 0, 1); + + /* + If we don't go through evaluate_join_record(), do the counting + here. join->send_records is increased on success in end_send(), + so we don't touch it here. + */ + join->join_examined_rows++; + DBUG_ASSERT(join->join_examined_rows <= 1); + } + else if (join->send_row_on_empty_set()) + { + table_map cleared_tables= (table_map) 0; + if (end_select == end_send_group) + { + /* + Was a grouping query but we did not find any rows. In this case + we clear all tables to get null in any referenced fields, + like in case of: + SELECT MAX(a) AS f1, a AS f2 FROM t1 WHERE VALUE(a) IS NOT NULL + */ + clear_tables(join, &cleared_tables); + } + if (!join->having || join->having->val_int()) + { + List<Item> *columns_list= (procedure ? &join->procedure_fields_list : + join->fields); + rc= join->result->send_data_with_check(*columns_list, + join->unit, 0) > 0; + } + /* + We have to remove the null markings from the tables as this table + may be part of a sub query that is re-evaluated + */ + if (cleared_tables) + unclear_tables(join, &cleared_tables); + } + /* + An error can happen when evaluating the conds + (the join condition and piece of where clause + relevant to this join table). 
+ */ + if (unlikely(join->thd->is_error())) + error= NESTED_LOOP_ERROR; + } + else + { + DBUG_EXECUTE_IF("show_explain_probe_do_select", + if (dbug_user_var_equals_int(join->thd, + "show_explain_probe_select_id", + join->select_lex->select_number)) + dbug_serve_apcs(join->thd, 1); + ); + + JOIN_TAB *join_tab= join->join_tab + + (join->tables_list ? join->const_tables : 0); + if (join->outer_ref_cond && !join->outer_ref_cond->val_int()) + error= NESTED_LOOP_NO_MORE_ROWS; + else + error= join->first_select(join,join_tab,0); + if (error >= NESTED_LOOP_OK && likely(join->thd->killed != ABORT_QUERY)) + error= join->first_select(join,join_tab,1); + } + + join->thd->limit_found_rows= join->send_records - join->duplicate_rows; + + if (error == NESTED_LOOP_NO_MORE_ROWS || + unlikely(join->thd->killed == ABORT_QUERY)) + error= NESTED_LOOP_OK; + + /* + For "order by with limit", we cannot rely on send_records, but need + to use the rowcount read originally into the join_tab applying the + filesort. There cannot be any post-filtering conditions, nor any + following join_tabs in this case, so this rowcount properly represents + the correct number of qualifying rows. 
+ */ + if (join->order) + { + // Save # of found records prior to cleanup + JOIN_TAB *sort_tab; + JOIN_TAB *join_tab= join->join_tab; + uint const_tables= join->const_tables; + + // Take record count from first non constant table or from last tmp table + if (join->aggr_tables > 0) + sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1; + else + { + DBUG_ASSERT(!join->only_const_tables()); + sort_tab= join_tab + const_tables; + } + if (sort_tab->filesort && + join->select_options & OPTION_FOUND_ROWS && + sort_tab->filesort->sortorder && + sort_tab->filesort->limit != HA_POS_ERROR) + { + join->thd->limit_found_rows= sort_tab->records; + } + } + + { + /* + The following will unlock all cursors if the command wasn't an + update command + */ + join->join_free(); // Unlock all cursors + } + if (error == NESTED_LOOP_OK) + { + /* + Sic: this branch works even if rc != 0, e.g. when + send_data above returns an error. + */ + if (unlikely(join->result->send_eof())) + rc= 1; // Don't send error + DBUG_PRINT("info",("%ld records output", (long) join->send_records)); + } + else + rc= -1; +#ifndef DBUG_OFF + if (rc) + { + DBUG_PRINT("error",("Error: do_select() failed")); + } +#endif + rc= join->thd->is_error() ? -1 : rc; + DBUG_RETURN(rc); +} + + +/** + @brief + Instantiates temporary table + + @param table Table object that describes the table to be + instantiated + @param keyinfo Description of the index (there is always one index) + @param start_recinfo Column descriptions + @param recinfo INOUT End of column descriptions + @param options Option bits + + @details + Creates tmp table and opens it. + + @return + FALSE - OK + TRUE - Error +*/ + +bool instantiate_tmp_table(TABLE *table, KEY *keyinfo, + TMP_ENGINE_COLUMNDEF *start_recinfo, + TMP_ENGINE_COLUMNDEF **recinfo, + ulonglong options) +{ + if (table->s->db_type() == TMP_ENGINE_HTON) + { + /* + If it is not heap (in-memory) table then convert index to unique + constrain. 
+ */ + MEM_CHECK_DEFINED(table->record[0], table->s->reclength); + if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo, + options)) + return TRUE; + // Make empty record so random data is not written to disk + empty_record(table); + table->status= STATUS_NO_RECORD; + } + if (open_tmp_table(table)) + return TRUE; + + return FALSE; +} + + +/** + @brief + Accumulate rows of the result of an aggregation operation in a tmp table + + @param join pointer to the structure providing all context info for the query + @param join_tab the JOIN_TAB object to which the operation is attached + @param end_of_records TRUE <=> all records were accumulated, send them further + + @details + This function accumulates records of the aggregation operation for + the node join_tab from the execution plan in a tmp table. To add a new + record the function calls join_tab->aggr->put_record(). + When there are no more records to save, i.e. the end_of_records + argument is true, the function calls aggr->end_send() and, if the + returned state is >= NESTED_LOOP_OK, passes the 'end_of_records' signal + further by calling sub_select() with end_of_records set to true. + If the query has been killed the function sends the kill message and + returns NESTED_LOOP_KILLED without calling the aggregation operation. + + @note + This function is not expected to be called when dynamic range scan is + used to scan join_tab because range scans aren't used for tmp tables. + + @return + return one of enum_nested_loop_state. 
+*/ + +enum_nested_loop_state +sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + enum_nested_loop_state rc; + AGGR_OP *aggr= join_tab->aggr; + /* Use the function's own name as the DBUG trace tag */ + DBUG_ENTER("sub_select_postjoin_aggr"); + + /* This function cannot be called if join_tab has no associated aggregation */ + DBUG_ASSERT(aggr != NULL); + + if (join->thd->killed) + { + /* The user has aborted the execution of the query */ + join->thd->send_kill_message(); + DBUG_RETURN(NESTED_LOOP_KILLED); + } + + if (end_of_records) + { + /* Finish the aggregation first, then forward the end_of_records signal */ + rc= aggr->end_send(); + if (rc >= NESTED_LOOP_OK) + rc= sub_select(join, join_tab, end_of_records); + DBUG_RETURN(rc); + } + + rc= aggr->put_record(); + + DBUG_RETURN(rc); +} + + +/* + Fill the join buffer with partial records, retrieve all full matches for + them + + SYNOPSIS + sub_select_cache() + join pointer to the structure providing all context info for the + query + join_tab the first next table of the execution plan to be retrieved + end_of_records true when we need to perform final steps of the retrieval + + DESCRIPTION + For a given table Ti= join_tab from the sequence of tables of the chosen + execution plan T1,...,Ti,...,Tn the function just puts the partial record + t1,...,t[i-1] into the join buffer associated with table Ti unless this + is the last record added into the buffer. In this case, the function + additionally finds all matching full records for all partial + records accumulated in the buffer, after which it cleans the buffer up. + If a partial join record t1,...,ti is extended utilizing a dynamic + range scan then it is not put into the join buffer. Rather all matching + records are found for it at once by the function sub_select. + + NOTES + The function implements the algorithmic schema for both Blocked Nested + Loop Join and Batched Key Access Join. The difference can be seen only at + the level of the implementation of the put_record and join_records + virtual methods for the cache object associated with the join_tab. 
+ The put_record method accumulates records in the cache, while the + join_records method builds all matching join records and send them into + the output stream. + + RETURN + return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. +*/ + +enum_nested_loop_state +sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + enum_nested_loop_state rc; + JOIN_CACHE *cache= join_tab->cache; + int err; + DBUG_ENTER("sub_select_cache"); + + /* + This function cannot be called if join_tab has no associated join + buffer + */ + DBUG_ASSERT(cache != NULL); + + join_tab->cache->reset_join(join); + + if (end_of_records) + { + /* + End of input: join the records still kept in the buffer and, if that + returned OK, NO_MORE_ROWS or QUERY_LIMIT, forward the end_of_records + signal through sub_select(). + */ + rc= cache->join_records(FALSE); + if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS || + rc == NESTED_LOOP_QUERY_LIMIT) + rc= sub_select(join, join_tab, end_of_records); + DBUG_RETURN(rc); + } + if (unlikely(join->thd->check_killed())) + { + /* The user has aborted the execution of the query */ + DBUG_RETURN(NESTED_LOOP_KILLED); + } + join_tab->jbuf_loops_tracker->on_scan_init(); + + if (!(err= test_if_use_dynamic_range_scan(join_tab))) + { + if (!cache->put_record()) + DBUG_RETURN(NESTED_LOOP_OK); + /* + It has been decided that no more records will be added to the buffer + after the one we've just put there. Now try to find all the matching + extensions for all records in the buffer. + */ + rc= cache->join_records(FALSE); + DBUG_RETURN(rc); + } + + /* A negative result of test_if_use_dynamic_range_scan() is treated as an error */ + if (err < 0) + DBUG_RETURN(NESTED_LOOP_ERROR); + + /* + Positive result: the current partial record is not put into the buffer; + it is handed to sub_select() below. + */ + /* + TODO: Check whether we really need the call below and we can't do + without it. If it's not the case remove it. + */ + rc= cache->join_records(TRUE); + if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS || + rc == NESTED_LOOP_QUERY_LIMIT) + rc= sub_select(join, join_tab, end_of_records); + DBUG_RETURN(rc); +} + +/** + Retrieve records ends with a given beginning from the result of a join. 
+ + For a given partial join record consisting of records from the tables + preceding the table join_tab in the execution plan, the function + retrieves all matching full records from the result set and + sends them to the result set stream. + + @note + The function effectively implements the final (n-k) nested loops + of the nested loops join algorithm, where k is the ordinal number of + the join_tab table and n is the total number of tables in the join query. + It performs nested loops joins with all conjunctive predicates from + the where condition pushed as low to the tables as possible. + E.g. for the query + @code + SELECT * FROM t1,t2,t3 + WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9 + @endcode + the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1, + given the selected plan prescribes to nest retrievals of the + joined tables in the following order: t1,t2,t3. + A pushed down predicate is attached to the table it is pushed to, + at the field join_tab->select_cond. + When executing a nested loop of level k the function runs through + the rows of 'join_tab' and for each row checks the pushed condition + attached to the table. + If it is false the function moves to the next row of the + table. If the condition is true the function recursively executes (n-k-1) + remaining embedded nested loops. + The situation becomes more complicated if outer joins are involved in + the execution plan. In this case the pushed down predicates can be + checked only under certain conditions. + Suppose for the query + @code + SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a + WHERE t1.a>2 AND (t2.b>5 OR t2.b IS NULL) + @endcode + the optimizer has chosen a plan with the table order t1,t2,t3. + The predicate P1=t1.a>2 will be pushed down to the table t1, while the + predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table + t2. But the second predicate can not be unconditionally tested right + after a row from t2 has been read. 
This can be done only after the + first row with t3.a=t1.a has been encountered. + Thus, the second predicate P2 is supplied with a guard value that is + stored in the field 'found' of the first inner table for the outer join + (table t2). When the first row with t3.a=t1.a for the current row + of table t1 appears, the value becomes true. From now on the predicate + is evaluated immediately after the row of table t2 has been read. + When the first row with t3.a=t1.a has been encountered all + conditions attached to the inner tables t2,t3 must be evaluated. + Only when all of them are true the row is sent to the output stream. + If not, the function returns to the lowest nest level that has a false + attached condition. + The predicates from on expressions are also pushed down. If in + the above example the on expression were (t3.a=t1.a AND t2.a=t1.a), + then t1.a=t2.a would be pushed down to table t2, and without any + guard. + If after the run through all rows of table t2, the first inner table + for the outer join operation, it turns out that no matches are + found for the current row of t1, then the current row from table t1 + is complemented by nulls for t2 and t3. Then the pushed down predicates + are checked for the composed row almost in the same way as it had + been done for the first row with a match. The only difference is that + the predicates from on expressions are not checked. + + @par + @b IMPLEMENTATION + @par + The function forms output rows for a current partial join of k + tables recursively. + For each partial join record ending with a certain row from + join_tab it calls sub_select that builds all possible matching + tails from the result set. + To be able to check predicates conditionally items of the class + Item_func_trig_cond are employed. + An object of this class is constructed from an item of class COND + and a pointer to a guarding boolean variable. 
+ When the value of the guard variable is true the value of the object + is the same as the value of the predicate, otherwise it just returns + true. + To carry out a return to a nested loop level of join table t the pointer + to t is remembered in the field 'return_tab' of the join structure. + Consider the following query: + @code + SELECT * FROM t1 + LEFT JOIN + (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a) + ON t4.a=t2.a + WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL) + @endcode + Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5 + and suppose for the given joined rows from tables t1,t2,t3 there are + no rows in the result set yet. + When the first row from t5 that satisfies the on condition + t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL + becomes 'activated', as well as the predicate t4.a=t2.a. But + the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until + t4.a=t2.a becomes true. + In order not to re-evaluate the predicates that were already evaluated + as attached pushed down predicates, a pointer to the first + most inner unmatched table is maintained in join_tab->first_unmatched. + Thus, when the first row from t5 with t5.a=t3.a is found + this pointer for t5 is changed from t4 to t2. + + @par + @b STRUCTURE @b NOTES + @par + join_tab->first_unmatched points always backwards to the first inner + table of the embedding nested join, if any. + + @param join pointer to the structure providing all context info for + the query + @param join_tab the first next table of the execution plan to be retrieved + @param end_of_records true when we need to perform final steps of retrieval + + @return + return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. 
+*/ + +enum_nested_loop_state +sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) +{ + DBUG_ENTER("sub_select"); + + /* + For every table whose bit is set in split_derived_to_update, reset its + preread_init_done flag (skipped on the end_of_records call). + */ + if (join_tab->split_derived_to_update && !end_of_records) + { + table_map tab_map= join_tab->split_derived_to_update; + for (uint i= 0; tab_map; i++, tab_map>>= 1) + { + if (tab_map & 1) + join->map2table[i]->preread_init_done= false; + } + } + + /* Restore state if mark_as_null_row() has been called */ + if (join_tab->last_inner) + { + JOIN_TAB *last_inner_tab= join_tab->last_inner; + for (JOIN_TAB *jt= join_tab; jt <= last_inner_tab; jt++) + jt->table->null_row= 0; + } + else + join_tab->table->null_row=0; + + if (end_of_records) + { + /* Nothing is read at this level: just pass the end_of_records signal on */ + enum_nested_loop_state nls= + (*join_tab->next_select)(join,join_tab+1,end_of_records); + DBUG_RETURN(nls); + } + join_tab->tracker->r_scans++; + + int error; + enum_nested_loop_state rc= NESTED_LOOP_OK; + READ_RECORD *info= &join_tab->read_record; + + + /* Delete the rows of every weedout table linked through flush_weedout_table */ + for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table; + flush_dups_table; + flush_dups_table= flush_dups_table->next_flush_table) + { + flush_dups_table->sj_weedout_delete_rows(); + } + + if (!join_tab->preread_init_done && join_tab->preread_init()) + DBUG_RETURN(NESTED_LOOP_ERROR); + + if (join_tab->build_range_rowid_filter_if_needed()) + DBUG_RETURN(NESTED_LOOP_ERROR); + + /* With an empty rowid filter the table is treated as having no rows to read */ + if (join_tab->rowid_filter && join_tab->rowid_filter->is_empty()) + rc= NESTED_LOOP_NO_MORE_ROWS; + + join->return_tab= join_tab; + + if (join_tab->last_inner) + { + /* join_tab is the first inner table for an outer join operation. 
*/ + + /* Set initial state of guard variables for this table.*/ + join_tab->found=0; + join_tab->not_null_compl= 1; + + /* Set first_unmatched for the last inner table of this group */ + join_tab->last_inner->first_unmatched= join_tab; + /* If on_precond evaluates to false, reading the inner tables is skipped */ + if (join_tab->on_precond && !join_tab->on_precond->val_int()) + rc= NESTED_LOOP_NO_MORE_ROWS; + } + join->thd->get_stmt_da()->reset_current_row_for_warning(1); + + if (rc != NESTED_LOOP_NO_MORE_ROWS && + (rc= join_tab_execution_startup(join_tab)) < 0) + DBUG_RETURN(rc); + + if (join_tab->loosescan_match_tab) + join_tab->loosescan_match_tab->found_match= FALSE; + + const bool pfs_batch_update= join_tab->pfs_batch_update(join); + if (pfs_batch_update) + join_tab->table->file->start_psi_batch_mode(); + + /* Read the first row of this table and process it */ + if (rc != NESTED_LOOP_NO_MORE_ROWS) + { + error= (*join_tab->read_first_record)(join_tab); + if (!error && join_tab->keep_current_rowid) + join_tab->table->file->position(join_tab->table->record[0]); + rc= evaluate_join_record(join, join_tab, error); + } + + /* + Note: psergey has added the 2nd part of the following condition; the + change should probably be made in 5.1, too. + */ + bool skip_over= FALSE; + while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab) + { + /* + After a LooseScan match, remember the key of the current row so that + the following rows with the same key can be skipped. + */ + if (join_tab->loosescan_match_tab && + join_tab->loosescan_match_tab->found_match) + { + KEY *key= join_tab->table->key_info + join_tab->loosescan_key; + key_copy(join_tab->loosescan_buf, join_tab->table->record[0], key, + join_tab->loosescan_key_len); + skip_over= TRUE; + } + + error= info->read_record(); + + if (skip_over && likely(!error)) + { + if (!key_cmp(join_tab->table->key_info[join_tab->loosescan_key].key_part, + join_tab->loosescan_buf, join_tab->loosescan_key_len)) + { + /* + This is the LooseScan action: skip over records with the same key + value if we already had a match for them. 
+ */ + continue; + } + join_tab->loosescan_match_tab->found_match= FALSE; + skip_over= FALSE; + } + + if (join_tab->keep_current_rowid && likely(!error)) + join_tab->table->file->position(join_tab->table->record[0]); + + rc= evaluate_join_record(join, join_tab, error); + } + + /* + All rows were scanned and no match was found for the first inner table + of an outer join: process the NULL-complemented row. + */ + if (rc == NESTED_LOOP_NO_MORE_ROWS && + join_tab->last_inner && !join_tab->found) + rc= evaluate_null_complemented_join_record(join, join_tab); + + if (pfs_batch_update) + join_tab->table->file->end_psi_batch_mode(); + + /* Running out of rows at this level is reported to the caller as OK */ + if (rc == NESTED_LOOP_NO_MORE_ROWS) + rc= NESTED_LOOP_OK; + DBUG_RETURN(rc); +} + +/** + @brief Process one row of the nested loop join. + + This function will evaluate parts of WHERE/ON clauses that are + applicable to the partial row on hand and in case of success + submit this row to the next level of the nested loop. + + @param join - The join object + @param join_tab - The most inner join_tab being processed + @param error > 0: Error, terminate processing + = 0: (Partial) row is available + < 0: No more rows available at this level + @return Nested loop state (Ok, No_more_rows, Error, Killed) +*/ + +static enum_nested_loop_state +evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, + int error) +{ + bool shortcut_for_distinct= join_tab->shortcut_for_distinct; + ha_rows found_records=join->found_records; + COND *select_cond= join_tab->select_cond; + bool select_cond_result= TRUE; + + DBUG_ENTER("evaluate_join_record"); + DBUG_PRINT("enter", + ("evaluate_join_record join: %p join_tab: %p " + "cond: %p abort: %d alias %s", + join, join_tab, select_cond, error, + join_tab->table->alias.ptr())); + + if (error > 0 || unlikely(join->thd->is_error())) // Fatal error + DBUG_RETURN(NESTED_LOOP_ERROR); + if (error < 0) + DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); + if (unlikely(join->thd->check_killed())) // Aborted by user + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + + join_tab->tracker->r_rows++; + + if (select_cond) + { + select_cond_result= 
MY_TEST(select_cond->val_int()); + + /* check for errors evaluating the condition */ + if (unlikely(join->thd->is_error())) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + + if (!select_cond || select_cond_result) + { + /* + There is no select condition or the attached pushed down + condition is true => a match is found. + */ + join_tab->tracker->r_rows_after_where++; + + bool found= 1; + while (join_tab->first_unmatched && found) + { + /* + The while condition is always false if join_tab is not + the last inner join table of an outer join operation. + */ + JOIN_TAB *first_unmatched= join_tab->first_unmatched; + /* + Mark that a match for the current outer table is found. + This activates the pushed down conditional predicates attached + to all the inner tables of the outer join. + */ + first_unmatched->found= 1; + for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++) + { + /* + Check whether 'not exists' optimization can be used here. + If tab->table->reginfo.not_exists_optimize is set to true + then WHERE contains a conjunctive predicate IS NULL over + a non-nullable field of tab. When activated this predicate + will filter out all records with matches for the left part + of the outer join whose inner tables start from the + first_unmatched table and include table tab. To safely use + 'not exists' optimization we have to check that the + IS NULL predicate is really activated, i.e. all guards + that wrap it are in the 'open' state. + */ + bool not_exists_opt_is_applicable= + tab->table->reginfo.not_exists_optimize; + for (JOIN_TAB *first_upper= first_unmatched->first_upper; + not_exists_opt_is_applicable && first_upper; + first_upper= first_upper->first_upper) + { + if (!first_upper->found) + not_exists_opt_is_applicable= false; + } + /* Check all predicates that have just been activated. */ + /* + Actually all predicates non-guarded by first_unmatched->found + will be re-evaluated again. It could be fixed, but, probably, + it's not worth doing now. 
+ */ + if (tab->select_cond) + { + const longlong res= tab->select_cond->val_int(); + if (join->thd->is_error()) + DBUG_RETURN(NESTED_LOOP_ERROR); + + if (!res) + { + /* The condition attached to table tab is false */ + if (tab == join_tab) + { + found= 0; + if (not_exists_opt_is_applicable) + DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); + } + else + { + /* + Set a return point if the rejected predicate is attached + to a table other than the last one of the current nest level. + */ + join->return_tab= tab; + if (not_exists_opt_is_applicable) + DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); + else + DBUG_RETURN(NESTED_LOOP_OK); + } + } + } + } + /* + Check whether join_tab is not the last inner table + for another embedding outer join. + */ + if ((first_unmatched= first_unmatched->first_upper) && + first_unmatched->last_inner != join_tab) + first_unmatched= 0; + join_tab->first_unmatched= first_unmatched; + } + + JOIN_TAB *return_tab= join->return_tab; + join_tab->found_match= TRUE; + + /* + sj_weedout_check_row() result: -1 is an error, 1 makes the row be + treated as not found, any other value leaves 'found' unchanged. + */ + if (join_tab->check_weed_out_table && found) + { + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); + DBUG_PRINT("info", ("weedout_check: %d", res)); + if (res == -1) + DBUG_RETURN(NESTED_LOOP_ERROR); + else if (res == 1) + found= FALSE; + } + else if (join_tab->do_firstmatch) + { + /* + We should return to the join_tab->do_firstmatch after we have + enumerated all the suffixes for current prefix row combination + */ + return_tab= join_tab->do_firstmatch; + } + + /* + It was not just a return to lower loop level when one + of the newly activated predicates is evaluated as false + (See above join->return_tab= tab). + */ + join->join_examined_rows++; + DBUG_PRINT("counts", ("join->examined_rows++: %lu found: %d", + (ulong) join->join_examined_rows, (int) found)); + + if (found) + { + enum enum_nested_loop_state rc; + /* A match from join_tab is found for the current partial join. 
*/ + rc= (*join_tab->next_select)(join, join_tab+1, 0); + join->thd->get_stmt_da()->inc_current_row_for_warning(); + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + DBUG_RETURN(rc); + if (return_tab < join->return_tab) + join->return_tab= return_tab; + + /* check for errors evaluating the condition */ + if (unlikely(join->thd->is_error())) + DBUG_RETURN(NESTED_LOOP_ERROR); + + if (join->return_tab < join_tab) + DBUG_RETURN(NESTED_LOOP_OK); + /* + Test if this was a SELECT DISTINCT query on a table that + was not in the field list; In this case we can abort if + we found a row, as no new rows can be added to the result. + */ + if (shortcut_for_distinct && found_records != join->found_records) + DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); + + DBUG_RETURN(NESTED_LOOP_OK); + } + } + else + { + /* + The condition pushed down to the table join_tab rejects all rows + with the beginning coinciding with the current partial join. + */ + join->join_examined_rows++; + } + + join->thd->get_stmt_da()->inc_current_row_for_warning(); + join_tab->read_record.unlock_row(join_tab); + + DBUG_RETURN(NESTED_LOOP_OK); +} + +/** + @brief + Build the NULL-complemented row for an outer join and process it. + + @details + Construct a NULL complemented partial join record and feed it to the next + level of the nested loop. This function is used in case we have + an OUTER join and no matching record was found. + + @param join The join object + @param join_tab The first inner table of the outer join operation + + @return + NESTED_LOOP_OK if an attached condition rejects the row or the weedout + check returns 1, NESTED_LOOP_ERROR if the weedout check returns -1, + otherwise the state returned by next_select. +*/ + +static enum_nested_loop_state +evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab) +{ + /* + The table join_tab is the first inner table of an outer join operation + and no matches have been found for the current outer row. + */ + JOIN_TAB *last_inner_tab= join_tab->last_inner; + /* Cache variables for faster loop */ + COND *select_cond; + for ( ; join_tab <= last_inner_tab ; join_tab++) + { + /* Change the values of guard predicate variables. 
*/ + join_tab->found= 1; + join_tab->not_null_compl= 0; + /* The outer row is complemented by nulls for each inner table */ + restore_record(join_tab->table,s->default_values); // Make empty record + mark_as_null_row(join_tab->table); // For group by without error + select_cond= join_tab->select_cond; + /* Check all attached conditions for inner table rows. */ + if (select_cond && !select_cond->val_int()) + return NESTED_LOOP_OK; + } + /* The loop left join_tab one past last_inner_tab: step back to it */ + join_tab--; + /* + The row complemented by nulls might be the first row + of embedding outer joins. + If so, perform the same actions as in the code + for the first regular outer join row above. + */ + for ( ; ; ) + { + JOIN_TAB *first_unmatched= join_tab->first_unmatched; + if ((first_unmatched= first_unmatched->first_upper) && + first_unmatched->last_inner != join_tab) + first_unmatched= 0; + join_tab->first_unmatched= first_unmatched; + if (!first_unmatched) + break; + first_unmatched->found= 1; + for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++) + { + if (tab->select_cond && !tab->select_cond->val_int()) + { + join->return_tab= tab; + return NESTED_LOOP_OK; + } + } + } + /* + The row complemented by nulls satisfies all conditions + attached to inner tables. + */ + if (join_tab->check_weed_out_table) + { + int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd); + if (res == -1) + return NESTED_LOOP_ERROR; + else if (res == 1) + return NESTED_LOOP_OK; + } + else if (join_tab->do_firstmatch) + { + /* + We should return to the join_tab->do_firstmatch after we have + enumerated all the suffixes for current prefix row combination + */ + if (join_tab->do_firstmatch < join->return_tab) + join->return_tab= join_tab->do_firstmatch; + } + + /* + Send the row complemented by nulls to be joined with the + remaining tables. 
+ */ + return (*join_tab->next_select)(join, join_tab+1, 0); +} + +/***************************************************************************** + The different ways to read a record + Returns -1 if row was not found, 0 if row was found and 1 on errors +*****************************************************************************/ + +/** + Help function when we get an error from the table handler. + + @param table Table that was being read + @param error Handler error code + + @retval -1 error is HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND; + table->status is set to STATUS_GARBAGE + @retval 1 any other error; it is passed to table->file->print_error() +*/ + +int report_error(TABLE *table, int error) +{ + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + { + table->status= STATUS_GARBAGE; + return -1; // key not found; ok + } + /* + Locking reads can legally return also these errors, do not + print them to the .err log + */ + if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT + && error != HA_ERR_TABLE_DEF_CHANGED && !table->in_use->killed) + sql_print_error("Got error %d when reading table '%s'", + error, table->s->path.str); + table->file->print_error(error,MYF(0)); + return 1; +} + + +/** + Read the row that matches tab->ref.key_buff exactly (HA_READ_KEY_EXACT) + into table->record[0]. + + @retval 0 Row was found + @retval !=0 The value returned by report_error() for the handler error +*/ + +int safe_index_read(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + if (unlikely((error= + table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_KEY_EXACT)))) + return report_error(table, error); + return 0; +} + + +/** + Reads content of constant table + + @param thd thread handle, passed to update_const_equal_items() + @param tab table + @param pos position of table in query plan + + @retval 0 ok, one row was found or one NULL-complemented row was created + @retval -1 ok, no row was found and no NULL-complemented row was created + @retval 1 error +*/ + +static int +join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos) +{ + int error; + TABLE_LIST *tbl; + DBUG_ENTER("join_read_const_table"); + TABLE *table=tab->table; + table->const_table=1; + table->null_row=0; + table->status=STATUS_NO_RECORD; + + if (tab->table->pos_in_table_list->is_materialized_derived() && + !tab->table->pos_in_table_list->fill_me) + { + //TODO: don't get here at all + /* Skip materialized 
derived tables/views. */ + DBUG_RETURN(0); + } + else if (tab->table->pos_in_table_list->jtbm_subselect && + tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab) + { + /* Constant JTBM subselect: report whether its row was found */ + int res; + if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found) + res= 0; + else + res= -1; + DBUG_RETURN(res); + } + else if (tab->type == JT_SYSTEM) + { + if (unlikely((error=join_read_system(tab)))) + { // Info for DESCRIBE + tab->info= ET_CONST_ROW_NOT_FOUND; + /* Mark for EXPLAIN that the row was not found */ + pos->records_read=0.0; + pos->ref_depend_map= 0; + if (!table->pos_in_table_list->outer_join || error > 0) + DBUG_RETURN(error); + } + /* + The optimizer trusts the engine that when stats.records is 0, no + rows were found + */ + DBUG_ASSERT(table->file->stats.records > 0 || error); + } + else + { + if (/*!table->file->key_read && */ + table->covering_keys.is_set(tab->ref.key) && !table->no_keyread && + (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY) + { + table->file->ha_start_keyread(tab->ref.key); + tab->index= tab->ref.key; + } + error=join_read_const(tab); + table->file->ha_end_keyread(); + if (unlikely(error)) + { + tab->info= ET_UNIQUE_ROW_NOT_FOUND; + /* Mark for EXPLAIN that the row was not found */ + pos->records_read=0.0; + pos->ref_depend_map= 0; + if (!table->pos_in_table_list->outer_join || error > 0) + DBUG_RETURN(error); + } + } + /* + Evaluate an on-expression only if it is not considered expensive. + This mainly prevents executing subqueries in optimization phase. + This is necessary since proper setup for such execution has not been + done at this stage. + */ + if (*tab->on_expr_ref && !table->null_row && + !(*tab->on_expr_ref)->is_expensive()) + { +#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL) + /* + This test could be very useful to find bugs in the optimizer + where we would call this function with an expression that can't be + evaluated yet. 
We can't have this enabled by default as long as we + have items like Item_equal, that don't report they are const but + they can still be called even if they contain non-const items. + */ + (*tab->on_expr_ref)->update_used_tables(); + DBUG_ASSERT((*tab->on_expr_ref)->const_item()); +#endif + /* A false ON expression turns the row into a NULL-complemented one */ + if ((table->null_row= MY_TEST((*tab->on_expr_ref)->val_int() == 0))) + mark_as_null_row(table); + } + if (!table->null_row && ! tab->join->mixed_implicit_grouping) + table->maybe_null= 0; + + { + JOIN *join= tab->join; + List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables); + /* Check appearance of new constant items in Item_equal objects */ + if (join->conds) + update_const_equal_items(thd, join->conds, tab, TRUE); + while ((tbl= ti++)) + { + /* + Walk up through the embedding nests for as long as the current + element is the head of its nest's join_list, visiting each ON + expression found on the way. + */ + TABLE_LIST *embedded; + TABLE_LIST *embedding= tbl; + do + { + embedded= embedding; + if (embedded->on_expr) + update_const_equal_items(thd, embedded->on_expr, tab, TRUE); + embedding= embedded->embedding; + } + while (embedding && + embedding->nested_join->join_list.head() == embedded); + } + } + DBUG_RETURN(0); +} + + +/** + Read a constant table when there is at most one matching row, using a table + scan. + + @param tab Table to read + + @retval 0 Row was found + @retval -1 Row was not found + @retval 1 Got an error (other than row not found) during read +*/ +static int +join_read_system(JOIN_TAB *tab) +{ + TABLE *table= tab->table; + int error; + if (table->status & STATUS_GARBAGE) // If first read + { + if (unlikely((error= + table->file->ha_read_first_row(table->record[0], + table->s->primary_key)))) + { + if (error != HA_ERR_END_OF_FILE) + return report_error(table, error); + table->const_table= 1; + mark_as_null_row(tab->table); + empty_record(table); // Make empty record + return -1; + } + /* Keep a copy of the row in record[1] for later calls */ + store_record(table,record[1]); + } + else if (!table->status) // Only happens with left join + restore_record(table,record[1]); // restore old record + table->null_row=0; + return table->status ? 
-1 : 0; +} + + +/** + Read a table when there is at most one matching row. + + On the first call (STATUS_GARBAGE set in table->status) the row is looked + up by the key built from tab->ref and, if found, a copy is kept in + record[1]. + + @param tab Table to read + + @retval 0 Row was found + @retval -1 Row was not found + @retval 1 Got an error (other than row not found) during read +*/ + +static int +join_read_const(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + if (table->status & STATUS_GARBAGE) // If first read + { + table->status= 0; + /* A failure to build the key from the ref is handled as 'key not found' */ + if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref)) + error=HA_ERR_KEY_NOT_FOUND; + else + { + error= table->file->ha_index_read_idx_map(table->record[0],tab->ref.key, + (uchar*) tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_KEY_EXACT); + } + if (unlikely(error)) + { + table->status= STATUS_NOT_FOUND; + mark_as_null_row(tab->table); + empty_record(table); + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + return -1; + } + store_record(table,record[1]); + } + else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join + { + table->status=0; + restore_record(table,record[1]); // restore old record + } + table->null_row=0; + return table->status ? -1 : 0; +} + +/* + eq_ref access method implementation: "read_first" function + + SYNOPSIS + join_read_key() + tab JOIN_TAB of the accessed table + + DESCRIPTION + This is the "read_first" function for the eq_ref access method. The + difference from the ref access function is that it has a one-element lookup + cache (see cmp_buffer_with_ref) + + RETURN + 0 - Ok + -1 - Row not found + 1 - Error +*/ + + +static int +join_read_key(JOIN_TAB *tab) +{ + return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref); +} + + +/* + eq_ref access handler but generalized a bit to support TABLE and TABLE_REF + not from the join_tab. See join_read_key for detailed synopsis. 
+*/ +int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref) +{ + int error; + /* + Initialize the index on first use. tab may be NULL; in that case a + sorted index scan is requested. + */ + if (!table->file->inited) + { + error= table->file->ha_index_init(table_ref->key, tab ? tab->sorted : TRUE); + if (unlikely(error)) + { + (void) report_error(table, error); + return 1; + } + } + + /* + The following is needed when one makes ref (or eq_ref) access from row + comparisons: one must call row->bring_value() to get the new values. + */ + if (tab && tab->bush_children) + { + TABLE_LIST *emb_sj_nest= tab->bush_children->start->emb_sj_nest; + emb_sj_nest->sj_subq_pred->left_exp()->bring_value(); + } + + /* TODO: Why don't we do "Late NULLs Filtering" here? */ + + /* + A new index lookup is done when cmp_buffer_with_ref() returns non-zero + or when one of the STATUS_GARBAGE / STATUS_NO_PARENT / STATUS_NULL_ROW + bits is set in table->status. + */ + if (cmp_buffer_with_ref(thd, table, table_ref) || + (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW))) + { + if (table_ref->key_err) + { + table->status=STATUS_NOT_FOUND; + return -1; + } + /* + Moving away from the current record. Unlock the row + in the handler if it did not match the partial WHERE. + */ + /* + NOTE(review): the check reads tab->ref while the reset below goes + through table_ref; they are the same object when called from + join_read_key() - confirm the intent for other callers. + */ + if (tab && tab->ref.has_record && tab->ref.use_count == 0) + { + tab->read_record.table->file->unlock_row(); + table_ref->has_record= FALSE; + } + error=table->file->ha_index_read_map(table->record[0], + table_ref->key_buff, + make_prev_keypart_map(table_ref->key_parts), + HA_READ_KEY_EXACT); + if (unlikely(error) && + error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + + if (likely(!error)) + { + table_ref->has_record= TRUE; + table_ref->use_count= 1; + } + } + else if (table->status == 0) + { + /* No new lookup was made: count one more use of the buffered row */ + DBUG_ASSERT(table_ref->has_record); + table_ref->use_count++; + } + table->null_row=0; + return table->status ? -1 : 0; +} + + +/** + Since join_read_key may buffer a record, do not unlock + it if it was not used in this invocation of join_read_key(). + Only count locks, thus remembering if the record was left unused, + and unlock already when pruning the current value of + TABLE_REF buffer. 
+ @sa join_read_key() +*/ + +static void +join_read_key_unlock_row(st_join_table *tab) +{ + DBUG_ASSERT(tab->ref.use_count); + if (tab->ref.use_count) + tab->ref.use_count--; +} + +/** + Rows from const tables are read once but potentially used + multiple times during execution of a query. + Ensure such rows are never unlocked during query execution. +*/ + +void +join_const_unlock_row(JOIN_TAB *tab) +{ + DBUG_ASSERT(tab->type == JT_CONST); +} + + +/* + ref access method implementation: "read_first" function + + SYNOPSIS + join_read_always_key() + tab JOIN_TAB of the accessed table + + DESCRIPTION + This is "read_fist" function for the "ref" access method. + + The functon must leave the index initialized when it returns. + ref_or_null access implementation depends on that. + + RETURN + 0 - Ok + -1 - Row not found + 1 - Error +*/ + +static int +join_read_always_key(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + /* Initialize the index first */ + if (!table->file->inited) + { + if (unlikely((error= table->file->ha_index_init(tab->ref.key, + tab->sorted)))) + { + (void) report_error(table, error); + return 1; + } + } + + if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref))) + return -1; + if (unlikely((error= + table->file->prepare_index_key_scan_map(tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts))))) + { + report_error(table,error); + return -1; + } + if ((error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_KEY_EXACT))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + return -1; /* purecov: inspected */ + } + return 0; +} + + +/** + This function is used when optimizing away ORDER BY in + SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC. 
+*/ + +static int +join_read_last_key(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + if (!table->file->inited && + unlikely((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))) + { + (void) report_error(table, error); + return 1; + } + + if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref))) + return -1; + if (unlikely((error= + table->file->prepare_index_key_scan_map(tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts)))) ) + { + report_error(table,error); + return -1; + } + if (unlikely((error= + table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab->ref.key_parts), + HA_READ_PREFIX_LAST)))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + return report_error(table, error); + return -1; /* purecov: inspected */ + } + return 0; +} + + + /* ARGSUSED */ +static int +join_no_more_records(READ_RECORD *info __attribute__((unused))) +{ + return -1; +} + + +static int +join_read_next_same(READ_RECORD *info) +{ + int error; + TABLE *table= info->table; + JOIN_TAB *tab=table->reginfo.join_tab; + + if (unlikely((error= table->file->ha_index_next_same(table->record[0], + tab->ref.key_buff, + tab->ref.key_length)))) + { + if (error != HA_ERR_END_OF_FILE) + return report_error(table, error); + table->status= STATUS_GARBAGE; + return -1; + } + return 0; +} + + +static int +join_read_prev_same(READ_RECORD *info) +{ + int error; + TABLE *table= info->table; + JOIN_TAB *tab=table->reginfo.join_tab; + + if (unlikely((error= table->file->ha_index_prev(table->record[0])))) + return report_error(table, error); + if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key, + tab->ref.key_length)) + { + table->status=STATUS_NOT_FOUND; + error= -1; + } + return error; +} + + +static int +join_init_quick_read_record(JOIN_TAB *tab) +{ + quick_select_return res= test_if_quick_select(tab); + + if (res == SQL_SELECT::ERROR) + return 1; /* Fatal error */ + + if (res == 
SQL_SELECT::IMPOSSIBLE_RANGE) + return -1; /* No possible records */ + + /* + Proceed to read rows. If we've created a quick select, use it, otherwise + do a full scan. + */ + return join_init_read_record(tab); +} + + +int read_first_record_seq(JOIN_TAB *tab) +{ + if (unlikely(tab->read_record.table->file->ha_rnd_init_with_error(1))) + return 1; + return tab->read_record.read_record(); +} + + +/* + @brief + Create a new (dynamic) quick select. +*/ + +static quick_select_return +test_if_quick_select(JOIN_TAB *tab) +{ + DBUG_EXECUTE_IF("show_explain_probe_test_if_quick_select", + if (dbug_user_var_equals_int(tab->join->thd, + "show_explain_probe_select_id", + tab->join->select_lex->select_number)) + dbug_serve_apcs(tab->join->thd, 1); + ); + + + delete tab->select->quick; + tab->select->quick=0; + + if (tab->table->file->inited != handler::NONE) + tab->table->file->ha_index_or_rnd_end(); + + quick_select_return res; + res= tab->select->test_quick_select(tab->join->thd, tab->keys, + (table_map) 0, HA_POS_ERROR, 0, + FALSE, /*remove where parts*/FALSE, + FALSE, /* no warnings */ TRUE); + if (tab->explain_plan && tab->explain_plan->range_checked_fer) + tab->explain_plan->range_checked_fer->collect_data(tab->select->quick); + + return res; +} + + +/* + @return + 1 - Yes, use dynamically built range + 0 - No, don't use dynamic range (but there's no error) + -1 - Fatal error +*/ + +static +int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab) +{ + if (unlikely(join_tab->use_quick == 2)) + { + quick_select_return res= test_if_quick_select(join_tab); + if (res == SQL_SELECT::ERROR) + return -1; + else + { + /* Both OK and IMPOSSIBLE_RANGE go here */ + return join_tab->select->quick ? 1 : 0; + } + } + else + return 0; +} + +int join_init_read_record(JOIN_TAB *tab) +{ + bool need_unpacking= FALSE; + JOIN *join= tab->join; + /* + Note: the query plan tree for the below operations is constructed in + save_agg_explain_data. 
+ */ + if (tab->distinct && tab->remove_duplicates()) // Remove duplicates. + return 1; + + if (join->top_join_tab_count != join->const_tables) + { + TABLE_LIST *tbl= tab->table->pos_in_table_list; + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; + } + + if (tab->build_range_rowid_filter_if_needed()) + return 1; + + if (tab->filesort && tab->sort_table()) // Sort table. + return 1; + + DBUG_EXECUTE_IF("kill_join_init_read_record", + tab->join->thd->set_killed(KILL_QUERY);); + + + if (!tab->preread_init_done && tab->preread_init()) + return 1; + + if (tab->select && tab->select->quick && tab->select->quick->reset()) + { + /* Ensures error status is propagated back to client */ + report_error(tab->table, + tab->join->thd->killed ? HA_ERR_QUERY_INTERRUPTED : HA_ERR_OUT_OF_MEM); + return 1; + } + /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */ + DBUG_EXECUTE_IF("kill_join_init_read_record", + tab->join->thd->reset_killed();); + + Copy_field *save_copy, *save_copy_end; + + /* + init_read_record resets all elements of tab->read_record(). + Remember things that we don't want to have reset. + */ + save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select, tab->filesort_result, 1, 1, FALSE)) + return 1; + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + + if (need_unpacking) + { + tab->read_record.read_record_func_and_unpack_calls= + tab->read_record.read_record_func; + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; + } + + return tab->read_record.read_record(); +} + + +/* + Helper function for sorting table with filesort. +*/ + +bool +JOIN_TAB::sort_table() +{ + int rc; + DBUG_PRINT("info",("Sorting for index")); + THD_STAGE_INFO(join->thd, stage_creating_sort_index); + DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ? 
+ JOIN::ordered_index_order_by : + JOIN::ordered_index_group_by)); + rc= create_sort_index(join->thd, join, this, NULL); + /* Disactivate rowid filter if it was used when creating sort index */ + if (rowid_filter) + table->file->rowid_filter_is_active= false; + return (rc != 0); +} + + +static int +join_read_first(JOIN_TAB *tab) +{ + int error= 0; + TABLE *table=tab->table; + DBUG_ENTER("join_read_first"); + + DBUG_ASSERT(table->no_keyread || + !table->covering_keys.is_set(tab->index) || + table->file->keyread == tab->index); + tab->table->status=0; + tab->read_record.read_record_func= join_read_next; + tab->read_record.table=table; + if (!table->file->inited) + error= table->file->ha_index_init(tab->index, tab->sorted); + if (likely(!error)) + error= table->file->prepare_index_scan(); + if (unlikely(error) || + unlikely(error= tab->table->file->ha_index_first(tab->table->record[0]))) + { + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + report_error(table, error); + DBUG_RETURN(-1); + } + DBUG_RETURN(0); +} + + +static int +join_read_next(READ_RECORD *info) +{ + int error; + if (unlikely((error= info->table->file->ha_index_next(info->record())))) + return report_error(info->table, error); + + return 0; +} + + +static int +join_read_last(JOIN_TAB *tab) +{ + TABLE *table=tab->table; + int error= 0; + DBUG_ENTER("join_read_last"); + + DBUG_ASSERT(table->no_keyread || + !table->covering_keys.is_set(tab->index) || + table->file->keyread == tab->index); + tab->table->status=0; + tab->read_record.read_record_func= join_read_prev; + tab->read_record.table=table; + if (!table->file->inited) + error= table->file->ha_index_init(tab->index, 1); + if (likely(!error)) + error= table->file->prepare_index_scan(); + if (unlikely(error) || + unlikely(error= tab->table->file->ha_index_last(tab->table->record[0]))) + DBUG_RETURN(report_error(table, error)); + + DBUG_RETURN(0); +} + + +static int +join_read_prev(READ_RECORD *info) +{ + int error; + if 
(unlikely((error= info->table->file->ha_index_prev(info->record())))) + return report_error(info->table, error); + return 0; +} + + +static int +join_ft_read_first(JOIN_TAB *tab) +{ + int error; + TABLE *table= tab->table; + + if (!table->file->inited && + (error= table->file->ha_index_init(tab->ref.key, 1))) + { + (void) report_error(table, error); + return 1; + } + + table->file->ft_init(); + + if (unlikely((error= table->file->ha_ft_read(table->record[0])))) + return report_error(table, error); + return 0; +} + +static int +join_ft_read_next(READ_RECORD *info) +{ + int error; + if (unlikely((error= info->table->file->ha_ft_read(info->record())))) + return report_error(info->table, error); + return 0; +} + + +/** + Reading of key with key reference and one part that may be NULL. +*/ + +int +join_read_always_key_or_null(JOIN_TAB *tab) +{ + int res; + + /* First read according to key which is NOT NULL */ + *tab->ref.null_ref_key= 0; // Clear null byte + if ((res= join_read_always_key(tab)) >= 0) + return res; + + /* Then read key with null value */ + *tab->ref.null_ref_key= 1; // Set null byte + return safe_index_read(tab); +} + + +int +join_read_next_same_or_null(READ_RECORD *info) +{ + int error; + if (unlikely((error= join_read_next_same(info)) >= 0)) + return error; + JOIN_TAB *tab= info->table->reginfo.join_tab; + + /* Test if we have already done a read after null key */ + if (*tab->ref.null_ref_key) + return -1; // All keys read + *tab->ref.null_ref_key= 1; // Set null byte + return safe_index_read(tab); // then read null keys +} + + +/***************************************************************************** + DESCRIPTION + Functions that end one nested loop iteration. Different functions + are used to support GROUP BY clause and to redirect records + to a table (e.g. in case of SELECT into a temporary table) or to the + network client. 
+ + RETURN VALUES + NESTED_LOOP_OK - the record has been successfully handled + NESTED_LOOP_ERROR - a fatal error (like table corruption) + was detected + NESTED_LOOP_KILLED - thread shutdown was requested while processing + the record + NESTED_LOOP_QUERY_LIMIT - the record has been successfully handled; + additionally, the nested loop produced the + number of rows specified in the LIMIT clause + for the query + NESTED_LOOP_CURSOR_LIMIT - the record has been successfully handled; + additionally, there is a cursor and the nested + loop algorithm produced the number of rows + that is specified for current cursor fetch + operation. + All return values except NESTED_LOOP_OK abort the nested loop. +*****************************************************************************/ + +/* ARGSUSED */ +static enum_nested_loop_state +end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + DBUG_ENTER("end_send"); + /* + When all tables are const this function is called with jointab == NULL. + This function shouldn't be called for the first join_tab as it needs + to get fields from previous tab. + */ + DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab); + //TODO pass fields via argument + List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields; + + if (end_of_records) + { + if (join->procedure && join->procedure->end_of_records()) + DBUG_RETURN(NESTED_LOOP_ERROR); + DBUG_RETURN(NESTED_LOOP_OK); + } + + if (join->table_count && + join->join_tab->is_using_loose_index_scan()) + { + /* Copy non-aggregated fields when loose index scan is used. 
*/ + copy_fields(&join->tmp_table_param); + } + if (join->having && join->having->val_int() == 0) + DBUG_RETURN(NESTED_LOOP_OK); // Didn't match having + if (join->procedure) + { + if (join->procedure->send_row(join->procedure_fields_list)) + DBUG_RETURN(NESTED_LOOP_ERROR); + DBUG_RETURN(NESTED_LOOP_OK); + } + + if (join->send_records >= join->unit->lim.get_select_limit() && + join->unit->lim.is_with_ties()) + { + /* + Stop sending rows if the order fields corresponding to WITH TIES + have changed. + */ + int idx= test_if_item_cache_changed(join->order_fields); + if (idx >= 0) + join->do_send_rows= false; + } + + if (join->do_send_rows) + { + int error; + /* result < 0 if row was not accepted and should not be counted */ + if (unlikely((error= join->result->send_data_with_check(*fields, + join->unit, + join->send_records)))) + { + if (error > 0) + DBUG_RETURN(NESTED_LOOP_ERROR); + // error < 0 => duplicate row + join->duplicate_rows++; + } + } + + join->send_records++; + join->accepted_rows++; + if (join->send_records >= join->unit->lim.get_select_limit()) + { + if (!join->do_send_rows) + { + /* + If we have used Priority Queue for optimizing order by with limit, + then stop here, there are no more records to consume. + When this optimization is used, end_send is called on the next + join_tab. + */ + if (join->order && + join->select_options & OPTION_FOUND_ROWS && + join_tab > join->join_tab && + (join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq) + { + DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT")); + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); + } + DBUG_RETURN(NESTED_LOOP_OK); + } + + /* For WITH TIES we keep sending rows until a group has changed. */ + if (join->unit->lim.is_with_ties()) + { + /* Prepare the order_fields comparison for with ties. */ + if (join->send_records == join->unit->lim.get_select_limit()) + (void) test_if_group_changed(join->order_fields); + /* One more loop, to check if the next row matches with_ties or not. 
*/ + DBUG_RETURN(NESTED_LOOP_OK); + } + if (join->select_options & OPTION_FOUND_ROWS) + { + JOIN_TAB *jt=join->join_tab; + if ((join->table_count == 1) && !join->sort_and_group + && !join->send_group_parts && !join->having && !jt->select_cond && + !(jt->select && jt->select->quick) && + (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && + (jt->ref.key < 0)) + { + /* Join over all rows in table; Return number of found rows */ + TABLE *table=jt->table; + + if (jt->filesort_result) // If filesort was used + { + join->send_records= jt->filesort_result->found_rows; + } + else + { + table->file->info(HA_STATUS_VARIABLE); + join->send_records= table->file->stats.records; + } + } + else + { + join->do_send_rows= 0; + if (join->unit->fake_select_lex) + join->unit->fake_select_lex->limit_params.select_limit= 0; + DBUG_RETURN(NESTED_LOOP_OK); + } + } + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely + } + else if (join->send_records >= join->fetch_limit) + { + /* + There is a server side cursor and all rows for + this fetch request are sent. + */ + DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT); + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + @brief + Perform OrderedGroupBy operation and write the output into join->result. + + @detail + The input stream is ordered by the GROUP BY expression, so groups come + one after another. We only need to accumulate the aggregate value, when + a GROUP BY group ends, check the HAVING and send the group. + + Note that the output comes in the GROUP BY order, which is required by + the MySQL's GROUP BY semantics. No further sorting is needed. + + @seealso end_write_group() also implements SortAndGroup +*/ + +enum_nested_loop_state +end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + int idx= -1; + enum_nested_loop_state ok_code= NESTED_LOOP_OK; + /* + join_tab can be 0 in the case all tables are const tables and we did not + need a temporary table to store the result. 
+ In this case we use the original given fields, which is stored in + join->fields. + */ + List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields; + DBUG_ENTER("end_send_group"); + + if (!join->items3.is_null() && !join->set_group_rpa) + { + /* Move ref_pointer_array to points to items3 */ + join->set_group_rpa= true; + join->set_items_ref_array(join->items3); + } + + if (!join->first_record || end_of_records || + (idx=test_if_group_changed(join->group_fields)) >= 0) + { + + if (!join->group_sent && + (join->first_record || + (end_of_records && !join->group && !join->group_optimized_away))) + { + table_map cleared_tables= (table_map) 0; + if (join->procedure) + join->procedure->end_group(); + /* Test if there was a group change. */ + if (idx < (int) join->send_group_parts) + { + int error=0; + if (join->procedure) + { + if (join->having && join->having->val_int() == 0) + error= -1; // Didn't satisfy having + else + { + if (join->do_send_rows) + error=join->procedure->send_row(*fields) ? 1 : 0; + join->send_records++; + } + if (end_of_records && join->procedure->end_of_records()) + error= 1; // Fatal error + } + else + { + /* Reset all sum functions on group change. 
*/ + if (!join->first_record) + { + /* No matching rows for group function */ + + List_iterator_fast<Item> it(*fields); + Item *item; + join->no_rows_in_result_called= 1; + + join->clear(&cleared_tables); + while ((item= it++)) + item->no_rows_in_result(); + } + if (join->having && join->having->val_int() == 0) + error= -1; // Didn't satisfy having + else + { + if (join->do_send_rows) + { + error= join->result->send_data_with_check(*fields, + join->unit, + join->send_records); + if (unlikely(error < 0)) + { + /* Duplicate row, don't count */ + join->duplicate_rows++; + error= 0; + } + } + join->send_records++; + join->group_sent= true; + } + if (unlikely(join->rollup.state != ROLLUP::STATE_NONE && error <= 0)) + { + if (join->rollup_send_data((uint) (idx+1))) + error= 1; + } + if (join->no_rows_in_result_called) + { + /* Restore null tables to original state */ + join->no_rows_in_result_called= 0; + if (cleared_tables) + unclear_tables(join, &cleared_tables); + } + } + if (unlikely(error > 0)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + if (join->send_records >= join->unit->lim.get_select_limit() && + join->do_send_rows) + { + /* WITH TIES can be computed during end_send_group if + the order by is a subset of group by and we had an index + available to compute group by order directly. */ + if (!join->unit->lim.is_with_ties() || + idx < (int)join->with_ties_order_count) + { + if (!(join->select_options & OPTION_FOUND_ROWS)) + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely + join->do_send_rows= 0; + join->unit->lim.set_unlimited(); + } + } + else if (join->send_records >= join->fetch_limit) + { + /* + There is a server side cursor and all rows + for this fetch request are sent. + + Preventing code duplication. When finished with the group reset + the group functions and copy_fields. We fall through. 
bug #11904 + */ + ok_code= NESTED_LOOP_CURSOR_LIMIT; + } + } + } + else + { + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + join->first_record=1; + (void) test_if_group_changed(join->group_fields); + } + if (idx < (int) join->send_group_parts) + { + /* + This branch is executed also for cursors which have finished their + fetch limit - the reason for ok_code. + */ + copy_fields(&join->tmp_table_param); + if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1])) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (join->procedure) + join->procedure->add(); + join->group_sent= false; + join->accepted_rows++; + DBUG_RETURN(ok_code); + } + } + if (update_sum_func(join->sum_funcs)) + DBUG_RETURN(NESTED_LOOP_ERROR); + join->accepted_rows++; + if (join->procedure) + join->procedure->add(); + DBUG_RETURN(NESTED_LOOP_OK); +} + + + /* ARGSUSED */ +static enum_nested_loop_state +end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *const table= join_tab->table; + DBUG_ENTER("end_write"); + + if (!end_of_records) + { + copy_fields(join_tab->tmp_table_param); + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + + if (likely(!join_tab->having || join_tab->having->val_int())) + { + int error; + join->found_records++; + join->accepted_rows++; + if ((error= table->file->ha_write_tmp_row(table->record[0]))) + { + if (likely(!table->file->is_fatal_error(error, HA_CHECK_DUP))) + goto end; // Ignore duplicate keys + bool is_duplicate; + if (create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 1, &is_duplicate)) + DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error + if (is_duplicate) + goto end; + table->s->uniques=0; // To ensure rows are the same + } + if (++join_tab->send_records >= + join_tab->tmp_table_param->end_write_records && + join->do_send_rows) 
+ { + if (!(join->select_options & OPTION_FOUND_ROWS)) + DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); + join->do_send_rows=0; + join->unit->lim.set_unlimited(); + } + } + } +end: + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + @brief + Perform GROUP BY operation over rows coming in arbitrary order: use + TemporaryTableWithPartialSums algorithm. + + @detail + The TemporaryTableWithPartialSums algorithm is: + + CREATE TEMPORARY TABLE tmp ( + group_by_columns PRIMARY KEY, + partial_sum + ); + + for each row R in join output { + INSERT INTO tmp (R.group_by_columns, R.sum_value) + ON DUPLICATE KEY UPDATE partial_sum=partial_sum + R.sum_value; + } + + @detail + Also applies HAVING, etc. + + @seealso end_unique_update() +*/ + +static enum_nested_loop_state +end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *const table= join_tab->table; + ORDER *group; + int error; + DBUG_ENTER("end_update"); + + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + + join->found_records++; + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. 
+ /* Make a key of group index */ + for (group=table->group ; group ; group=group->next) + { + Item *item= *group->item; + if (group->fast_field_copier_setup != group->field) + { + DBUG_PRINT("info", ("new setup %p -> %p", + group->fast_field_copier_setup, + group->field)); + group->fast_field_copier_setup= group->field; + group->fast_field_copier_func= + item->setup_fast_field_copier(group->field); + } + item->save_org_in_field(group->field, group->fast_field_copier_func); + /* Store in the used key if the field was 0 */ + if (item->maybe_null()) + group->buff[-1]= (char) group->field->is_null(); + } + if (!table->file->ha_index_read_map(table->record[1], + join_tab->tmp_table_param->group_buff, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT)) + { /* Update old record */ + restore_record(table,record[1]); + update_tmptable_sum_func(join->sum_funcs,table); + if (unlikely((error= table->file->ha_update_tmp_row(table->record[1], + table->record[0])))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + goto end; + } + + init_tmptable_sum_functions(join->sum_funcs); + if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy, + join->thd))) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (unlikely((error= table->file->ha_write_tmp_row(table->record[0])))) + { + if (create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 0, NULL)) + DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error + /* Change method to update rows */ + if (unlikely((error= table->file->ha_index_init(0, 0)))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(NESTED_LOOP_ERROR); + } + + join_tab->aggr->set_write_func(end_unique_update); + } + join_tab->send_records++; +end: + join->accepted_rows++; // For rownum() + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* 
purecov: inspected */ + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/** + Like end_update, but this is done with unique constraints instead of keys. +*/ + +static enum_nested_loop_state +end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *table= join_tab->table; + int error; + DBUG_ENTER("end_unique_update"); + + if (end_of_records) + DBUG_RETURN(NESTED_LOOP_OK); + + init_tmptable_sum_functions(join->sum_funcs); + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + + join->accepted_rows++; + if (likely(!(error= table->file->ha_write_tmp_row(table->record[0])))) + join_tab->send_records++; // New group + else + { + if (unlikely((int) table->file->get_dup_key(error) < 0)) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + /* Prepare table for random positioning */ + bool rnd_inited= (table->file->inited == handler::RND); + if (!rnd_inited && + ((error= table->file->ha_index_end()) || + (error= table->file->ha_rnd_init(0)))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(NESTED_LOOP_ERROR); + } + if (unlikely(table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + restore_record(table,record[1]); + update_tmptable_sum_func(join->sum_funcs,table); + if (unlikely((error= table->file->ha_update_tmp_row(table->record[1], + table->record[0])))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + if (!rnd_inited && + ((error= table->file->ha_rnd_end()) || + (error= table->file->ha_index_init(0, 0)))) + { + table->file->print_error(error, MYF(0)); + 
DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + join->accepted_rows++; // For rownum() + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/* + @brief + Perform OrderedGroupBy operation and write the output into the temporary + table (join_tab->table). + + @detail + The input stream is ordered by the GROUP BY expression, so groups come + one after another. We only need to accumulate the aggregate value, when + a GROUP BY group ends, check the HAVING and write the group. + + @seealso end_send_group() also implements OrderedGroupBy +*/ + +enum_nested_loop_state +end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), + bool end_of_records) +{ + TABLE *table= join_tab->table; + int idx= -1; + DBUG_ENTER("end_write_group"); + + join->accepted_rows++; + if (!join->first_record || end_of_records || + (idx=test_if_group_changed(join->group_fields)) >= 0) + { + if (join->first_record || (end_of_records && !join->group)) + { + table_map cleared_tables= (table_map) 0; + if (join->procedure) + join->procedure->end_group(); + int send_group_parts= join->send_group_parts; + if (idx < send_group_parts) + { + if (!join->first_record) + { + /* No matching rows for group function */ + join->clear(&cleared_tables); + } + copy_sum_funcs(join->sum_funcs, + join->sum_funcs_end[send_group_parts]); + if (!join_tab->having || join_tab->having->val_int()) + { + int error= table->file->ha_write_tmp_row(table->record[0]); + if (unlikely(error) && + create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 0, NULL)) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + if (unlikely(join->rollup.state != ROLLUP::STATE_NONE)) + { + if (unlikely(join->rollup_write_data((uint) (idx+1), + join_tab->tmp_table_param, + table))) + { + DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + if (cleared_tables) + unclear_tables(join, 
&cleared_tables); + if (end_of_records) + goto end; + } + } + else + { + if (end_of_records) + goto end; + join->first_record=1; + (void) test_if_group_changed(join->group_fields); + } + if (idx < (int) join->send_group_parts) + { + copy_fields(join_tab->tmp_table_param); + if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy, + join->thd))) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (unlikely(init_sum_functions(join->sum_funcs, + join->sum_funcs_end[idx+1]))) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (unlikely(join->procedure)) + join->procedure->add(); + goto end; + } + } + if (unlikely(update_sum_func(join->sum_funcs))) + DBUG_RETURN(NESTED_LOOP_ERROR); + if (unlikely(join->procedure)) + join->procedure->add(); +end: + if (unlikely(join->thd->check_killed())) + { + DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ + } + DBUG_RETURN(NESTED_LOOP_OK); +} + + +/***************************************************************************** + Remove calculation with tables that aren't yet read. Remove also tests + against fields that are read through key where the table is not a + outer join table. + We can't remove tests that are made against columns which are stored + in sorted order. +*****************************************************************************/ + +/** + Check if "left_item=right_item" equality is guaranteed to be true by use of + [eq]ref access on left_item->field->table. + + SYNOPSIS + test_if_ref() + root_cond + left_item + right_item + + DESCRIPTION + Check if the given "left_item = right_item" equality is guaranteed to be + true by use of [eq_]ref access method. + + We need root_cond as we can't remove ON expressions even if employed ref + access guarantees that they are true. 
This is because TODO + + RETURN + TRUE if right_item is used removable reference key on left_item + FALSE Otherwise + +*/ + +bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) +{ + Field *field=left_item->field; + JOIN_TAB *join_tab= field->table->reginfo.join_tab; + // No need to change const test + if (!field->table->const_table && join_tab && + !join_tab->is_ref_for_hash_join() && + (!join_tab->first_inner || + *join_tab->first_inner->on_expr_ref == root_cond)) + { + /* + If ref access uses "Full scan on NULL key" (i.e. it actually alternates + between ref access and full table scan), then no equality can be + guaranteed to be true. + */ + if (join_tab->ref.is_access_triggered()) + return FALSE; + + Item *ref_item=part_of_refkey(field->table,field); + if (ref_item && (ref_item->eq(right_item,1) || + ref_item->real_item()->eq(right_item,1))) + { + right_item= right_item->real_item(); + if (right_item->type() == Item::FIELD_ITEM) + return (field->eq_def(((Item_field *) right_item)->field)); + /* remove equalities injected by IN->EXISTS transformation */ + else if (right_item->type() == Item::CACHE_ITEM) + return ((Item_cache *)right_item)->eq_def (field); + if (right_item->const_item() && !(right_item->is_null())) + { + /* + We can remove binary fields and numerical fields except float, + as float comparison isn't 100 % safe + We have to keep normal strings to be able to check for end spaces + */ + if (field->binary() && + field->real_type() != MYSQL_TYPE_STRING && + field->real_type() != MYSQL_TYPE_VARCHAR && + (field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0)) + { + return !right_item->save_in_field_no_warnings(field, 1); + } + } + } + } + return 0; // keep test +} + + +/** + Extract a condition that can be checked after reading given table + @fn make_cond_for_table() + + @param cond Condition to analyze + @param tables Tables for which "current field values" are available + @param used_table Table that we're extracting the 
condition for + tables Tables for which "current field values" are available (this + includes used_table) + (may also include PSEUDO_TABLE_BITS, and may be zero) + @param join_tab_idx_arg + The index of the JOIN_TAB this Item is being extracted + for. MAX_TABLES if there is no corresponding JOIN_TAB. + @param exclude_expensive_cond + Do not push expensive conditions + @param retain_ref_cond + Retain ref conditions + + @retval <>NULL Generated condition + @retval =NULL Already checked, OR error + + @details + Extract the condition that can be checked after reading the table + specified in 'used_table', given that current-field values for tables + specified in 'tables' bitmap are available. + If 'used_table' is 0 + - extract conditions for all tables in 'tables'. + - extract conditions are unrelated to any tables + in the same query block/level(i.e. conditions + which have used_tables == 0). + + The function assumes that + - Constant parts of the condition has already been checked. + - Condition that could be checked for tables in 'tables' has already + been checked. + + The function takes into account that some parts of the condition are + guaranteed to be true by employed 'ref' access methods (the code that + does this is located at the end, search down for "EQ_FUNC"). + + @note + Make sure to keep the implementations of make_cond_for_table() and + make_cond_after_sjm() synchronized. + make_cond_for_info_schema() uses similar algorithm as well. 
+*/ + +static Item * +make_cond_for_table(THD *thd, Item *cond, table_map tables, + table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond __attribute__((unused)), + bool retain_ref_cond) +{ + return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table, + join_tab_idx_arg, + exclude_expensive_cond, + retain_ref_cond, true); +} + + +static Item * +make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, + table_map tables, table_map used_table, + int join_tab_idx_arg, + bool exclude_expensive_cond __attribute__ + ((unused)), + bool retain_ref_cond, + bool is_top_and_level) + +{ + table_map rand_table_bit= (table_map) RAND_TABLE_BIT; + + if (used_table && !(cond->used_tables() & used_table)) + return (COND*) 0; // Already checked + + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* Create new top level AND item */ + Item_cond_and *new_cond=new (thd->mem_root) Item_cond_and(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + /* + Special handling of top level conjuncts with RAND_TABLE_BIT: + if such a conjunct contains a reference to a field that is not + an outer field then it is pushed to the corresponding table by + the same rule as all other conjuncts. Otherwise, if the conjunct + is used in WHERE is is pushed to the last joined table, if is it + is used in ON condition of an outer join it is pushed into the + last inner table of the outer join. Such conjuncts are pushed in + a call of make_cond_for_table_from_pred() with the + parameter 'used_table' equal to PSEUDO_TABLE_BITS. 
+ */ + if (is_top_and_level && used_table == rand_table_bit && + (item->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit) + { + /* The conjunct with RAND_TABLE_BIT has been allready pushed */ + continue; + } + Item *fix=make_cond_for_table_from_pred(thd, root_cond, item, + tables, used_table, + join_tab_idx_arg, + exclude_expensive_cond, + retain_ref_cond, false); + if (fix) + new_cond->argument_list()->push_back(fix, thd->mem_root); + else if (thd->is_error()) + return ((COND*) 0); + } + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; // Always true + case 1: + return new_cond->argument_list()->head(); + default: + /* + Call fix_fields to propagate all properties of the children to + the new parent Item. This should not be expensive because all + children of Item_cond_and should be fixed by now. + */ + if (new_cond->fix_fields(thd, 0)) + return (COND*) 0; + new_cond->used_tables_cache= + ((Item_cond_and*) cond)->used_tables_cache & + tables; + return new_cond; + } + } + else + { // Or list + if (is_top_and_level && used_table == rand_table_bit && + (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit) + { + /* This top level formula with RAND_TABLE_BIT has been already pushed */ + return (COND*) 0; + } + + Item_cond_or *new_cond=new (thd->mem_root) Item_cond_or(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix=make_cond_for_table_from_pred(thd, root_cond, item, + tables, 0L, + join_tab_idx_arg, + exclude_expensive_cond, + retain_ref_cond, false); + if (!fix) + return (COND*) 0; // Always true or error + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + /* + Call fix_fields to propagate all properties of the children to + the new parent Item. This should not be expensive because all + children of Item_cond_and should be fixed by now. 
+ */ + if (new_cond->fix_fields(thd, 0)) + return (COND*) 0; + new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; + new_cond->top_level_item(); + return new_cond; + } + } + else if (cond->basic_const_item()) + return cond; + + if (is_top_and_level && used_table == rand_table_bit && + (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit) + { + /* This top level formula with RAND_TABLE_BIT has been already pushed */ + return (COND*) 0; + } + + /* + Because the following test takes a while and it can be done + table_count times, we mark each item that we have examined with the result + of the test + */ + if ((cond->marker == MARKER_CHECK_ON_READ && !retain_ref_cond) || + (cond->used_tables() & ~tables)) + return (COND*) 0; // Can't check this yet + + if (cond->marker == MARKER_PROCESSED || cond->eq_cmp_result() == Item::COND_OK) + { + cond->set_join_tab_idx((uint8) join_tab_idx_arg); + return cond; // Not boolean op + } + + if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::EQ_FUNC) + { + Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item(); + Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item(); + if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond && + test_if_ref(root_cond, (Item_field*) left_item,right_item)) + { + cond->marker= MARKER_CHECK_ON_READ; // Checked when read + return (COND*) 0; + } + if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond && + test_if_ref(root_cond, (Item_field*) right_item,left_item)) + { + cond->marker= MARKER_CHECK_ON_READ; // Checked when read + return (COND*) 0; + } + } + cond->marker= MARKER_PROCESSED; + cond->set_join_tab_idx((uint8) join_tab_idx_arg); + return cond; +} + + +/* + The difference of this from make_cond_for_table() is that we're in the + following state: + 1. conditions referring to 'tables' have been checked + 2. conditions referring to sjm_tables have been checked, too + 3. 
We need condition that couldn't be checked in #1 or #2 but + can be checked when we get both (tables | sjm_tables). + +*/ +static COND * +make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables, + table_map sjm_tables, bool inside_or_clause) +{ + /* + We assume that conditions that refer to only join prefix tables or + sjm_tables have already been checked. + */ + if (!inside_or_clause) + { + table_map cond_used_tables= cond->used_tables(); + if((!(cond_used_tables & ~tables) || + !(cond_used_tables & ~sjm_tables))) + return (COND*) 0; // Already checked + } + + /* AND/OR recursive descent */ + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* Create new top level AND item */ + Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix=make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables, + inside_or_clause); + if (fix) + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; // Always true + case 1: + return new_cond->argument_list()->head(); + default: + /* + Item_cond_and do not need fix_fields for execution, its parameters + are fixed or do not need fix_fields, too + */ + new_cond->quick_fix_field(); + new_cond->used_tables_cache= + ((Item_cond_and*) cond)->used_tables_cache & + tables; + return new_cond; + } + } + else + { // Or list + Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd); + if (!new_cond) + return (COND*) 0; // OOM /* purecov: inspected */ + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables, + /*inside_or_clause= */TRUE); + if (!fix) + return 
(COND*) 0; // Always true + new_cond->argument_list()->push_back(fix, thd->mem_root); + } + /* + Item_cond_or do not need fix_fields for execution, its parameters + are fixed or do not need fix_fields, too + */ + new_cond->quick_fix_field(); + new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; + new_cond->top_level_item(); + return new_cond; + } + } + + /* + Because the following test takes a while and it can be done + table_count times, we mark each item that we have examined with the result + of the test + */ + + if (cond->marker == MARKER_CHECK_ON_READ || + (cond->used_tables() & ~(tables | sjm_tables))) + return (COND*) 0; // Can't check this yet + if (cond->marker == MARKER_PROCESSED || cond->eq_cmp_result() == Item::COND_OK) + return cond; // Not boolean op + + /* + Remove equalities that are guaranteed to be true by use of 'ref' access + method + */ + if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC) + { + Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item(); + Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item(); + if (left_item->type() == Item::FIELD_ITEM && + test_if_ref(root_cond, (Item_field*) left_item,right_item)) + { + cond->marker= MARKER_CHECK_ON_READ; + return (COND*) 0; + } + if (right_item->type() == Item::FIELD_ITEM && + test_if_ref(root_cond, (Item_field*) right_item,left_item)) + { + cond->marker= MARKER_CHECK_ON_READ; + return (COND*) 0; + } + } + cond->marker= MARKER_PROCESSED; + return cond; +} + + +/* + @brief + + Check if + - @table uses "ref"-like access + - it is based on "@field=certain_item" equality + - the equality will be true for any record returned by the access method + and return the certain_item if yes. + + @detail + + Equality won't necessarily hold if: + - the used index covers only part of the @field. + Suppose, we have a CHAR(5) field and INDEX(field(3)). if you make a lookup + for 'abc', you will get both record with 'abc' and with 'abcde'. 
+ - The type of access is actually ref_or_null, and so @field can be either + a value or NULL. + + @return + Item that the field will be equal to + NULL if no such item +*/ + +static Item * +part_of_refkey(TABLE *table,Field *field) +{ + JOIN_TAB *join_tab= table->reginfo.join_tab; + if (!join_tab) + return (Item*) 0; // field from outer non-select (UPDATE,...) + + uint ref_parts= join_tab->ref.key_parts; + if (ref_parts) /* if it's ref/eq_ref/ref_or_null */ + { + uint key= join_tab->ref.key; + KEY *key_info= join_tab->get_keyinfo_by_key_no(key); + KEY_PART_INFO *key_part= key_info->key_part; + + for (uint part=0 ; part < ref_parts ; part++,key_part++) + { + if (field->eq(key_part->field)) + { + /* + Found the field in the key. Check that + 1. ref_or_null doesn't alternate this component between a value and + a NULL + 2. index fully covers the key + */ + if (part != join_tab->ref.null_ref_part && // (1) + !(key_part->key_part_flag & HA_PART_KEY_SEG)) // (2) + { + return join_tab->ref.items[part]; + } + break; + } + } + } + return (Item*) 0; +} + + +/** + Test if one can use the key to resolve ORDER BY. + + @param join if not NULL, can use the join's top-level + multiple-equalities. + @param order Sort order + @param table Table to sort + @param idx Index to check + @param used_key_parts [out] NULL by default, otherwise return value for + used key parts. + + + @note + used_key_parts is set to correct key parts used if return value != 0 + (On other cases, used_key_part may be changed) + Note that the value may actually be greater than the number of index + key parts. This can happen for storage engines that have the primary + key parts as a suffix for every secondary key. + + @retval + 1 key is ok. 
+ @retval + 0 Key can't be used + @retval + -1 Reverse key can be used +*/ + +static int test_if_order_by_key(JOIN *join, + ORDER *order, TABLE *table, uint idx, + uint *used_key_parts) +{ + KEY_PART_INFO *key_part,*key_part_end; + key_part=table->key_info[idx].key_part; + key_part_end=key_part + table->key_info[idx].ext_key_parts; + key_part_map const_key_parts=table->const_key_parts[idx]; + uint user_defined_kp= table->key_info[idx].user_defined_key_parts; + int reverse=0; + uint key_parts; + bool have_pk_suffix= false; + uint pk= table->s->primary_key; + DBUG_ENTER("test_if_order_by_key"); + + if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && + table->key_info[idx].ext_key_part_map && + pk != MAX_KEY && pk != idx) + { + have_pk_suffix= true; + } + + for (; order ; order=order->next, const_key_parts>>=1) + { + Item_field *item_field= ((Item_field*) (*order->item)->real_item()); + Field *field= item_field->field; + int flag; + + /* + Skip key parts that are constants in the WHERE clause. + These are already skipped in the ORDER BY by const_expression_in_where() + */ + for (; const_key_parts & 1 ; const_key_parts>>= 1) + key_part++; + + /* + This check was in this function historically (although I think it's + better to check it outside of this function): + + "Test if the primary key parts were all const (i.e. there's one row). + The sorting doesn't matter" + + So, we're checking that + (1) this is an extended key + (2) we've reached its end + */ + key_parts= (uint)(key_part - table->key_info[idx].key_part); + if (have_pk_suffix && + reverse == 0 && // all were =const so far + key_parts == table->key_info[idx].ext_key_parts && + table->const_key_parts[pk] == PREV_BITS(uint, + table->key_info[pk]. 
+ user_defined_key_parts)) + { + key_parts= 0; + reverse= 1; // Key is ok to use + goto ok; + } + + if (key_part == key_part_end) + { + /* + There are some items left in ORDER BY that we don't + */ + DBUG_RETURN(0); + } + + if (key_part->field != field) + { + /* + Check if there is a multiple equality that allows to infer that field + and key_part->field are equal + (see also: compute_part_of_sort_key_for_equals) + */ + if (item_field->item_equal && + item_field->item_equal->contains(key_part->field)) + field= key_part->field; + } + if (key_part->field != field || !field->part_of_sortkey.is_set(idx)) + DBUG_RETURN(0); + + const ORDER::enum_order keypart_order= + (key_part->key_part_flag & HA_REVERSE_SORT) ? + ORDER::ORDER_DESC : ORDER::ORDER_ASC; + /* set flag to 1 if we can use read-next on key, else to -1 */ + flag= (order->direction == keypart_order) ? 1 : -1; + if (reverse && flag != reverse) + DBUG_RETURN(0); + reverse=flag; // Remember if reverse + if (key_part < key_part_end) + key_part++; + } + + key_parts= (uint) (key_part - table->key_info[idx].key_part); + + if (reverse == -1 && + !(table->file->index_flags(idx, user_defined_kp-1, 1) & HA_READ_PREV)) + reverse= 0; // Index can't be used + + if (have_pk_suffix && reverse == -1) + { + uint pk_parts= table->key_info[pk].user_defined_key_parts; + if (!(table->file->index_flags(pk, pk_parts-1, 1) & HA_READ_PREV)) + reverse= 0; // Index can't be used + } + +ok: + *used_key_parts= key_parts; + DBUG_RETURN(reverse); +} + + +/** + Find shortest key suitable for full table scan. 
+ + @param table Table to scan + @param usable_keys Allowed keys + + @return + MAX_KEY no suitable key found + key index otherwise +*/ + +uint find_shortest_key(TABLE *table, const key_map *usable_keys) +{ + double min_cost= DBL_MAX; + uint best= MAX_KEY; + if (!usable_keys->is_clear_all()) + { + for (uint nr=0; nr < table->s->keys ; nr++) + { + if (usable_keys->is_set(nr)) + { + double cost= table->file->keyread_time(nr, 1, table->file->records()); + if (cost < min_cost) + { + min_cost= cost; + best=nr; + } + DBUG_ASSERT(best < MAX_KEY); + } + } + } + return best; +} + +/** + Test if a second key is the subkey of the first one. + + @param key_part First key parts + @param ref_key_part Second key parts + @param ref_key_part_end Last+1 part of the second key + + @note + Second key MUST be shorter than the first one. + + @retval + 1 is a subkey + @retval + 0 no sub key +*/ + +inline bool +is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part, + KEY_PART_INFO *ref_key_part_end) +{ + for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++) + if (!key_part->field->eq(ref_key_part->field)) + return 0; + return 1; +} + +/** + Test if we can use one of the 'usable_keys' instead of 'ref' key + for sorting. + + @param ref Number of key, used for WHERE clause + @param usable_keys Keys for testing + + @return + - MAX_KEY If we can't use other key + - the number of found key Otherwise +*/ + +static uint +test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, + const key_map *usable_keys) +{ + uint nr; + uint min_length= (uint) ~0; + uint best= MAX_KEY; + KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part; + KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts; + + /* + Find the shortest key that + - produces the required ordering + - has key #ref (up to ref_key_parts) as its subkey. 
+ */ + for (nr= 0 ; nr < table->s->keys ; nr++) + { + uint not_used; + if (usable_keys->is_set(nr) && + table->key_info[nr].key_length < min_length && + table->key_info[nr].user_defined_key_parts >= ref_key_parts && + is_subkey(table->key_info[nr].key_part, ref_key_part, + ref_key_part_end) && + test_if_order_by_key(NULL, order, table, nr, ¬_used)) + { + min_length= table->key_info[nr].key_length; + best= nr; + } + } + return best; +} + + +/** + Check if GROUP BY/DISTINCT can be optimized away because the set is + already known to be distinct. + + Used in removing the GROUP BY/DISTINCT of the following types of + statements: + @code + SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref> + [GROUP BY <unique_key_cols>,...] + @endcode + + If (a,b,c is distinct) + then <any combination of a,b,c>,{whatever} is also distinct + + This function checks if all the key parts of any of the unique keys + of the table are referenced by a list : either the select list + through find_field_in_item_list or GROUP BY list through + find_field_in_order_list. + If the above holds and the key parts cannot contain NULLs then we + can safely remove the GROUP BY/DISTINCT, + as no result set can be more distinct than an unique key. + + @param table The table to operate on. + @param find_func function to iterate over the list and search + for a field + + @retval + 1 found + @retval + 0 not found. 
+*/ + +static bool +list_contains_unique_index(TABLE *table, + bool (*find_func) (Field *, void *), void *data) +{ + for (uint keynr= 0; keynr < table->s->keys; keynr++) + { + if (keynr == table->s->primary_key || + (table->key_info[keynr].flags & HA_NOSAME)) + { + KEY *keyinfo= table->key_info + keynr; + KEY_PART_INFO *key_part, *key_part_end; + + for (key_part=keyinfo->key_part, + key_part_end=key_part+ keyinfo->user_defined_key_parts; + key_part < key_part_end; + key_part++) + { + if (key_part->field->maybe_null() || + !find_func(key_part->field, data)) + break; + } + if (key_part == key_part_end) + return 1; + } + } + return 0; +} + + +/** + Helper function for list_contains_unique_index. + Find a field reference in a list of ORDER structures. + Finds a direct reference of the Field in the list. + + @param field The field to search for. + @param data ORDER *.The list to search in + + @retval + 1 found + @retval + 0 not found. +*/ + +static bool +find_field_in_order_list (Field *field, void *data) +{ + ORDER *group= (ORDER *) data; + bool part_found= 0; + for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next) + { + Item *item= (*tmp_group->item)->real_item(); + if (item->type() == Item::FIELD_ITEM && + ((Item_field*) item)->field->eq(field)) + { + part_found= 1; + break; + } + } + return part_found; +} + + +/** + Helper function for list_contains_unique_index. + Find a field reference in a dynamic list of Items. + Finds a direct reference of the Field in the list. + + @param[in] field The field to search for. + @param[in] data List<Item> *.The list to search in + + @retval + 1 found + @retval + 0 not found. 
+*/ + +static bool +find_field_in_item_list (Field *field, void *data) +{ + List<Item> *fields= (List<Item> *) data; + bool part_found= 0; + List_iterator<Item> li(*fields); + Item *item; + + while ((item= li++)) + { + if (item->real_item()->type() == Item::FIELD_ITEM && + ((Item_field*) (item->real_item()))->field->eq(field)) + { + part_found= 1; + break; + } + } + return part_found; +} + + +/* + Fill *col_keys with a union of Field::part_of_sortkey of all fields + that belong to 'table' and are equal to 'item_field'. +*/ + +void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, + Item_field *item_field, + key_map *col_keys) +{ + col_keys->clear_all(); + col_keys->merge(item_field->field->part_of_sortkey); + + if (!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP)) + return; + + Item_equal *item_eq= NULL; + + if (item_field->item_equal) + { + /* + The item_field is from ORDER structure, but it already has an item_equal + pointer set (UseMultipleEqualitiesToRemoveTempTable code have set it) + */ + item_eq= item_field->item_equal; + } + else + { + /* + Walk through join's muliple equalities and find the one that contains + item_field. + */ + if (!join->cond_equal) + return; + table_map needed_tbl_map= item_field->used_tables() | table->map; + List_iterator<Item_equal> li(join->cond_equal->current_level); + Item_equal *cur_item_eq; + while ((cur_item_eq= li++)) + { + if ((cur_item_eq->used_tables() & needed_tbl_map) && + cur_item_eq->contains(item_field->field)) + { + item_eq= cur_item_eq; + item_field->item_equal= item_eq; // Save the pointer to our Item_equal. 
+ break; + } + } + } + + if (item_eq) + { + Item_equal_fields_iterator it(*item_eq); + Item *item; + /* Loop through other members that belong to table table */ + while ((item= it++)) + { + if (item->type() == Item::FIELD_ITEM && + ((Item_field*)item)->field->table == table) + { + col_keys->merge(((Item_field*)item)->field->part_of_sortkey); + } + } + } +} + + +/** + Test if we can skip the ORDER BY by using an index. + + If we can use an index, the JOIN_TAB / tab->select struct + is changed to use the index. + + The index must cover all fields in <order>, or it will not be considered. + + @param no_changes No changes will be made to the query plan. + @param fatal_error OUT A fatal error occurred + + @todo + - sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + + @retval + 0 We have to use filesort to do the sorting + @retval + 1 We can use an index. +*/ + +static bool +test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, + bool no_changes, const key_map *map, bool *fatal_error) +{ + int ref_key; + uint UNINIT_VAR(ref_key_parts); + int order_direction= 0; + uint used_key_parts= 0; + TABLE *table=tab->table; + SQL_SELECT *select=tab->select; + key_map usable_keys; + QUICK_SELECT_I *save_quick= select ? 
select->quick : 0; + Item *orig_cond= 0; + bool orig_cond_saved= false; + int best_key= -1; + bool changed_key= false; + THD *thd= tab->join->thd; + Json_writer_object trace_wrapper(thd); + Json_writer_array trace_arr(thd, "test_if_skip_sort_order"); + DBUG_ENTER("test_if_skip_sort_order"); + + *fatal_error= false; + /* Check that we are always called with first non-const table */ + DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables); + + /* Sorting a single row can always be skipped */ + if (tab->type == JT_EQ_REF || + tab->type == JT_CONST || + tab->type == JT_SYSTEM) + { + Json_writer_object trace_skip(thd); + trace_skip.add("skipped", "single row access method"); + DBUG_RETURN(1); + } + + /* + Keys disabled by ALTER TABLE ... DISABLE KEYS should have already + been taken into account. + */ + usable_keys= *map; + + /* Find indexes that cover all ORDER/GROUP BY fields */ + for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) + { + Item *item= (*tmp_order->item)->real_item(); + if (item->type() != Item::FIELD_ITEM) + { + usable_keys.clear_all(); + DBUG_RETURN(0); + } + + /* + Take multiple-equalities into account. Suppose we have + ORDER BY col1, col10 + and there are + multiple-equal(col1, col2, col3), + multiple-equal(col10, col11). + + Then, + - when item=col1, we find the set of indexes that cover one of {col1, + col2, col3} + - when item=col10, we find the set of indexes that cover one of {col10, + col11} + + And we compute an intersection of these sets to find set of indexes that + cover all ORDER BY components. 
+ */ + key_map col_keys; + compute_part_of_sort_key_for_equals(tab->join, table, (Item_field*)item, + &col_keys); + usable_keys.intersect(col_keys); + if (usable_keys.is_clear_all()) + goto use_filesort; // No usable keys + } + + ref_key= -1; + /* Test if constant range in WHERE */ + if (tab->ref.key >= 0 && tab->ref.key_parts) + { + ref_key= tab->ref.key; + ref_key_parts= tab->ref.key_parts; + /* + todo: why does JT_REF_OR_NULL mean filesort? We could find another index + that satisfies the ordering. I would just set ref_key=MAX_KEY here... + */ + if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT || + tab->ref.uses_splitting) + goto use_filesort; + } + else if (select && select->quick) // Range found by opt_range + { + int quick_type= select->quick->get_type(); + /* + assume results are not ordered when index merge is used + TODO: sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + */ + + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) + { + /* + we set ref_key=MAX_KEY instead of -1, because test_if_cheaper ordering + assumes that "ref_key==-1" means doing full index scan. + (This is not very straightforward and we got into this situation for + historical reasons. Should be fixed at some point). 
+ */ + ref_key= MAX_KEY; + } + else + { + ref_key= select->quick->index; + ref_key_parts= select->quick->used_key_parts; + } + } + + if (ref_key >= 0 && ref_key != MAX_KEY) + { + /* Current access method uses index ref_key with ref_key_parts parts */ + if (!usable_keys.is_set(ref_key)) + { + /* However, ref_key doesn't match the needed ordering */ + uint new_ref_key; + + /* + If using index only read, only consider other possible index only + keys + */ + if (table->covering_keys.is_set(ref_key)) + usable_keys.intersect(table->covering_keys); + if (tab->pre_idx_push_select_cond) + { + orig_cond= tab->set_cond(tab->pre_idx_push_select_cond); + orig_cond_saved= true; + } + + if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts, + &usable_keys)) < MAX_KEY) + { + /* + Index new_ref_key + - produces the required ordering, + - also has the same columns as ref_key for #ref_key_parts (this + means we will read the same number of rows as with ref_key). + */ + + /* + If new_ref_key allows to construct a quick select which uses more key + parts than ref(new_ref_key) would, do that. + + Otherwise, construct a ref access (todo: it's not clear what is the + win in using ref access when we could use quick select also?) + */ + if ((table->opt_range_keys.is_set(new_ref_key) && + table->opt_range[new_ref_key].key_parts > ref_key_parts) || + !(tab->ref.key >= 0)) + { + /* + The range optimizer constructed QUICK_RANGE for ref_key, and + we want to use instead new_ref_key as the index. We can't + just change the index of the quick select, because this may + result in an inconsistent QUICK_SELECT object. Below we + create a new QUICK_SELECT from scratch so that all its + parameters are set correctly by the range optimizer. + */ + key_map new_ref_key_map; + COND *save_cond; + quick_select_return res; + new_ref_key_map.clear_all(); // Force the creation of quick select + new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key. 
+ + /* Reset quick; This will be restored in 'use_filesort' if needed */ + select->quick= 0; + save_cond= select->cond; + if (select->pre_idx_push_select_cond) + select->cond= select->pre_idx_push_select_cond; + res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0, + (tab->join->select_options & + OPTION_FOUND_ROWS) ? + HA_POS_ERROR : + tab->join->unit-> + lim.get_select_limit(), + TRUE, TRUE, FALSE, FALSE); + // if we cannot use quick select + if (res != SQL_SELECT::OK || !tab->select->quick) + { + if (res == SQL_SELECT::ERROR) + *fatal_error= true; + select->cond= save_cond; + goto use_filesort; + } + tab->type= JT_ALL; + tab->ref.key= -1; + tab->ref.key_parts= 0; + tab->use_quick= 1; + best_key= new_ref_key; + /* + We don't restore select->cond as we want to use the + original condition as index condition pushdown is not + active for the new index. + todo: why not perform index condition pushdown for the new index? + */ + } + else + { + /* + We'll use ref access method on key new_ref_key. In general case + the index search tuple for new_ref_key will be different (e.g. + when one index is defined as (part1, part2, ...) and another as + (part1, part2(N), ...) and the WHERE clause contains + "part1 = const1 AND part2=const2". + So we build tab->ref from scratch here. 
+ */ + KEYUSE *keyuse= tab->keyuse; + while (keyuse->key != new_ref_key && keyuse->table == tab->table) + keyuse++; + if (create_ref_for_key(tab->join, tab, keyuse, FALSE, + (tab->join->const_table_map | + OUTER_REF_TABLE_BIT))) + goto use_filesort; + + pick_table_access_method(tab); + } + + ref_key= new_ref_key; + changed_key= true; + } + } + /* Check if we get the rows in requested sorted order by using the key */ + if (usable_keys.is_set(ref_key) && + (order_direction= test_if_order_by_key(tab->join, order,table,ref_key, + &used_key_parts))) + goto check_reverse_order; + } + { + uint UNINIT_VAR(best_key_parts); + uint saved_best_key_parts= 0; + int best_key_direction= 0; + JOIN *join= tab->join; + ha_rows table_records= table->stat_records(); + + test_if_cheaper_ordering(tab, order, table, usable_keys, + ref_key, select_limit, + &best_key, &best_key_direction, + &select_limit, &best_key_parts, + &saved_best_key_parts); + + /* + filesort() and join cache are usually faster than reading in + index order and not using join cache, except in case that chosen + index is clustered key. + */ + if (best_key < 0 || + ((select_limit >= table_records) && + (tab->type == JT_ALL && + tab->join->table_count > tab->join->const_tables + 1) && + !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX))) + goto use_filesort; + + if (select && // psergey: why doesn't this use a quick? + table->opt_range_keys.is_set(best_key) && best_key != ref_key) + { + key_map tmp_map; + tmp_map.clear_all(); // Force the creation of quick select + tmp_map.set_bit(best_key); // only best_key. + select->quick= 0; + + bool cond_saved= false; + Item *saved_cond; + + /* + Index Condition Pushdown may have removed parts of the condition for + this table. Temporarily put them back because we want the whole + condition for the range analysis. 
+ */ + if (select->pre_idx_push_select_cond) + { + saved_cond= select->cond; + select->cond= select->pre_idx_push_select_cond; + cond_saved= true; + } + + quick_select_return res; + res = select->test_quick_select(join->thd, tmp_map, 0, + join->select_options & OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->lim.get_select_limit(), + TRUE, FALSE, FALSE, FALSE); + if (res == SQL_SELECT::ERROR) + { + *fatal_error= true; + goto use_filesort; + } + + if (cond_saved) + select->cond= saved_cond; + } + order_direction= best_key_direction; + /* + saved_best_key_parts is actual number of used keyparts found by the + test_if_order_by_key function. It could differ from keyinfo->user_defined_key_parts, + thus we have to restore it in case of desc order as it affects + QUICK_SELECT_DESC behaviour. + */ + used_key_parts= (order_direction == -1) ? + saved_best_key_parts : best_key_parts; + changed_key= true; + } + +check_reverse_order: + DBUG_ASSERT(order_direction != 0); + + if (order_direction == -1) // If ORDER BY ... DESC + { + int quick_type; + if (select && select->quick) + { + /* + Don't reverse the sort order, if it's already done. + (In some cases test_if_order_by_key() can be called multiple times + */ + if (select->quick->reverse_sorted()) + goto skipped_filesort; + + quick_type= select->quick->get_type(); + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + tab->limit= 0; + goto use_filesort; // Use filesort + } + } + } + + /* + Update query plan with access pattern for doing ordered access + according to what we have decided above. 
+ */ + if (!no_changes) // We are allowed to update QEP + { + if (best_key >= 0) + { + bool quick_created= + (select && select->quick && select->quick!=save_quick); + + if (!quick_created) + { + if (select) // Throw any existing quick select + select->quick= 0; // Cleanup either reset to save_quick, + // or 'delete save_quick' + tab->index= best_key; + tab->read_first_record= order_direction > 0 ? + join_read_first:join_read_last; + tab->type=JT_NEXT; // Read with index_first(), index_next() + + /* + Currently usage of rowid filters is not supported in InnoDB + if the table is accessed by the primary key + */ + if (tab->rowid_filter && + table->file->is_clustering_key(tab->index)) + { + tab->range_rowid_filter_info= 0; + delete tab->rowid_filter; + tab->rowid_filter= 0; + } + + if (tab->pre_idx_push_select_cond) + { + tab->set_cond(tab->pre_idx_push_select_cond); + /* + orig_cond is a part of pre_idx_push_cond, + no need to restore it. + */ + orig_cond= 0; + orig_cond_saved= false; + } + + table->file->ha_index_or_rnd_end(); + if (tab->join->select_options & SELECT_DESCRIBE) + { + tab->ref.key= -1; + tab->ref.key_parts= 0; + if (select_limit < table->stat_records()) + tab->limit= select_limit; + } + } + else if (tab->type != JT_ALL || tab->select->quick) + { + /* + We're about to use a quick access to the table. + We need to change the access method so as the quick access + method is actually used. + */ + DBUG_ASSERT(tab->select->quick); + tab->type=JT_ALL; + tab->use_quick=1; + tab->ref.key= -1; + tab->ref.key_parts=0; // Don't use ref key. 
+ tab->range_rowid_filter_info= 0; + if (tab->rowid_filter) + { + delete tab->rowid_filter; + tab->rowid_filter= 0; + } + tab->read_first_record= join_init_read_record; + if (tab->is_using_loose_index_scan()) + tab->join->tmp_table_param.precomputed_group_by= TRUE; + + /* + Restore the original condition as changes done by pushdown + condition are not relevant anymore + */ + if (tab->select && tab->select->pre_idx_push_select_cond) + { + tab->set_cond(tab->select->pre_idx_push_select_cond); + tab->table->file->cancel_pushed_idx_cond(); + } + /* + TODO: update the number of records in join->best_positions[tablenr] + */ + } + } // best_key >= 0 + + if (order_direction == -1) // If ORDER BY ... DESC + { + if (select && select->quick) + { + /* ORDER BY range_key DESC */ + QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts); + if (!tmp) + { + tab->limit= 0; + goto use_filesort; // Reverse sort failed -> filesort + } + /* + Cancel Pushed Index Condition, as it doesn't work for reverse scans. + */ + if (tab->select && tab->select->pre_idx_push_select_cond) + { + tab->set_cond(tab->select->pre_idx_push_select_cond); + tab->table->file->cancel_pushed_idx_cond(); + } + if (select->quick == save_quick) + save_quick= 0; // make_reverse() consumed it + select->set_quick(tmp); + /* Cancel "Range checked for each record" */ + if (tab->use_quick == 2) + { + tab->use_quick= 1; + tab->read_first_record= join_init_read_record; + } + } + else if (tab->type != JT_NEXT && tab->type != JT_REF_OR_NULL && + tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts) + { + /* + SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC + + Use a traversal function that starts by reading the last row + with key part (A) and then traverse the index backwards. 
+ */ + tab->read_first_record= join_read_last_key; + tab->read_record.read_record_func= join_read_prev_same; + /* Cancel "Range checked for each record" */ + if (tab->use_quick == 2) + { + tab->use_quick= 1; + tab->read_first_record= join_init_read_record; + } + /* + Cancel Pushed Index Condition, as it doesn't work for reverse scans. + */ + if (tab->select && tab->select->pre_idx_push_select_cond) + { + tab->set_cond(tab->select->pre_idx_push_select_cond); + tab->table->file->cancel_pushed_idx_cond(); + } + } + } + else if (select && select->quick) + { + /* Cancel "Range checked for each record" */ + if (tab->use_quick == 2) + { + tab->use_quick= 1; + tab->read_first_record= join_init_read_record; + } + select->quick->need_sorted_output(); + } + + if (tab->type == JT_EQ_REF) + tab->read_record.unlock_row= join_read_key_unlock_row; + else if (tab->type == JT_CONST) + tab->read_record.unlock_row= join_const_unlock_row; + else + tab->read_record.unlock_row= rr_unlock_row; + + } // QEP has been modified + + /* + Cleanup: + We may have both a 'select->quick' and 'save_quick' (original) + at this point. Delete the one that we wan't use. + */ + +skipped_filesort: + // Keep current (ordered) select->quick + if (select && save_quick != select->quick) + { + delete save_quick; + save_quick= NULL; + } + if (orig_cond_saved && !changed_key) + tab->set_cond(orig_cond); + if (!no_changes && changed_key && table->file->pushed_idx_cond) + table->file->cancel_pushed_idx_cond(); + + DBUG_RETURN(1); + +use_filesort: + // Restore original save_quick + if (select && select->quick != save_quick) + { + delete select->quick; + select->quick= save_quick; + } + if (orig_cond_saved) + tab->set_cond(orig_cond); + + DBUG_RETURN(0); +} + + +/* + If not selecting by given key, create an index how records should be read + + SYNOPSIS + create_sort_index() + thd Thread handler + join Join with table to sort + join_tab What table to sort + fsort Filesort object. NULL means "use tab->filesort". 
IMPLEMENTATION
  - If there is an index that can be used, the first non-const join_tab in
    'join' is modified to use this index.
  - If no index, create with filesort() an index file that can be used to
    retrieve rows in order (should be done with 'read_record').
    The sorted data is stored in tab->filesort

RETURN VALUES
  0     ok
  -1    Some fatal error
  1     No records

NOTES
  - The value 1 is returned exactly when filesort() returned NULL; every
    'goto err' path returns -1.
  - The result of filesort() is stored in tab->filesort_result, which is
    asserted to be 0 on entry to that step.
*/

int
create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort)
{
  TABLE *table;
  SQL_SELECT *select;
  /* Set only when this call builds select->quick from tab->ref */
  bool quick_created= FALSE;
  SORT_INFO *file_sort= 0;
  DBUG_ENTER("create_sort_index");

  /* NULL means "use tab->filesort" */
  if (fsort == NULL)
    fsort= tab->filesort;

  table=  tab->table;
  select= fsort->select;

  table->status=0;                              // May be wrong if quick_select

  if (!tab->preread_init_done && tab->preread_init())
    goto err;

  // If table has a range, move it to select
  if (select && tab->ref.key >= 0)
  {
    if (!select->quick)
    {
      if (tab->quick)
      {
        /* Hand the quick select over from the join tab to 'select' */
        select->quick= tab->quick;
        tab->quick= NULL;
        /*
          We can only use 'Only index' if quick key is same as ref_key
          and in index_merge 'Only index' cannot be used
        */
        if (((uint) tab->ref.key != select->quick->index))
          table->file->ha_end_keyread();
      }
      else
      {
        /*
          We have a ref on a const;  Change this to a range that filesort
          can use.
          For impossible ranges (like when doing a lookup on NULL on a NOT NULL
          field, quick will contain an empty record set.
        */
        if (!(select->quick= (tab->type == JT_FT ?
                              get_ft_select(thd, table, tab->ref.key) :
                              get_quick_select_for_ref(thd, table, &tab->ref,
                                                       tab->found_records))))
          goto err;
        /* Remember that the quick select has to be cleaned up below */
        quick_created= TRUE;
      }
      fsort->own_select= true;
    }
    else
    {
      DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF);
      // Update ref value
      if (unlikely(cp_buffer_from_ref(thd, table, &tab->ref) &&
                   thd->is_error()))
        goto err;                                   // out of memory
    }
  }


  /* Fill schema tables with data before filesort if it's necessary */
  if ((join->select_lex->options & OPTION_SCHEMA_TABLE) &&
      unlikely(get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX)))
    goto err;

  if (table->s->tmp_table)
    table->file->info(HA_STATUS_VARIABLE);      // Get record count
  fsort->accepted_rows= &join->accepted_rows;   // For ROWNUM
  file_sort= filesort(thd, table, fsort, fsort->tracker, join, tab->table->map);
  DBUG_ASSERT(tab->filesort_result == 0);
  tab->filesort_result= file_sort;
  tab->records= 0;
  if (file_sort)
  {
    /*
      With OPTION_FOUND_ROWS the row count is taken from found_rows,
      otherwise from return_rows.
    */
    tab->records= join->select_options & OPTION_FOUND_ROWS ?
      file_sort->found_rows : file_sort->return_rows;
    tab->join->join_examined_rows+= file_sort->examined_rows;
  }

  if (quick_created)
  {
    /* This will delete the quick select. */
    select->cleanup();
  }

  /* End key-read mode and the scan that was used to feed filesort() */
  table->file->ha_end_keyread();
  if (tab->type == JT_FT)
    table->file->ha_ft_end();
  else
    table->file->ha_index_or_rnd_end();

  /* 1 if filesort() produced no result object, 0 otherwise */
  DBUG_RETURN(file_sort == 0);
err:
  DBUG_RETURN(-1);
}


/**
  Compare fields from table->record[0] and table->record[1],
  possibly skipping few first fields.
+ + @param table + @param ptr field to start the comparison from, + somewhere in the table->field[] array + + @retval 1 different + @retval 0 identical +*/ +static bool compare_record(TABLE *table, Field **ptr) +{ + for (; *ptr ; ptr++) + { + Field *f= *ptr; + if (f->is_null() != f->is_null(table->s->rec_buff_length) || + (!f->is_null() && f->cmp_offset(table->s->rec_buff_length))) + return 1; + } + return 0; +} + +static bool copy_blobs(Field **ptr) +{ + for (; *ptr ; ptr++) + { + if ((*ptr)->flags & BLOB_FLAG) + if (((Field_blob *) (*ptr))->copy()) + return 1; // Error + } + return 0; +} + +static void free_blobs(Field **ptr) +{ + for (; *ptr ; ptr++) + { + if ((*ptr)->flags & BLOB_FLAG) + ((Field_blob *) (*ptr))->free(); + } +} + + +/* + @brief + Remove duplicates from a temporary table. + + @detail + Remove duplicate rows from a temporary table. This is used for e.g. queries + like + + select distinct count(*) as CNT from tbl group by col + + Here, we get a group table with count(*) values. It is not possible to + prevent duplicates from appearing in the table (as we don't know the values + before we've done the grouping). Because of that, we have this function to + scan the temptable (maybe, multiple times) and remove the duplicate rows + + Rows that do not satisfy 'having' condition are also removed. 
+*/ + +bool +JOIN_TAB::remove_duplicates() + +{ + bool error; + ulong keylength= 0, sort_field_keylength= 0; + uint field_count, item_count; + List<Item> *fields= (this-1)->fields; + Item *item; + THD *thd= join->thd; + SORT_FIELD *sortorder, *sorder; + DBUG_ENTER("remove_duplicates"); + + DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE); + THD_STAGE_INFO(join->thd, stage_removing_duplicates); + + if (!(sortorder= (SORT_FIELD*) my_malloc(PSI_INSTRUMENT_ME, + (fields->elements+1) * + sizeof(SORT_FIELD), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + + /* Calculate how many saved fields there is in list */ + field_count= item_count= 0; + + List_iterator<Item> it(*fields); + for (sorder= sortorder ; (item=it++) ;) + { + if (!item->const_item()) + { + if (item->get_tmp_table_field()) + { + /* Field is stored in temporary table, skipp */ + field_count++; + } + else + { + /* Item is not stored in temporary table, remember it */ + sorder->item= item; + sorder->type= sorder->item->type_handler()->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + /* Calculate sorder->length */ + item->type_handler()->sort_length(thd, item, sorder); + sorder++; + item_count++; + } + } + } + sorder->item= 0; // End marker + + if ((field_count + item_count == 0) && ! having && + !(join->select_options & OPTION_FOUND_ROWS)) + { + // only const items with no OPTION_FOUND_ROWS + join->unit->lim.send_first_row(); // Only send first row + my_free(sortorder); + DBUG_RETURN(false); + } + + /* + The table contains first fields that will be in the output, then + temporary results pointed to by the fields list. + Example: SELECT DISTINCT sum(a), sum(d) > 2 FROM ... + In this case the temporary table contains sum(a), sum(d). 
+ */ + + Field **first_field=table->field+table->s->fields - field_count; + for (Field **ptr=first_field; *ptr; ptr++) + keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null(); + for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++) + sort_field_keylength+= ptr->length + (ptr->item->maybe_null() ? 1 : 0); + + /* + Disable LIMIT ROWS EXAMINED in order to avoid interrupting prematurely + duplicate removal, and produce a possibly incomplete query result. + */ + thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX; + if (thd->killed == ABORT_QUERY) + thd->reset_killed(); + + table->file->info(HA_STATUS_VARIABLE); + table->reginfo.lock_type=TL_WRITE; + + if (table->s->db_type() == heap_hton || + (!table->s->blob_fields && + ((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records < + thd->variables.sortbuff_size))) + error= remove_dup_with_hash_index(join->thd, table, field_count, + first_field, sortorder, + keylength + sort_field_keylength, having); + else + error=remove_dup_with_compare(join->thd, table, first_field, sortorder, + sort_field_keylength, having); + + if (join->select_lex != join->select_lex->master_unit()->fake_select_lex) + thd->lex->set_limit_rows_examined(); + free_blobs(first_field); + my_free(sortorder); + DBUG_RETURN(error); +} + + +/* + Create a sort/compare key from items + + Key is of fixed length and binary comparable +*/ + +static uchar *make_sort_key(SORT_FIELD *sortorder, uchar *key_buffer, + String *tmp_value) +{ + for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++) + { + ptr->item->type_handler()->make_sort_key_part(key_buffer, + ptr->item, + ptr, tmp_value); + key_buffer+= (ptr->item->maybe_null() ? 
1 : 0) + ptr->length; + } + return key_buffer; +} + + +/* + Remove duplicates by comparing all rows with all other rows + + @param thd THD + @param table Temporary table + @param first_field Pointer to fields in temporary table that are part of + distinct, ends with null pointer + @param sortorder An array of Items part of distsinct. Terminated with an + element N with sortorder[N]->item=NULL. + @param keylength Length of key produced by sortorder + @param having Having expression (NULL if no having) +*/ + +static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, + SORT_FIELD *sortorder, ulong keylength, + Item *having) +{ + handler *file=table->file; + uchar *record=table->record[0], *key_buffer, *key_buffer2; + char *tmp_buffer; + int error; + String tmp_value; + DBUG_ENTER("remove_dup_with_compare"); + + if (unlikely(!my_multi_malloc(PSI_INSTRUMENT_ME, + MYF(MY_WME), + &key_buffer, keylength, + &key_buffer2, keylength, + &tmp_buffer, keylength+1, + NullS))) + DBUG_RETURN(1); + tmp_value.set(tmp_buffer, keylength, &my_charset_bin); + + if (unlikely(file->ha_rnd_init_with_error(1))) + DBUG_RETURN(1); + + error= file->ha_rnd_next(record); + for (;;) + { + if (unlikely(thd->check_killed())) + { + error= 1; + goto end; + } + if (unlikely(error)) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + if (having && !having->val_int()) + { + if (unlikely((error= file->ha_delete_row(record)))) + goto err; + error= file->ha_rnd_next(record); + continue; + } + if (unlikely(copy_blobs(first_field))) + { + my_message(ER_OUTOFMEMORY, ER_THD(thd,ER_OUTOFMEMORY), + MYF(ME_FATAL)); + error= 1; + goto end; + } + make_sort_key(sortorder, key_buffer, &tmp_value); + store_record(table,record[1]); + + /* Read through rest of file and mark duplicated rows deleted */ + bool found=0; + for (;;) + { + if (unlikely((error= file->ha_rnd_next(record)))) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + make_sort_key(sortorder, key_buffer2, 
&tmp_value); + if (compare_record(table, first_field) == 0 && + (!keylength || + memcmp(key_buffer, key_buffer2, keylength) == 0)) + { + if (unlikely((error= file->ha_delete_row(record)))) + goto err; + } + else if (!found) + { + found=1; + if (unlikely((error= file->remember_rnd_pos()))) + goto err; + } + } + if (!found) + break; // End of file + /* Restart search on saved row */ + if (unlikely((error= file->restart_rnd_next(record)))) + goto err; + } + + error= 0; +end: + my_free(key_buffer); + file->extra(HA_EXTRA_NO_CACHE); + (void) file->ha_rnd_end(); + DBUG_RETURN(error); + +err: + DBUG_ASSERT(error); + file->print_error(error,MYF(0)); + goto end; +} + + +/** + Generate a hash index for each row to quickly find duplicate rows. + + @param thd THD + @param table Temporary table + @param field_count Number of fields part of distinct + @param first_field Pointer to fields in temporary table that are part of + distinct, ends with null pointer + @param sortorder An array of Items part of distsinct. Terminated with an + element N with sortorder[N]->item=NULL. + @param keylength Length of hash key + @param having Having expression (NULL if no having) + + @note + Note that this will not work on tables with blobs! 
*/

static int remove_dup_with_hash_index(THD *thd, TABLE *table,
                                      uint field_count,
                                      Field **first_field,
                                      SORT_FIELD *sortorder,
                                      ulong key_length,
                                      Item *having)
{
  uchar *key_buffer, *key_pos, *record=table->record[0];
  char *tmp_buffer;
  int error;
  handler *file= table->file;
  /* Padding added after each key so that key slots are ALIGN_SIZE aligned */
  ulong extra_length= ALIGN_SIZE(key_length)-key_length;
  uint *field_lengths, *field_length;
  HASH hash;
  String tmp_value;
  DBUG_ENTER("remove_dup_with_hash_index");

  /*
    One allocation for: a key slot per row (according to stats.records),
    the per-field sort lengths, and a scratch buffer for tmp_value.
  */
  if (!my_multi_malloc(key_memory_hash_index_key_buffer, MYF(MY_WME),
                       &key_buffer,
                       (uint) ((key_length + extra_length) *
                               (long) file->stats.records),
                       &field_lengths,
                       (uint) (field_count*sizeof(*field_lengths)),
                       &tmp_buffer, key_length+1,
                       NullS))
    DBUG_RETURN(1);

  tmp_value.set(tmp_buffer, key_length, &my_charset_bin);
  /* Cache sort_length() of every field; reused for each row below */
  field_length= field_lengths;
  for (Field **ptr= first_field ; *ptr ; ptr++)
    (*field_length++)= (*ptr)->sort_length();

  if (my_hash_init(key_memory_hash_index_key_buffer, &hash, &my_charset_bin,
                   (uint) file->stats.records, 0, key_length,
                   (my_hash_get_key) 0, 0, 0))
  {
    my_free(key_buffer);
    DBUG_RETURN(1);
  }

  if (unlikely((error= file->ha_rnd_init(1))))
    goto err;

  key_pos= key_buffer;
  for (;;)
  {
    uchar *org_key_pos;
    if (unlikely(thd->check_killed()))
    {
      /* error=0 so that the 'err' path does not call print_error() */
      error=0;
      goto err;
    }
    if (unlikely((error= file->ha_rnd_next(record))))
    {
      if (error == HA_ERR_END_OF_FILE)
        break;
      goto err;
    }
    if (having && !having->val_int())
    {
      /* Row rejected by HAVING: delete it */
      if (unlikely((error= file->ha_delete_row(record))))
        goto err;
      continue;
    }

    /* copy fields to key buffer */
    org_key_pos= key_pos;
    field_length=field_lengths;
    for (Field **ptr= first_field ; *ptr ; ptr++)
    {
      (*ptr)->make_sort_key_part(key_pos, *field_length);
      key_pos+= (*ptr)->maybe_null() + *field_length++;
    }
    /* Copy result fields not stored in table to key buffer */
    key_pos= make_sort_key(sortorder, key_pos, &tmp_value);

    /* Check if it exists before */
    if (my_hash_search(&hash, org_key_pos, key_length))
    {
      /* Duplicated found ; Remove the row */
      if (unlikely((error= file->ha_delete_row(record))))
        goto err;
    }
    else
    {
      /* 'error' is 0 here, so 'err' returns 1 without print_error() */
      if (my_hash_insert(&hash, org_key_pos))
        goto err;
    }
    /* Skip the alignment padding to reach the next key slot */
    key_pos+=extra_length;
  }
  my_free(key_buffer);
  my_hash_free(&hash);
  file->extra(HA_EXTRA_NO_CACHE);
  (void) file->ha_rnd_end();
  DBUG_RETURN(0);

err:
  my_free(key_buffer);
  my_hash_free(&hash);
  file->extra(HA_EXTRA_NO_CACHE);
  (void) file->ha_rnd_end();
  if (unlikely(error))
    file->print_error(error,MYF(0));
  DBUG_RETURN(1);
}


/*
  eq_ref: Create the lookup key and check if it is the same as saved key

  SYNOPSIS
    cmp_buffer_with_ref()
      thd      Thread handler, passed on to cp_buffer_from_ref()
      table    The table to read.  This is usually tab->table, except for
               semi-join when we might need to make a lookup in a temptable
               instead.
      tab_ref  The structure with methods to collect index lookup tuple.
               This is usually table->ref, except for the case of when we're
               doing lookup into semi-join materialization table.

  DESCRIPTION
    Used by eq_ref access method: create the index lookup key and check if
    we've used this key at previous lookup (If yes, we don't need to repeat
    the lookup - the record has been already fetched)

  RETURN
    TRUE   No cached record for the key, or failed to create the key (due to
           out-of-domain error)
    FALSE  The created key is the same as the previous one (and the record
           is already in table->record)
*/

static bool
cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
{
  bool no_prev_key;
  if (!tab_ref->disable_cache)
  {
    if (!(no_prev_key= tab_ref->key_err))
    {
      /* Previous access found a row. Copy its key */
      memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
    }
  }
  else
    no_prev_key= TRUE;
  /* Build the new key in key_buff; key_err records whether that failed */
  if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
      no_prev_key)
    return 1;
  return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
    != 0;
}


/*
  Run the store_key objects of a TABLE_REF to build its lookup key.

  Walks the NULL-terminated array ref->key_copy and calls copy() on each
  element. While doing so thd->count_cuted_fields is temporarily set to
  CHECK_FIELD_IGNORE and restored before returning.

  RETURN
    1  copy() of some key part returned a value with the lowest bit set, or
       a key part flagged in ref->null_rejecting has null_key set
    0  otherwise
*/

bool
cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
{
  enum_check_fields org_count_cuted_fields= thd->count_cuted_fields;
  MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set);
  bool result= 0;
  /* Bit of the key part that the current store_key belongs to */
  key_part_map map= 1;

  thd->count_cuted_fields= CHECK_FIELD_IGNORE;
  for (store_key **copy=ref->key_copy ; *copy ; copy++, map <<= 1)
  {
    while (map & ref->const_ref_part_map) // skip const ref parts
      map <<= 1;                          // no store_key objects for them
    if ((*copy)->copy(thd) & 1 ||
        ((ref->null_rejecting & map) && (*copy)->null_key))
    {
      result= 1;
      break;
    }
  }
  thd->count_cuted_fields= org_count_cuted_fields;
  dbug_tmp_restore_column_map(&table->write_set, old_map);
  return result;
}


/*****************************************************************************
  Group and order functions
*****************************************************************************/

/**
  Resolve an ORDER BY or GROUP BY column reference.

  Given a column reference (represented by 'order') from a GROUP BY or ORDER
  BY clause, find the actual column it represents. If the column being
  resolved is from the GROUP BY clause, the procedure searches the SELECT
  list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
  the ORDER BY clause, only the SELECT list is being searched.

  If 'order' is resolved to an Item, then order->item is set to the found
  Item. If there is no item for the found column (that is, it was resolved
  into a table field), order->item is 'fixed' and is added to all_fields and
  ref_pointer_array.

  ref_pointer_array and all_fields are updated.
@param[in]     thd               Pointer to current thread structure
@param[in,out] ref_pointer_array All select, group and order by fields
@param[in]     tables            List of tables to search in (usually
                                 FROM clause)
@param[in]     order             Column reference to be resolved
@param[in]     fields            List of fields to search in (usually
                                 SELECT list)
@param[in,out] all_fields        All select, group and order by fields
@param[in]     is_group_field    True if order is a GROUP field, false if
                                 ORDER by field
@param[in]     add_to_all_fields If the item is to be added to all_fields and
                                 ref_pointer_array, this flag can be set to
                                 false to stop the automatic insertion.
@param[in]     from_window_spec  If true then order is from a window spec

@retval
  FALSE if OK
@retval
  TRUE  if error occurred
*/

static bool
find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array,
                   TABLE_LIST *tables,
                   ORDER *order, List<Item> &fields, List<Item> &all_fields,
                   bool is_group_field, bool add_to_all_fields,
                   bool from_window_spec)
{
  Item *order_item= *order->item; /* The item from the GROUP/ORDER clause. */
  Item::Type order_item_type;
  Item **select_item; /* The corresponding item from the SELECT clause. */
  Field *from_field;  /* The corresponding field from the FROM clause. */
  uint counter;
  enum_resolution_type resolution;

  if (order_item->is_order_clause_position() && !from_window_spec)
  {                                             /* Order by position */
    uint count;
    if (order->counter_used)
      count= order->counter; // counter was once resolved
    else
      count= (uint) order_item->val_int();
    /* Positions are 1-based and must refer to an element of 'fields' */
    if (!count || count > fields.elements)
    {
      my_error(ER_BAD_FIELD_ERROR, MYF(0),
               order_item->full_name(), thd->where);
      return TRUE;
    }
    thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]);
    order->in_field_list= 1;
    order->counter= count;
    order->counter_used= 1;
    return FALSE;
  }
  /* Lookup the current GROUP/ORDER field in the SELECT clause. */
  select_item= find_item_in_list(order_item, fields, &counter,
                                 REPORT_EXCEPT_NOT_FOUND, &resolution);
  if (!select_item)
    return TRUE; /* The item is not unique, or some other error occurred. */


  /* Check whether the resolved field is not ambiguous. */
  if (select_item != not_found_item)
  {
    Item *view_ref= NULL;
    /*
      If we have found field not by its alias in select list but by its
      original field name, we should additionally check if we have conflict
      for this name (in case if we would perform lookup in all tables).
    */
    if (resolution == RESOLVED_BEHIND_ALIAS &&
        order_item->fix_fields_if_needed_for_order_by(thd, order->item))
      return TRUE;

    /* Lookup the current GROUP field in the FROM clause. */
    order_item_type= order_item->type();
    from_field= (Field*) not_found_field;
    if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
        order_item_type == Item::REF_ITEM)
    {
      from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
                                       NULL, ignored_tables_list_t(NULL),
                                       &view_ref, IGNORE_ERRORS, FALSE, FALSE);
      if (!from_field)
        from_field= (Field*) not_found_field;
    }

    if (from_field == not_found_field ||
        (from_field != view_ref_found ?
         /* it is field of base table => check that fields are same */
         ((*select_item)->type() == Item::FIELD_ITEM &&
          ((Item_field*) (*select_item))->field->eq(from_field)) :
         /*
           it is field of view table => check that references on translation
           table are same
         */
         ((*select_item)->type() == Item::REF_ITEM &&
          view_ref->type() == Item::REF_ITEM &&
          ((Item_ref *) (*select_item))->ref ==
          ((Item_ref *) view_ref)->ref)))
    {
      /*
        If there is no such field in the FROM clause, or it is the same field
        as the one found in the SELECT clause, then use the Item created for
        the SELECT field. As a result if there was a derived field that
        'shadowed' a table field with the same name, the table field will be
        chosen over the derived field.
      */
      order->item= &ref_pointer_array[counter];
      order->in_field_list=1;
      return FALSE;
    }
    else
    {
      /*
        There is a field with the same name in the FROM clause. This
        is the field that will be chosen. In this case we issue a
        warning so the user knows that the field from the FROM clause
        overshadows the column reference from the SELECT list.
      */
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_NON_UNIQ_ERROR,
                          ER_THD(thd, ER_NON_UNIQ_ERROR),
                          ((Item_ident*) order_item)->field_name.str,
                          thd->where);
    }
  }
  else if (from_window_spec)
  {
    /*
      Not in the SELECT list: for a window spec, search all_fields as well.
      The last argument is the number of elements all_fields has in
      addition to 'fields'.
    */
    Item **found_item= find_item_in_list(order_item, all_fields, &counter,
                                         REPORT_EXCEPT_NOT_FOUND, &resolution,
                                         all_fields.elements - fields.elements);
    if (found_item != not_found_item)
    {
      order->item= &ref_pointer_array[all_fields.elements-1-counter];
      order->in_field_list= 0;
      return FALSE;
    }
  }

  order->in_field_list=0;
  /*
    The call to order_item->fix_fields() means that here we resolve
    'order_item' to a column from a table in the list 'tables', or to
    a column in some outer query. Exactly because of the second case
    we come to this point even if (select_item == not_found_item),
    in spite of that fix_fields() calls find_item_in_list() one more
    time.

    We check order_item->fixed() because Item_func_group_concat can put
    arguments for which fix_fields already was called.
  */
  if (order_item->fix_fields_if_needed_for_order_by(thd, order->item) ||
      thd->is_error())
    return TRUE; /* Wrong field. */
  order_item= *order->item; // Item can change during fix_fields()

  if (!add_to_all_fields)
    return FALSE;

  /* Slot in ref_pointer_array that the new item is stored in */
  uint el= all_fields.elements;
  /* Add new field to field list. */
  all_fields.push_front(order_item, thd->mem_root);
  ref_pointer_array[el]= order_item;
  /*
    If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
    ref_by is set to order->item which is the address of order_item.
    But this needs to be address of order_item in the all_fields list.
    As a result, when it gets replaced with Item_aggregate_ref
    object in Item::split_sum_func2, we will be able to retrieve the
    newly created object.
  */
  if (order_item->type() == Item::SUM_FUNC_ITEM)
    ((Item_sum *)order_item)->ref_by= all_fields.head_ref();

  order->item= &ref_pointer_array[el];
  return FALSE;
}


/**
  Change order to point at item in select list.

  If item isn't a number and doesn't exist in the select list, add it to the
  field list.

  Sets thd->where to "order clause" and resolves every element of 'order'
  with find_order_in_list().

  @retval 0  ok
  @retval 1  error; reported by find_order_in_list() or by my_error() here
*/

int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
                List<Item> &fields, List<Item> &all_fields, ORDER *order,
                bool from_window_spec)
{
  SELECT_LEX *select = thd->lex->current_select;
  enum_parsing_place context_analysis_place=
                     thd->lex->current_select->context_analysis_place;
  thd->where="order clause";
  /* True when 'select' is the fake_select_lex of a unit operation */
  const bool for_union= select->master_unit()->is_unit_op() &&
    select == select->master_unit()->fake_select_lex;
  /* 'number' is the 1-based position of the element, used in the error */
  for (uint number = 1; order; order=order->next, number++)
  {
    if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
                           all_fields, false, true, from_window_spec))
      return 1;
    Item * const item= *order->item;
    if (item->with_window_func() && context_analysis_place != IN_ORDER_BY)
    {
      my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
      return 1;
    }

    /*
      UNION queries cannot be used with an aggregate function in
      an ORDER BY clause
    */

    if (for_union && (item->with_sum_func() || item->with_window_func()))
    {
      my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
      return 1;
    }

    if ((from_window_spec && item->with_sum_func() &&
         item->type() != Item::SUM_FUNC_ITEM) || item->with_window_func())
    {
      item->split_sum_func(thd, ref_pointer_array,
                           all_fields, SPLIT_SUM_SELECT);
    }
  }
  return 0;
}


/**
  Initialize the GROUP BY list.

  @param thd                       Thread handler
  @param ref_pointer_array         We store references to all fields that was
                               not in 'fields' here.
+ @param fields All fields in the select part. Any item in + 'order' that is part of these list is replaced + by a pointer to this fields. + @param all_fields Total list of all unique fields used by the + select. All items in 'order' that was not part + of fields will be added first to this list. + @param order The fields we should do GROUP/PARTITION BY on + @param hidden_group_fields Pointer to flag that is set to 1 if we added + any fields to all_fields. + @param from_window_spec If true then list is from a window spec + + @todo + change ER_WRONG_FIELD_WITH_GROUP to more detailed + ER_NON_GROUPING_FIELD_USED + + @retval + 0 ok + @retval + 1 error (probably out of memory) +*/ + +int +setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List<Item> &fields, List<Item> &all_fields, ORDER *order, + bool *hidden_group_fields, bool from_window_spec) +{ + enum_parsing_place context_analysis_place= + thd->lex->current_select->context_analysis_place; + *hidden_group_fields=0; + ORDER *ord; + + if (!order) + return 0; /* Everything is ok */ + + uint org_fields=all_fields.elements; + + thd->where="group statement"; + for (ord= order; ord; ord= ord->next) + { + if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields, + all_fields, true, true, from_window_spec)) + return 1; + (*ord->item)->marker= MARKER_UNDEF_POS; /* Mark found */ + if ((*ord->item)->with_sum_func() && context_analysis_place == IN_GROUP_BY) + { + my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name()); + return 1; + } + if ((*ord->item)->with_window_func()) + { + if (context_analysis_place == IN_GROUP_BY) + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + else + my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0)); + return 1; + } + if (from_window_spec && (*ord->item)->with_sum_func() && + (*ord->item)->type() != Item::SUM_FUNC_ITEM) + (*ord->item)->split_sum_func(thd, ref_pointer_array, + all_fields, SPLIT_SUM_SELECT); + } + if (thd->variables.sql_mode & 
MODE_ONLY_FULL_GROUP_BY && + context_analysis_place == IN_GROUP_BY) + { + /* + Don't allow one to use fields that is not used in GROUP BY + For each select a list of field references that aren't under an + aggregate function is created. Each field in this list keeps the + position of the select list expression which it belongs to. + + First we check an expression from the select list against the GROUP BY + list. If it's found there then it's ok. It's also ok if this expression + is a constant or an aggregate function. Otherwise we scan the list + of non-aggregated fields and if we'll find at least one field reference + that belongs to this expression and doesn't occur in the GROUP BY list + we throw an error. If there are no fields in the created list for a + select list expression this means that all fields in it are used under + aggregate functions. + + Note that for items in the select list (fields), Item_field->markers + contains the position of the field in the select list. + */ + Item *item; + Item_field *field; + int cur_pos_in_select_list= 0; + List_iterator<Item> li(fields); + List_iterator<Item_field> naf_it(thd->lex->current_select->join->non_agg_fields); + + field= naf_it++; + while (field && (item=li++)) + { + if (item->type() != Item::SUM_FUNC_ITEM && + item->marker != MARKER_UNDEF_POS && + !item->const_item() && + !(item->real_item()->type() == Item::FIELD_ITEM && + item->used_tables() & OUTER_REF_TABLE_BIT)) + { + while (field) + { + /* Skip fields from previous expressions. */ + if (field->marker < cur_pos_in_select_list) + goto next_field; + /* Found a field from the next expression. */ + if (field->marker > cur_pos_in_select_list) + break; + /* + Check whether the field occur in the GROUP BY list. + Throw the error later if the field isn't found. 
+ */ + for (ord= order; ord; ord= ord->next) + if ((*ord->item)->eq((Item*)field, 0)) + goto next_field; + /* + TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed + ER_NON_GROUPING_FIELD_USED + */ + my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), field->full_name()); + return 1; +next_field: + field= naf_it++; + } + } + cur_pos_in_select_list++; + } + } + if (org_fields != all_fields.elements) + *hidden_group_fields=1; // group fields is not used + return 0; +} + +/** + Add fields with aren't used at start of field list. + + @return + FALSE if ok +*/ + +static bool +setup_new_fields(THD *thd, List<Item> &fields, + List<Item> &all_fields, ORDER *new_field) +{ + Item **item; + uint counter; + enum_resolution_type not_used; + DBUG_ENTER("setup_new_fields"); + + thd->column_usage= MARK_COLUMNS_READ; // Not really needed, but... + for (; new_field ; new_field= new_field->next) + { + if ((item= find_item_in_list(*new_field->item, fields, &counter, + IGNORE_ERRORS, ¬_used))) + new_field->item=item; /* Change to shared Item */ + else + { + thd->where="procedure list"; + if ((*new_field->item)->fix_fields(thd, new_field->item)) + DBUG_RETURN(1); /* purecov: inspected */ + all_fields.push_front(*new_field->item, thd->mem_root); + new_field->item=all_fields.head_ref(); + } + } + DBUG_RETURN(0); +} + +/** + Create a group by that consist of all non const fields. + + Try to use the fields in the order given by 'order' to allow one to + optimize away 'order by'. + + @retval + 0 OOM error if thd->is_fatal_error is set. 
Otherwise group was eliminated + # Pointer to new group +*/ + +ORDER * +create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array, + ORDER *order_list, List<Item> &fields, + List<Item> &all_fields, + bool *all_order_by_fields_used) +{ + List_iterator<Item> li(fields); + Item *item; + Ref_ptr_array orig_ref_pointer_array= ref_pointer_array; + ORDER *order,*group,**prev; + uint idx= 0; + + *all_order_by_fields_used= 1; + while ((item=li++)) + item->marker= MARKER_UNUSED; /* Marker that field is not used */ + + prev= &group; group=0; + for (order=order_list ; order; order=order->next) + { + if (order->in_field_list) + { + ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER)); + if (!ord) + return 0; + *prev=ord; + prev= &ord->next; + (*ord->item)->marker= MARKER_FOUND_IN_ORDER; + } + else + *all_order_by_fields_used= 0; + } + + li.rewind(); + while ((item=li++)) + { + if (!item->const_item() && !item->with_sum_func() && + item->marker == MARKER_UNUSED) + { + /* + Don't put duplicate columns from the SELECT list into the + GROUP BY list. + */ + ORDER *ord_iter; + for (ord_iter= group; ord_iter; ord_iter= ord_iter->next) + if ((*ord_iter->item)->eq(item, 1)) + goto next_item; + + ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER)); + if (!ord) + return 0; + + if (item->type() == Item::FIELD_ITEM && + item->field_type() == MYSQL_TYPE_BIT) + { + /* + Because HEAP tables can't index BIT fields we need to use an + additional hidden field for grouping because later it will be + converted to a LONG field. Original field will remain of the + BIT type and will be returned [el]client. 
+ */ + Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item); + if (!new_item) + return 0; + int el= all_fields.elements; + orig_ref_pointer_array[el]= new_item; + all_fields.push_front(new_item, thd->mem_root); + ord->item=&orig_ref_pointer_array[el]; + } + else + { + /* + We have here only field_list (not all_field_list), so we can use + simple indexing of ref_pointer_array (order in the array and in the + list are same) + */ + ord->item= &ref_pointer_array[idx]; + } + ord->direction= ORDER::ORDER_ASC; + *prev=ord; + prev= &ord->next; + } +next_item: + idx++; + } + *prev=0; + return group; +} + + +/** + Update join with count of the different type of fields. +*/ + +void +count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, + List<Item> &fields, bool reset_with_sum_func) +{ + List_iterator<Item> li(fields); + Item *field; + + param->field_count=param->sum_func_count=param->func_count= + param->hidden_field_count=0; + param->quick_group=1; + while ((field=li++)) + { + Item::Type real_type= field->real_item()->type(); + if (real_type == Item::FIELD_ITEM) + param->field_count++; + else if (real_type == Item::SUM_FUNC_ITEM) + { + if (! field->const_item()) + { + Item_sum *sum_item=(Item_sum*) field->real_item(); + if (!sum_item->depended_from() || + sum_item->depended_from() == select_lex) + { + if (!sum_item->quick_group) + param->quick_group=0; // UDF SUM function + param->sum_func_count++; + + for (uint i=0 ; i < sum_item->get_arg_count() ; i++) + { + if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM) + param->field_count++; + else + param->func_count++; + } + } + param->func_count++; + } + } + else + { + param->func_count++; + if (reset_with_sum_func) + field->with_flags&= ~item_with_t::SUM_FUNC; + } + } +} + + +/** + Return 1 if second is a subpart of first argument. 
+ + SIDE EFFECT: + For all the first items in the group by list that match, the sort + direction of the GROUP BY items are set to the same as those given by the + ORDER BY. + The direction of the group does not matter if the ORDER BY clause overrides + it anyway. +*/ + +static bool +test_if_subpart(ORDER *group_by, ORDER *order_by) +{ + while (group_by && order_by) + { + if ((*group_by->item)->eq(*order_by->item, 1)) + group_by->direction= order_by->direction; + else + return 0; + group_by= group_by->next; + order_by= order_by->next; + } + return MY_TEST(!order_by); +} + +/** + Return table number if there is only one table in sort order + and group and order is compatible, else return 0. +*/ + +static TABLE * +get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables, + table_map const_tables) +{ + TABLE_LIST *table; + List_iterator<TABLE_LIST> ti(tables); + table_map map= (table_map) 0; + DBUG_ENTER("get_sort_by_table"); + + if (!a) + a=b; // Only one need to be given + else if (!b) + b=a; + + for (; a && b; a=a->next,b=b->next) + { + /* Skip elements of a that are constant */ + while (!((*a->item)->used_tables() & ~const_tables)) + { + if (!(a= a->next)) + break; + } + + /* Skip elements of b that are constant */ + while (!((*b->item)->used_tables() & ~const_tables)) + { + if (!(b= b->next)) + break; + } + + if (!a || !b) + break; + + if (!(*a->item)->eq(*b->item,1)) + DBUG_RETURN(0); + map|=a->item[0]->used_tables(); + } + if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))) + DBUG_RETURN(0); + + map&= ~const_tables; + while ((table= ti++) && !(map & table->table->map)) ; + if (map != table->table->map) + DBUG_RETURN(0); // More than one table + DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr)); + DBUG_RETURN(table->table); +} + + +/** + calc how big buffer we need for comparing group entries. 
+*/ + +void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) +{ + uint key_length=0, parts=0, null_parts=0; + + for (; group ; group=group->next) + { + Item *group_item= *group->item; + Field *field= group_item->get_tmp_table_field(); + if (field) + { + enum_field_types type; + if ((type= field->type()) == MYSQL_TYPE_BLOB) + key_length+=MAX_BLOB_WIDTH; // Can't be used as a key + else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING) + key_length+= field->field_length + HA_KEY_BLOB_LENGTH; + else if (type == MYSQL_TYPE_BIT) + { + /* Bit is usually stored as a longlong key for group fields */ + key_length+= 8; // Big enough + } + else + key_length+= field->pack_length(); + } + else + { + switch (group_item->cmp_type()) { + case REAL_RESULT: + key_length+= sizeof(double); + break; + case INT_RESULT: + key_length+= sizeof(longlong); + break; + case DECIMAL_RESULT: + key_length+= my_decimal_get_binary_size(group_item->max_length - + (group_item->decimals ? 1 : 0), + group_item->decimals); + break; + case TIME_RESULT: + { + /* + As items represented as DATE/TIME fields in the group buffer + have STRING_RESULT result type, we increase the length + by 8 as maximum pack length of such fields. + */ + key_length+= 8; + break; + } + case STRING_RESULT: + { + enum enum_field_types type= group_item->field_type(); + if (type == MYSQL_TYPE_BLOB) + key_length+= MAX_BLOB_WIDTH; // Can't be used as a key + else + { + /* + Group strings are taken as varstrings and require an length field. + A field is not yet created by create_tmp_field_ex() + and the sizes should match up. 
+ */ + key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH; + } + break; + } + default: + /* This case should never be chosen */ + DBUG_ASSERT(0); + my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL)); + } + } + parts++; + if (group_item->maybe_null()) + null_parts++; + } + param->group_length= key_length + null_parts; + param->group_parts= parts; + param->group_null_parts= null_parts; +} + +static void calc_group_buffer(JOIN *join, ORDER *group) +{ + if (group) + join->group= 1; + calc_group_buffer(&join->tmp_table_param, group); +} + + +/** + allocate group fields or take prepared (cached). + + @param main_join join of current select + @param curr_join current join (join of current select or temporary copy + of it) + + @retval + 0 ok + @retval + 1 failed +*/ + +static bool +make_group_fields(JOIN *main_join, JOIN *curr_join) +{ + if (main_join->group_fields_cache.elements) + { + curr_join->group_fields= main_join->group_fields_cache; + curr_join->sort_and_group= 1; + } + else + { + if (alloc_group_fields(curr_join, curr_join->group_list)) + return (1); + main_join->group_fields_cache= curr_join->group_fields; + } + return (0); +} + +static bool +fill_cached_item_list(THD *thd, List<Cached_item> *list, ORDER *order, + uint max_number_of_elements = UINT_MAX) +{ + for (; order && max_number_of_elements ; + order= order->next, max_number_of_elements--) + { + Cached_item *tmp= new_Cached_item(thd, *order->item, true); + if (!tmp || list->push_front(tmp)) + return true; + } + return false; +} + +/** + Get a list of buffers for saving last group. + + Groups are saved in reverse order for easier check loop. 
+*/ + +static bool +alloc_group_fields(JOIN *join, ORDER *group) +{ + if (fill_cached_item_list(join->thd, &join->group_fields, group)) + return true; + join->sort_and_group=1; /* Mark for do_select */ + return false; +} + +static bool +alloc_order_fields(JOIN *join, ORDER *order, uint max_number_of_elements) +{ + return fill_cached_item_list(join->thd, &join->order_fields, order, + max_number_of_elements); +} + + +/* + Test if a single-row cache of items changed, and update the cache. + + @details Test if a list of items that typically represents a result + row has changed. If the value of some item changed, update the cached + value for this item. + + @param list list of <item, cached_value> pairs stored as Cached_item. + + @return -1 if no item changed + @return index of the first item that changed +*/ + +int test_if_item_cache_changed(List<Cached_item> &list) +{ + DBUG_ENTER("test_if_item_cache_changed"); + List_iterator<Cached_item> li(list); + int idx= -1,i; + Cached_item *buff; + + for (i=(int) list.elements-1 ; (buff=li++) ; i--) + { + if (buff->cmp()) + idx=i; + } + DBUG_PRINT("info", ("idx: %d", idx)); + DBUG_RETURN(idx); +} + + +/* + @return + -1 - Group not changed + value>=0 - Number of the component where the group changed +*/ + +int +test_if_group_changed(List<Cached_item> &list) +{ + DBUG_ENTER("test_if_group_changed"); + List_iterator<Cached_item> li(list); + int idx= -1,i; + Cached_item *buff; + + for (i=(int) list.elements-1 ; (buff=li++) ; i--) + { + if (buff->cmp()) + idx=i; + } + DBUG_PRINT("info", ("idx: %d", idx)); + DBUG_RETURN(idx); +} + + +/** + Setup copy_fields to save fields at start of new group. + + Setup copy_fields to save fields at start of new group + + Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups. + Change old item_field to use a new field with points at saved fieldvalue + This function is only called before use of send_result_set_metadata. 
+ + @param thd THD pointer + @param param temporary table parameters + @param ref_pointer_array array of pointers to top elements of filed list + @param res_selected_fields new list of items of select item list + @param res_all_fields new list of all items + @param elements number of elements in select item list + @param all_fields all fields list + + @todo + In most cases this result will be sent to the user. + This should be changed to use copy_int or copy_real depending + on how the value is to be used: In some cases this may be an + argument in a group function, like: IF(ISNULL(col),0,COUNT(*)) + + @retval + 0 ok + @retval + !=0 error +*/ + +bool +setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, + Ref_ptr_array ref_pointer_array, + List<Item> &res_selected_fields, List<Item> &res_all_fields, + uint elements, List<Item> &all_fields) +{ + Item *pos; + List_iterator_fast<Item> li(all_fields); + Copy_field *copy= NULL; + Copy_field *copy_start __attribute__((unused)); + res_selected_fields.empty(); + res_all_fields.empty(); + List_iterator_fast<Item> itr(res_all_fields); + List<Item> extra_funcs; + uint i, border= all_fields.elements - elements; + DBUG_ENTER("setup_copy_fields"); + + if (param->field_count && + !(copy=param->copy_field= new (thd->mem_root) Copy_field[param->field_count])) + goto err2; + + param->copy_funcs.empty(); + copy_start= copy; + for (i= 0; (pos= li++); i++) + { + Field *field; + uchar *tmp; + Item *real_pos= pos->real_item(); + /* + Aggregate functions can be substituted for fields (by e.g. temp tables). + We need to filter those substituted fields out. 
+ */ + if (real_pos->type() == Item::FIELD_ITEM && + !(real_pos != pos && + ((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF)) + { + Item_field *item; + if (!(item= new (thd->mem_root) Item_field(thd, ((Item_field*) real_pos)))) + goto err; + if (pos->type() == Item::REF_ITEM) + { + /* preserve the names of the ref when dereferncing */ + Item_ref *ref= (Item_ref *) pos; + item->db_name= ref->db_name; + item->table_name= ref->table_name; + item->name= ref->name; + } + pos= item; + if (item->field->flags & BLOB_FLAG) + { + if (!(pos= new (thd->mem_root) Item_copy_string(thd, pos))) + goto err; + /* + Item_copy_string::copy for function can call + Item_copy_string::val_int for blob via Item_ref. + But if Item_copy_string::copy for blob isn't called before, + it's value will be wrong + so let's insert Item_copy_string for blobs in the beginning of + copy_funcs + (to see full test case look at having.test, BUG #4358) + */ + if (param->copy_funcs.push_front(pos, thd->mem_root)) + goto err; + } + else + { + /* + set up save buffer and change result_field to point at + saved value + */ + field= item->field; + item->result_field=field->make_new_field(thd->mem_root, + field->table, 1); + /* + We need to allocate one extra byte for null handling and + another extra byte to not get warnings from purify in + Field_string::val_int + */ + if (!(tmp= (uchar*) thd->alloc(field->pack_length()+2))) + goto err; + if (copy) + { + DBUG_ASSERT (param->field_count > (uint) (copy - copy_start)); + copy->set(tmp, item->result_field); + item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1); +#ifdef HAVE_valgrind + copy->to_ptr[copy->from_length]= 0; +#endif + copy++; + } + } + } + else if ((real_pos->type() == Item::FUNC_ITEM || + real_pos->real_type() == Item::SUBSELECT_ITEM || + real_pos->type() == Item::CACHE_ITEM || + real_pos->type() == Item::COND_ITEM) && + !real_pos->with_sum_func()) + { // Save for send fields + LEX_CSTRING real_name= pos->name; + pos= real_pos; + 
pos->name= real_name; + /* TODO: + In most cases this result will be sent to the user. + This should be changed to use copy_int or copy_real depending + on how the value is to be used: In some cases this may be an + argument in a group function, like: IF(ISNULL(col),0,COUNT(*)) + */ + if (!(pos= pos->type_handler()->create_item_copy(thd, pos))) + goto err; + if (i < border) // HAVING, ORDER and GROUP BY + { + if (extra_funcs.push_back(pos, thd->mem_root)) + goto err; + } + else if (param->copy_funcs.push_back(pos, thd->mem_root)) + goto err; + } + res_all_fields.push_back(pos, thd->mem_root); + ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= + pos; + } + param->copy_field_end= copy; + + for (i= 0; i < border; i++) + itr++; + itr.sublist(res_selected_fields, elements); + /* + Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any + reference used in these will resolve to a item that is already calculated + */ + param->copy_funcs.append(&extra_funcs); + + DBUG_RETURN(0); + + err: + if (copy) + delete [] param->copy_field; // This is never 0 + param->copy_field= 0; +err2: + DBUG_RETURN(TRUE); +} + + +/** + Make a copy of all simple SELECT'ed items. + + This is done at the start of a new group so that we can retrieve + these later when the group changes. +*/ + +void +copy_fields(TMP_TABLE_PARAM *param) +{ + Copy_field *ptr=param->copy_field; + Copy_field *end=param->copy_field_end; + + DBUG_ASSERT((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL)); + + for (; ptr != end; ptr++) + (*ptr->do_copy)(ptr); + + List_iterator_fast<Item> it(param->copy_funcs); + Item_copy *item; + while ((item= (Item_copy*) it++)) + item->copy(); +} + + +/** + Make an array of pointers to sum_functions to speed up + sum_func calculation. 
+ + @retval + 0 ok + @retval + 1 Error +*/ + +bool JOIN::alloc_func_list() +{ + uint func_count, group_parts; + DBUG_ENTER("alloc_func_list"); + + func_count= tmp_table_param.sum_func_count; + /* + If we are using rollup, we need a copy of the summary functions for + each level + */ + if (rollup.state != ROLLUP::STATE_NONE) + func_count*= (send_group_parts+1); + + group_parts= send_group_parts; + /* + If distinct, reserve memory for possible + disctinct->group_by optimization + */ + if (select_distinct) + { + group_parts+= fields_list.elements; + /* + If the ORDER clause is specified then it's possible that + it also will be optimized, so reserve space for it too + */ + if (order) + { + ORDER *ord; + for (ord= order; ord; ord= ord->next) + group_parts++; + } + } + + /* This must use calloc() as rollup_make_fields depends on this */ + sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) + + sizeof(Item_sum***) * (group_parts+1)); + sum_funcs_end= (Item_sum***) (sum_funcs+func_count+1); + DBUG_RETURN(sum_funcs == 0); +} + + +/** + Initialize 'sum_funcs' array with all Item_sum objects. 
+ + @param field_list All items + @param send_result_set_metadata Items in select list + @param before_group_by Set to 1 if this is called before GROUP BY handling + + @retval + 0 ok + @retval + 1 error +*/ + +bool JOIN::make_sum_func_list(List<Item> &field_list, + List<Item> &send_result_set_metadata, + bool before_group_by) +{ + List_iterator_fast<Item> it(field_list); + Item_sum **func; + Item *item; + DBUG_ENTER("make_sum_func_list"); + + func= sum_funcs; + while ((item=it++)) + { + if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() && + (!((Item_sum*) item)->depended_from() || + ((Item_sum *)item)->depended_from() == select_lex)) + *func++= (Item_sum*) item; + } + if (before_group_by && rollup.state == ROLLUP::STATE_INITED) + { + rollup.state= ROLLUP::STATE_READY; + if (rollup_make_fields(field_list, send_result_set_metadata, &func)) + DBUG_RETURN(TRUE); // Should never happen + } + else if (rollup.state == ROLLUP::STATE_NONE) + { + for (uint i=0 ; i <= send_group_parts ;i++) + sum_funcs_end[i]= func; + } + else if (rollup.state == ROLLUP::STATE_READY) + DBUG_RETURN(FALSE); // Don't put end marker + *func=0; // End marker + DBUG_RETURN(FALSE); +} + + +/** + Change all funcs and sum_funcs to fields in tmp table, and create + new list of all items. 
+ + @param thd THD pointer + @param ref_pointer_array array of pointers to top elements of filed list + @param res_selected_fields new list of items of select item list + @param res_all_fields new list of all items + @param elements number of elements in select item list + @param all_fields all fields list + + @retval + 0 ok + @retval + !=0 error +*/ + +static bool +change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &res_selected_fields, + List<Item> &res_all_fields, + uint elements, List<Item> &all_fields) +{ + List_iterator_fast<Item> it(all_fields); + Item *item_field,*item; + DBUG_ENTER("change_to_use_tmp_fields"); + + res_selected_fields.empty(); + res_all_fields.empty(); + + uint border= all_fields.elements - elements; + for (uint i= 0; (item= it++); i++) + { + Field *field; + enum Item::Type item_type= item->type(); + if ((item->with_sum_func() && item_type != Item::SUM_FUNC_ITEM) || + item->with_window_func()) + item_field= item; + else if (item_type == Item::FIELD_ITEM || + item_type == Item::DEFAULT_VALUE_ITEM) + { + if (!(item_field= item->get_tmp_table_item(thd))) + DBUG_RETURN(true); + } + else if (item_type == Item::FUNC_ITEM && + ((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC) + { + field= item->get_tmp_table_field(); + if (field != NULL) + { + /* + Replace "@:=<expression>" with "@:=<tmp table + column>". Otherwise, we would re-evaluate <expression>, and + if expression were a subquery, this would access + already-unlocked tables. 
+ */ + Item_func_set_user_var* suv= + new (thd->mem_root) Item_func_set_user_var(thd, (Item_func_set_user_var*) item); + Item_field *new_field= new (thd->mem_root) Item_field(thd, field); + if (!suv || !new_field) + DBUG_RETURN(true); // Fatal error + new_field->set_refers_to_temp_table(); + List<Item> list; + list.push_back(new_field, thd->mem_root); + suv->set_arguments(thd, list); + item_field= suv; + } + else + item_field= item; + } + else if ((field= item->get_tmp_table_field())) + { + if (item->type() == Item::SUM_FUNC_ITEM && field->table->group) + { + item_field= ((Item_sum*) item)->result_item(thd, field); + } + else + { + item_field= (Item*) new (thd->mem_root) Item_field(thd, field); + if (item_field) + ((Item_field*) item_field)->set_refers_to_temp_table(); + } + if (!item_field) + DBUG_RETURN(true); // Fatal error + + if (item->real_item()->type() != Item::FIELD_ITEM) + field->orig_table= 0; + item_field->name= item->name; + if (item->type() == Item::REF_ITEM) + { + Item_field *ifield= (Item_field *) item_field; + Item_ref *iref= (Item_ref *) item; + ifield->table_name= iref->table_name; + ifield->db_name= iref->db_name; + } +#ifndef DBUG_OFF + if (!item_field->name.str) + { + char buff[256]; + String str(buff,sizeof(buff),&my_charset_bin); + str.length(0); + str.extra_allocation(1024); + item->print(&str, QT_ORDINARY); + item_field->name.str= thd->strmake(str.ptr(), str.length()); + item_field->name.length= str.length(); + } +#endif + } + else + item_field= item; + + res_all_fields.push_back(item_field, thd->mem_root); + ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= + item_field; + } + + List_iterator_fast<Item> itr(res_all_fields); + for (uint i= 0; i < border; i++) + itr++; + itr.sublist(res_selected_fields, elements); + DBUG_RETURN(false); +} + + +/** + Change all sum_func refs to fields to point at fields in tmp table. + Change all funcs to be fields in tmp table. 
+ + @param thd THD pointer + @param ref_pointer_array array of pointers to top elements of field list + @param res_selected_fields new list of items of select item list + @param res_all_fields new list of all items + @param elements number of elements in select item list + @param all_fields all fields list + + @retval + 0 ok + @retval + 1 error +*/ + +static bool +change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &res_selected_fields, + List<Item> &res_all_fields, uint elements, + List<Item> &all_fields) +{ + List_iterator_fast<Item> it(all_fields); + Item *item, *new_item; + res_selected_fields.empty(); + res_all_fields.empty(); + + uint i, border= all_fields.elements - elements; + for (i= 0; (item= it++); i++) + { + if (item->type() == Item::SUM_FUNC_ITEM && item->const_item()) + new_item= item; + else + { + if (!(new_item= item->get_tmp_table_item(thd))) + return 1; + } + + if (res_all_fields.push_back(new_item, thd->mem_root)) + return 1; + ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= + new_item; + } + + List_iterator_fast<Item> itr(res_all_fields); + for (i= 0; i < border; i++) + itr++; + itr.sublist(res_selected_fields, elements); + + return thd->is_error(); +} + + + +/****************************************************************************** + Code for calculating functions +******************************************************************************/ + + +/** + Call ::setup for all sum functions. 
+ + @param thd thread handler + @param func_ptr sum function list + + @retval + FALSE ok + @retval + TRUE error +*/ + +static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr) +{ + Item_sum *func; + DBUG_ENTER("setup_sum_funcs"); + while ((func= *(func_ptr++))) + { + if (func->aggregator_setup(thd)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +static bool prepare_sum_aggregators(THD *thd,Item_sum **func_ptr, + bool need_distinct) +{ + Item_sum *func; + DBUG_ENTER("prepare_sum_aggregators"); + while ((func= *(func_ptr++))) + { + if (func->set_aggregator(thd, + need_distinct && func->has_with_distinct() ? + Aggregator::DISTINCT_AGGREGATOR : + Aggregator::SIMPLE_AGGREGATOR)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +static void +init_tmptable_sum_functions(Item_sum **func_ptr) +{ + Item_sum *func; + while ((func= *(func_ptr++))) + func->reset_field(); +} + + +/** Update record 0 in tmp_table from record 1. */ + +static void +update_tmptable_sum_func(Item_sum **func_ptr, + TABLE *tmp_table __attribute__((unused))) +{ + Item_sum *func; + while ((func= *(func_ptr++))) + func->update_field(); +} + + +/** Copy result of sum functions to record in tmp_table. */ + +static void +copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr) +{ + for (; func_ptr != end_ptr ; func_ptr++) + (void) (*func_ptr)->save_in_result_field(1); + return; +} + + +static bool +init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr) +{ + for (; func_ptr != end_ptr ;func_ptr++) + { + if ((*func_ptr)->reset_and_add()) + return 1; + } + /* If rollup, calculate the upper sum levels */ + for ( ; *func_ptr ; func_ptr++) + { + if ((*func_ptr)->aggregator_add()) + return 1; + } + return 0; +} + + +static bool +update_sum_func(Item_sum **func_ptr) +{ + Item_sum *func; + for (; (func= (Item_sum*) *func_ptr) ; func_ptr++) + if (func->aggregator_add()) + return 1; + return 0; +} + +/** + Copy result of functions to record in tmp_table. 
+ + Uses the thread pointer to check for errors in + some of the val_xxx() methods called by the + save_in_result_field() function. + TODO: make the Item::val_xxx() return error code + + @param func_ptr array of the function Items to copy to the tmp table + @param thd pointer to the current thread for error checking + @retval + FALSE if OK + @retval + TRUE on error +*/ + +bool +copy_funcs(Item **func_ptr, const THD *thd) +{ + Item *func; + for (; (func = *func_ptr) ; func_ptr++) + { + if (func->type() == Item::FUNC_ITEM && + ((Item_func *) func)->with_window_func()) + continue; + func->save_in_result_field(1); + /* + Need to check the THD error state because Item::val_xxx() don't + return error code, but can generate errors + TODO: change it for a real status check when Item::val_xxx() + are extended to return status code. + */ + if (unlikely(thd->is_error())) + return TRUE; + } + return FALSE; +} + + +/** + Create a condition for a const reference and add this to the + currenct select for the table. +*/ + +static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab) +{ + DBUG_ENTER("add_ref_to_table_cond"); + if (!join_tab->ref.key_parts) + DBUG_RETURN(FALSE); + + Item_cond_and *cond= new (thd->mem_root) Item_cond_and(thd); + TABLE *table=join_tab->table; + int error= 0; + if (!cond) + DBUG_RETURN(TRUE); + + for (uint i=0 ; i < join_tab->ref.key_parts ; i++) + { + Field *field=table->field[table->key_info[join_tab->ref.key].key_part[i]. 
+ fieldnr-1]; + Item *value=join_tab->ref.items[i]; + cond->add(new (thd->mem_root) + Item_func_equal(thd, new (thd->mem_root) Item_field(thd, field), + value), + thd->mem_root); + } + if (unlikely(thd->is_error())) + DBUG_RETURN(TRUE); + if (!cond->fixed()) + { + Item *tmp_item= (Item*) cond; + cond->fix_fields(thd, &tmp_item); + DBUG_ASSERT(cond == tmp_item); + } + if (join_tab->select) + { + Item *UNINIT_VAR(cond_copy); + if (join_tab->select->pre_idx_push_select_cond) + cond_copy= cond->copy_andor_structure(thd); + if (join_tab->select->cond) + error=(int) cond->add(join_tab->select->cond, thd->mem_root); + join_tab->select->cond= cond; + if (join_tab->select->pre_idx_push_select_cond) + { + Item *new_cond= and_conds(thd, cond_copy, + join_tab->select->pre_idx_push_select_cond); + if (new_cond->fix_fields_if_needed(thd, &new_cond)) + error= 1; + join_tab->pre_idx_push_select_cond= + join_tab->select->pre_idx_push_select_cond= new_cond; + } + join_tab->set_select_cond(cond, __LINE__); + } + else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, + (SORT_INFO*) 0, 0, &error))) + join_tab->set_select_cond(cond, __LINE__); + + DBUG_RETURN(error ? TRUE : FALSE); +} + + +/** + Free joins of subselect of this select. + + @param thd THD pointer + @param select pointer to st_select_lex which subselects joins we will free +*/ + +void free_underlaid_joins(THD *thd, SELECT_LEX *select) +{ + for (SELECT_LEX_UNIT *unit= select->first_inner_unit(); + unit; + unit= unit->next_unit()) + unit->cleanup(); +} + +/**************************************************************************** + ROLLUP handling +****************************************************************************/ + +/** + Replace occurrences of group by fields in an expression by ref items. + + The function replaces occurrences of group by fields in expr + by ref objects for these fields unless they are under aggregate + functions. 
+ The function also corrects value of the the maybe_null attribute + for the items of all subexpressions containing group by fields. + + @b EXAMPLES + @code + SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP + SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP + @endcode + + @b IMPLEMENTATION + + The function recursively traverses the tree of the expr expression, + looks for occurrences of the group by fields that are not under + aggregate functions and replaces them for the corresponding ref items. + + @note + This substitution is needed GROUP BY queries with ROLLUP if + SELECT list contains expressions over group by attributes. + + @param thd reference to the context + @param expr expression to make replacement + @param group_list list of references to group by items + @param changed out: returns 1 if item contains a replaced field item + + @todo + - TODO: Some functions are not null-preserving. For those functions + updating of the maybe_null attribute is an overkill. + + @retval + 0 if ok + @retval + 1 on error +*/ + +static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list, + bool *changed) +{ + if (expr->argument_count()) + { + Name_resolution_context *context= &thd->lex->current_select->context; + Item **arg,**arg_end; + bool arg_changed= FALSE; + for (arg= expr->arguments(), + arg_end= expr->arguments() + expr->argument_count(); + arg != arg_end; arg++) + { + Item *item= *arg; + if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM) + { + ORDER *group_tmp; + for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next) + { + if (item->eq(*group_tmp->item,0)) + { + Item *new_item; + if (!(new_item= new (thd->mem_root) Item_ref(thd, context, + group_tmp->item, + null_clex_str, + item->name))) + return 1; // fatal_error is set + thd->change_item_tree(arg, new_item); + arg_changed= TRUE; + } + } + } + else if (item->type() == Item::FUNC_ITEM) + { + if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed)) + return 1; + 
} + } + if (arg_changed) + { + expr->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP; + *changed= TRUE; + } + } + return 0; +} + + +/** Allocate memory needed for other rollup functions. */ + +bool JOIN::rollup_init() +{ + uint i,j; + Item **ref_array; + + tmp_table_param.quick_group= 0; // Can't create groups in tmp table + /* + Each group can potentially be replaced with Item_func_rollup_const() which + needs a copy_func placeholder. + */ + tmp_table_param.func_count+= send_group_parts; + rollup.state= ROLLUP::STATE_INITED; + + /* + Create pointers to the different sum function groups + These are updated by rollup_make_fields() + */ + tmp_table_param.group_parts= send_group_parts; + + Item_null_result **null_items= + static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts)); + + rollup.null_items= Item_null_array(null_items, send_group_parts); + rollup.ref_pointer_arrays= + static_cast<Ref_ptr_array*> + (thd->alloc((sizeof(Ref_ptr_array) + + all_fields.elements * sizeof(Item*)) * send_group_parts)); + rollup.fields= + static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts)); + + if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields) + return true; + + ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts); + + /* + Prepare space for field list for the different levels + These will be filled up in rollup_make_fields() + */ + for (i= 0 ; i < send_group_parts ; i++) + { + if (!(rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd))) + return true; + + List<Item> *rollup_fields= &rollup.fields[i]; + rollup_fields->empty(); + rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements); + ref_array+= all_fields.elements; + } + for (i= 0 ; i < send_group_parts; i++) + { + for (j=0 ; j < fields_list.elements ; j++) + rollup.fields[i].push_back(rollup.null_items[i], thd->mem_root); + } + List_iterator<Item> it(all_fields); + Item *item; + while ((item= it++)) + { + ORDER 
*group_tmp; + bool found_in_group= 0; + + for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next) + { + if (*group_tmp->item == item) + { + item->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP; + found_in_group= 1; + break; + } + } + if (item->type() == Item::FUNC_ITEM && !found_in_group) + { + bool changed= FALSE; + if (change_group_ref(thd, (Item_func *) item, group_list, &changed)) + return 1; + /* + We have to prevent creation of a field in a temporary table for + an expression that contains GROUP BY attributes. + Marking the expression item as 'with_sum_func' will ensure this. + */ + if (changed) + item->with_flags|= item_with_t::SUM_FUNC; + } + } + return 0; +} + +/** + Wrap all constant Items in GROUP BY list. + + For ROLLUP queries each constant item referenced in GROUP BY list + is wrapped up into an Item_func object yielding the same value + as the constant item. The objects of the wrapper class are never + considered as constant items and besides they inherit all + properties of the Item_result_field class. + This wrapping allows us to ensure writing constant items + into temporary tables whenever the result of the ROLLUP + operation has to be written into a temporary table, e.g. when + ROLLUP is used together with DISTINCT in the SELECT list. + Usually when creating temporary tables for a intermidiate + result we do not include fields for constant expressions. 
+ + @retval + 0 if ok + @retval + 1 on error +*/ + +bool JOIN::rollup_process_const_fields() +{ + ORDER *group_tmp; + Item *item; + List_iterator<Item> it(all_fields); + + for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next) + { + if (!(*group_tmp->item)->const_item()) + continue; + while ((item= it++)) + { + if (*group_tmp->item == item) + { + Item* new_item= new (thd->mem_root) Item_func_rollup_const(thd, item); + if (!new_item) + return 1; + new_item->fix_fields(thd, (Item **) 0); + thd->change_item_tree(it.ref(), new_item); + for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next) + { + if (*tmp->item == item) + thd->change_item_tree(tmp->item, new_item); + } + break; + } + } + it.rewind(); + } + return 0; +} + + +/** + Fill up rollup structures with pointers to fields to use. + + Creates copies of item_sum items for each sum level. + + @param fields_arg List of all fields (hidden and real ones) + @param sel_fields Pointer to selected fields + @param func Store here a pointer to all fields + + @retval + 0 if ok; + In this case func is pointing to next not used element. + @retval + 1 on error +*/ + +bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields, + Item_sum ***func) +{ + List_iterator_fast<Item> it(fields_arg); + Item *first_field= sel_fields.head(); + uint level; + + /* + Create field lists for the different levels + + The idea here is to have a separate field list for each rollup level to + avoid all runtime checks of which columns should be NULL. + + The list is stored in reverse order to get sum function in such an order + in func that it makes it easy to reset them with init_sum_functions() + + Assuming: SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP + + rollup.fields[0] will contain list where a,b,c is NULL + rollup.fields[1] will contain list where b,c is NULL + ... + rollup.ref_pointer_array[#] points to fields for rollup.fields[#] + ... 
+ sum_funcs_end[0] points to all sum functions + sum_funcs_end[1] points to all sum functions, except grand totals + ... + */ + + for (level=0 ; level < send_group_parts ; level++) + { + uint i; + uint pos= send_group_parts - level -1; + bool real_fields= 0; + Item *item; + List_iterator<Item> new_it(rollup.fields[pos]); + Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos]; + ORDER *start_group; + + /* Point to first hidden field */ + uint ref_array_ix= fields_arg.elements-1; + + /* Remember where the sum functions ends for the previous level */ + sum_funcs_end[pos+1]= *func; + + /* Find the start of the group for this level */ + for (i= 0, start_group= group_list ; + i++ < pos ; + start_group= start_group->next) + ; + + it.rewind(); + while ((item= it++)) + { + if (item == first_field) + { + real_fields= 1; // End of hidden fields + ref_array_ix= 0; + } + + if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() && + (!((Item_sum*) item)->depended_from() || + ((Item_sum *)item)->depended_from() == select_lex)) + + { + /* + This is a top level summary function that must be replaced with + a sum function that is reset for this level. + + NOTE: This code creates an object which is not that nice in a + sub select. Fortunately it's not common to have rollup in + sub selects. 
+ */ + item= item->copy_or_same(thd); + ((Item_sum*) item)->make_unique(); + *(*func)= (Item_sum*) item; + (*func)++; + } + else + { + /* Check if this is something that is part of this group by */ + ORDER *group_tmp; + for (group_tmp= start_group, i= pos ; + group_tmp ; group_tmp= group_tmp->next, i++) + { + if (*group_tmp->item == item) + { + /* + This is an element that is used by the GROUP BY and should be + set to NULL in this level + */ + Item_null_result *null_item= new (thd->mem_root) Item_null_result(thd); + if (!null_item) + return 1; + // Value will be null sometimes + item->set_maybe_null(); + null_item->result_field= item->get_tmp_table_field(); + item= null_item; + break; + } + } + } + ref_array_start[ref_array_ix]= item; + if (real_fields) + { + (void) new_it++; // Point to next item + new_it.replace(item); // Replace previous + ref_array_ix++; + } + else + ref_array_ix--; + } + } + sum_funcs_end[0]= *func; // Point to last function + return 0; +} + +/** + Send all rollup levels higher than the current one to the client. + + @b SAMPLE + @code + SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP + @endcode + + @param idx Level we are on: + - 0 = Total sum level + - 1 = First group changed (a) + - 2 = Second group changed (a,b) + + @retval + 0 ok + @retval + 1 If send_data_failed() +*/ + +int JOIN::rollup_send_data(uint idx) +{ + uint i; + for (i= send_group_parts ; i-- > idx ; ) + { + int res= 0; + /* Get reference pointers to sum functions in place */ + copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); + if ((!having || having->val_int())) + { + if (send_records < unit->lim.get_select_limit() && do_send_rows && + (res= result->send_data_with_check(rollup.fields[i], + unit, send_records)) > 0) + return 1; + if (!res) + send_records++; + } + } + /* Restore ref_pointer_array */ + set_items_ref_array(current_ref_ptrs); + return 0; +} + +/** + Write all rollup levels higher than the current one to a temp table. 
+ + @b SAMPLE + @code + SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP + @endcode + + @param idx Level we are on: + - 0 = Total sum level + - 1 = First group changed (a) + - 2 = Second group changed (a,b) + @param table reference to temp table + + @retval + 0 ok + @retval + 1 if write_data_failed() +*/ + +int JOIN::rollup_write_data(uint idx, TMP_TABLE_PARAM *tmp_table_param_arg, + TABLE *table_arg) +{ + uint i; + for (i= send_group_parts ; i-- > idx ; ) + { + /* Get reference pointers to sum functions in place */ + copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); + if ((!having || having->val_int())) + { + int write_error; + Item *item; + List_iterator_fast<Item> it(rollup.fields[i]); + while ((item= it++)) + { + if (item->type() == Item::NULL_ITEM && item->is_result_field()) + item->save_in_result_field(1); + } + copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]); + if (unlikely((write_error= + table_arg->file->ha_write_tmp_row(table_arg->record[0])))) + { + if (create_internal_tmp_table_from_heap(thd, table_arg, + tmp_table_param_arg->start_recinfo, + &tmp_table_param_arg->recinfo, + write_error, 0, NULL)) + return 1; + } + } + } + /* Restore ref_pointer_array */ + set_items_ref_array(current_ref_ptrs); + return 0; +} + +/** + clear results if there are not rows found for group + (end_send_group/end_write_group) +*/ + +void inline JOIN::clear_sum_funcs() +{ + if (sum_funcs) + { + Item_sum *func, **func_ptr= sum_funcs; + while ((func= *(func_ptr++))) + func->clear(); + } +} + + +/* + Prepare for returning 'empty row' when there is no matching row. + + - Mark all tables with mark_as_null_row() + - Make a copy of of all simple SELECT items + - Reset all sum functions to NULL or 0. 
+*/ + +void JOIN::clear(table_map *cleared_tables) +{ + clear_tables(this, cleared_tables); + copy_fields(&tmp_table_param); + clear_sum_funcs(); +} + + +/** + Print an EXPLAIN line with all NULLs and given message in the 'Extra' column + + @retval + 0 ok + 1 OOM error or error from send_data() +*/ + +int print_explain_message_line(select_result_sink *result, + uint8 options, bool is_analyze, + uint select_number, + const char *select_type, + ha_rows *rows, + const char *message) +{ + /* Note: for SHOW EXPLAIN, this is caller thread's THD */ + THD *thd= result->thd; + MEM_ROOT *mem_root= thd->mem_root; + Item *item_null= new (mem_root) Item_null(thd); + List<Item> item_list; + + item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number), + mem_root); + item_list.push_back(new (mem_root) Item_string_sys(thd, select_type), + mem_root); + /* `table` */ + item_list.push_back(item_null, mem_root); + + /* `partitions` */ + if (options & DESCRIBE_PARTITIONS) + item_list.push_back(item_null, mem_root); + + /* type, possible_keys, key, key_len, ref */ + for (uint i=0 ; i < 5; i++) + item_list.push_back(item_null, mem_root); + + /* `rows` */ + StringBuffer<64> rows_str; + if (rows) + { + rows_str.append_ulonglong((ulonglong)(*rows)); + item_list.push_back(new (mem_root) + Item_string_sys(thd, rows_str.ptr(), + rows_str.length()), mem_root); + } + else + item_list.push_back(item_null, mem_root); + + /* `r_rows` */ + if (is_analyze) + item_list.push_back(item_null, mem_root); + + /* `filtered` */ + if (is_analyze || options & DESCRIBE_EXTENDED) + item_list.push_back(item_null, mem_root); + + /* `r_filtered` */ + if (is_analyze) + item_list.push_back(item_null, mem_root); + + /* `Extra` */ + if (message) + item_list.push_back(new (mem_root) Item_string_sys(thd, message), + mem_root); + else + item_list.push_back(item_null, mem_root); + + if (unlikely(thd->is_error()) || unlikely(result->send_data(item_list))) + return 1; + return 0; +} + + +/* + Append MRR 
information from quick select to the given string +*/ + +void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res) +{ + char mrr_str_buf[128]; + mrr_str_buf[0]=0; + int len; + handler *h= quick->head->file; + len= h->multi_range_read_explain_info(quick->mrr_flags, mrr_str_buf, + sizeof(mrr_str_buf)); + if (len > 0) + { + //res->append(STRING_WITH_LEN("; ")); + res->append(mrr_str_buf, len); + } +} + + +/////////////////////////////////////////////////////////////////////////////// +int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, + key_map possible_keys) +{ + uint j; + for (j=0 ; j < table->s->keys ; j++) + { + if (possible_keys.is_set(j)) + if (!(list.append_str(alloc, table->key_info[j].name.str))) + return 1; + } + return 0; +} + + +bool JOIN_TAB::save_explain_data(Explain_table_access *eta, + table_map prefix_tables, + bool distinct_arg, JOIN_TAB *first_top_tab) +{ + int quick_type; + CHARSET_INFO *cs= system_charset_info; + THD *thd= join->thd; + TABLE_LIST *table_list= table->pos_in_table_list; + QUICK_SELECT_I *cur_quick= NULL; + my_bool key_read; + char table_name_buffer[SAFE_NAME_LEN]; + KEY *key_info= 0; + uint key_len= 0; + quick_type= -1; + + explain_plan= eta; + eta->key.clear(); + eta->quick_info= NULL; + + /* + We assume that if this table does pre-sorting, then it doesn't do filtering + with SQL_SELECT. + */ + DBUG_ASSERT(!(select && filesort)); + const SQL_SELECT *tab_select= get_sql_select(); + + if (filesort) + { + if (!(eta->pre_join_sort= + new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, + thd->lex->analyze_stmt, + filesort))) + return 1; + } + // psergey-todo: data for filtering! 
+ tracker= &eta->tracker; + jbuf_tracker= &eta->jbuf_tracker; + jbuf_loops_tracker= &eta->jbuf_loops_tracker; + jbuf_unpack_tracker= &eta->jbuf_unpack_tracker; + + /* Enable the table access time tracker only for "ANALYZE stmt" */ + if (unlikely(thd->lex->analyze_stmt || + thd->variables.log_slow_verbosity & + LOG_SLOW_VERBOSITY_ENGINE)) + { + table->file->set_time_tracker(&eta->op_tracker); + + /* + Set handler_for_stats even if we are not running an ANALYZE command. + There's no harm, and in case somebody runs a SHOW ANALYZE command we'll + be able to print the engine statistics. + */ + if (table->file->handler_stats && + table->s->tmp_table != INTERNAL_TMP_TABLE) + eta->handler_for_stats= table->file; + + if (likely(thd->lex->analyze_stmt)) + { + eta->op_tracker.set_gap_tracker(&eta->extra_time_tracker); + eta->jbuf_unpack_tracker.set_gap_tracker(&eta->jbuf_extra_time_tracker); + } + } + /* No need to save id and select_type here, they are kept in Explain_select */ + + /* table */ + if (table->derived_select_number) + { + /* Derived table name generation */ + size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1, + "<derived%u>", + table->derived_select_number); + eta->table_name.copy(table_name_buffer, len, cs); + } + else if (bush_children) + { + JOIN_TAB *ctab= bush_children->start; + /* table */ + size_t len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<subquery%d>", + ctab->emb_sj_nest->sj_subq_pred->get_identifier()); + eta->table_name.copy(table_name_buffer, len, cs); + } + else + { + TABLE_LIST *real_table= table->pos_in_table_list; + /* + When multi-table UPDATE/DELETE does updates/deletes to a VIEW, the view + is merged in a certain particular way (grep for DT_MERGE_FOR_INSERT). + + As a result, view's underlying tables have $tbl->pos_in_table_list={view}. + We don't want to print view name in EXPLAIN, we want underlying table's + alias (like specified in the view definition). 
+ */ + if (real_table->merged_for_insert) + { + TABLE_LIST *view_child= + real_table->view->first_select_lex()->table_list.first; + for (;view_child; view_child= view_child->next_local) + { + if (view_child->table == table) + { + real_table= view_child; + break; + } + } + } + eta->table_name.copy(real_table->alias.str, real_table->alias.length, cs); + } + + /* "partitions" column */ + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + if (!table->derived_select_number && + (part_info= table->part_info)) + { //TODO: all thd->mem_root here should be fixed + make_used_partitions_str(thd->mem_root, part_info, &eta->used_partitions, + eta->used_partitions_list); + eta->used_partitions_set= true; + } + else + eta->used_partitions_set= false; +#else + /* just produce empty column if partitioning is not compiled in */ + eta->used_partitions_set= false; +#endif + } + + /* "type" column */ + enum join_type tab_type= type; + if ((type == JT_ALL || type == JT_HASH) && + tab_select && tab_select->quick && use_quick != 2) + { + cur_quick= tab_select->quick; + quick_type= cur_quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) + tab_type= type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE; + else + tab_type= type == JT_ALL ? JT_RANGE : JT_HASH_RANGE; + } + eta->type= tab_type; + + /* Build "possible_keys" value */ + // psergey-todo: why does this use thd MEM_ROOT??? Doesn't this + // break ANALYZE ? thd->mem_root will be freed, and after that we will + // attempt to print the query plan? 
+ if (append_possible_keys(thd->mem_root, eta->possible_keys, table, keys)) + return 1; + // psergey-todo: ^ check for error return code + + /* Build "key", "key_len", and "ref" */ + + if (rowid_filter) + { + Range_rowid_filter *range_filter= (Range_rowid_filter *) rowid_filter; + QUICK_SELECT_I *quick= range_filter->get_select()->quick; + + Explain_rowid_filter *erf= new (thd->mem_root) Explain_rowid_filter; + erf->quick= quick->get_explain(thd->mem_root); + erf->selectivity= range_rowid_filter_info->selectivity; + erf->rows= quick->records; + if (!(erf->tracker= new Rowid_filter_tracker(thd->lex->analyze_stmt))) + return 1; + rowid_filter->set_tracker(erf->tracker); + eta->rowid_filter= erf; + } + + if (tab_type == JT_NEXT) + { + key_info= table->key_info+index; + key_len= key_info->key_length; + } + else if (ref.key_parts) + { + key_info= get_keyinfo_by_key_no(ref.key); + key_len= ref.key_length; + } + + /* + In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type + that still have quick selects. + */ + if (tab_select && tab_select->quick && tab_type != JT_CONST) + { + if (!(eta->quick_info= tab_select->quick->get_explain(thd->mem_root))) + return 1; + } + + if (key_info) /* 'index' or 'ref' access */ + { + eta->key.set(thd->mem_root, key_info, key_len); + + if (ref.key_parts && tab_type != JT_FT) + { + store_key **key_ref= ref.key_copy; + for (uint kp= 0; kp < ref.key_parts; kp++) + { + if ((key_part_map(1) << kp) & ref.const_ref_part_map) + { + if (!(eta->ref_list.append_str(thd->mem_root, "const"))) + return 1; + /* + create_ref_for_key() handles keypart=const equalities as follows: + - non-EXPLAIN execution will copy the "const" to lookup tuple + immediately and will not add an element to ref.key_copy + - EXPLAIN will put an element into ref.key_copy. 
Since we've + just printed "const" for it, we should skip it here + */ + if (thd->lex->describe) + key_ref++; + } + else + { + if (!(eta->ref_list.append_str(thd->mem_root, (*key_ref)->name()))) + return 1; + key_ref++; + } + } + } + } + + if (tab_type == JT_HASH_NEXT) /* full index scan + hash join */ + { + eta->hash_next_key.set(thd->mem_root, + & table->key_info[index], + table->key_info[index].key_length); + // psergey-todo: ^ is the above correct? are we necessarily joining on all + // columns? + } + + if (!key_info) + { + if (table_list && /* SJM bushes don't have table_list */ + table_list->schema_table && + table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) + { + IS_table_read_plan *is_table_read_plan= table_list->is_table_read_plan; + StringBuffer<64> key_name_buf; + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_db_lookup_value()) + { + /* The "key" has the name of the column referring to the database */ + int f_idx= table_list->schema_table->idx_field1; + LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name(); + key_name_buf.append(tmp, cs); + } + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_table_lookup_value()) + { + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_db_lookup_value()) + key_name_buf.append(','); + + int f_idx= table_list->schema_table->idx_field2; + LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name(); + key_name_buf.append(tmp, cs); + } + + if (key_name_buf.length()) + eta->key.set_pseudo_key(thd->mem_root, key_name_buf.c_ptr_safe()); + } + } + + /* "rows" */ + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table) + { + /* I_S tables have rows=extra=NULL */ + eta->rows_set= false; + eta->filtered_set= false; + } + else + { + ha_rows examined_rows= get_examined_rows(); + + eta->rows_set= true; + eta->rows= examined_rows; + + /* "filtered" */ + float f= 0.0; + if (examined_rows) + 
{ + double pushdown_cond_selectivity= cond_selectivity; + if (pushdown_cond_selectivity == 1.0) + f= (float) (100.0 * records_read / examined_rows); + else + f= (float) (100.0 * pushdown_cond_selectivity); + } + set_if_smaller(f, 100.0); + eta->filtered_set= true; + eta->filtered= f; + } + + /* Build "Extra" field and save it */ + key_read= table->file->keyread_enabled(); + if ((tab_type == JT_NEXT || tab_type == JT_CONST) && + table->covering_keys.is_set(index)) + key_read=1; + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT && + !((QUICK_ROR_INTERSECT_SELECT*)cur_quick)->need_to_fetch_row) + key_read=1; + + if (table_list->table_function) + eta->push_extra(ET_TABLE_FUNCTION); + + if (info) + { + eta->push_extra(info); + } + else if (packed_info & TAB_INFO_HAVE_VALUE) + { + if (packed_info & TAB_INFO_USING_INDEX) + eta->push_extra(ET_USING_INDEX); + if (packed_info & TAB_INFO_USING_WHERE) + eta->push_extra(ET_USING_WHERE); + if (packed_info & TAB_INFO_FULL_SCAN_ON_NULL) + eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY); + } + else + { + uint keyno= MAX_KEY; + if (ref.key_parts) + keyno= ref.key; + else if (tab_select && cur_quick) + keyno = cur_quick->index; + + if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno && + table->file->pushed_idx_cond) + { + eta->push_extra(ET_USING_INDEX_CONDITION); + eta->pushed_index_cond= table->file->pushed_idx_cond; + } + else if (cache_idx_cond) + { + eta->push_extra(ET_USING_INDEX_CONDITION_BKA); + eta->pushed_index_cond= cache_idx_cond; + } + + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + { + eta->push_extra(ET_USING); + } + if (tab_select) + { + if (use_quick == 2) + { + eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD); + eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer; + if (eta->range_checked_fer) + 
eta->range_checked_fer-> + append_possible_keys_stat(thd->mem_root, table, keys); + } + else if (tab_select->cond || + (cache_select && cache_select->cond)) + { + const COND *pushed_cond= table->file->pushed_cond; + + if ((table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && + pushed_cond) + { + eta->push_extra(ET_USING_WHERE_WITH_PUSHED_CONDITION); + } + else + { + eta->where_cond= tab_select->cond; + eta->cache_cond= cache_select? cache_select->cond : NULL; + eta->push_extra(ET_USING_WHERE); + } + } + } + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table && + table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) + { + if (!table_list->table_open_method) + eta->push_extra(ET_SKIP_OPEN_TABLE); + else if (table_list->table_open_method == OPEN_FRM_ONLY) + eta->push_extra(ET_OPEN_FRM_ONLY); + else + eta->push_extra(ET_OPEN_FULL_TABLE); + /* psergey-note: the following has a bug.*/ + if (table_list->is_table_read_plan->trivial_show_command || + (table_list->is_table_read_plan->has_db_lookup_value() && + table_list->is_table_read_plan->has_table_lookup_value())) + eta->push_extra(ET_SCANNED_0_DATABASES); + else if (table_list->is_table_read_plan->has_db_lookup_value() || + table_list->is_table_read_plan->has_table_lookup_value()) + eta->push_extra(ET_SCANNED_1_DATABASE); + else + eta->push_extra(ET_SCANNED_ALL_DATABASES); + } + if (key_read) + { + if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) + { + QUICK_GROUP_MIN_MAX_SELECT *qgs= + (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick; + eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY); + eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning(); + } + else + eta->push_extra(ET_USING_INDEX); + } + if (table->reginfo.not_exists_optimize) + eta->push_extra(ET_NOT_EXISTS); + + if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) + { + explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick), + &eta->mrr_type); + if (eta->mrr_type.length() > 0) + 
eta->push_extra(ET_USING_MRR); + } + + if (shortcut_for_distinct) + eta->push_extra(ET_DISTINCT); + + if (loosescan_match_tab) + { + eta->push_extra(ET_LOOSESCAN); + } + + if (first_weedout_table) + { + eta->start_dups_weedout= true; + eta->push_extra(ET_START_TEMPORARY); + } + if (check_weed_out_table) + { + eta->push_extra(ET_END_TEMPORARY); + eta->end_dups_weedout= true; + } + + else if (do_firstmatch) + { + if (do_firstmatch == /*join->join_tab*/ first_top_tab - 1) + eta->push_extra(ET_FIRST_MATCH); + else + { + eta->push_extra(ET_FIRST_MATCH); + TABLE *prev_table=do_firstmatch->table; + if (prev_table->derived_select_number) + { + char namebuf[NAME_LEN]; + /* Derived table name generation */ + size_t len= my_snprintf(namebuf, sizeof(namebuf)-1, + "<derived%u>", + prev_table->derived_select_number); + eta->firstmatch_table_name.append(namebuf, len); + } + else + eta->firstmatch_table_name.append(&prev_table->pos_in_table_list->alias); + } + } + + for (uint part= 0; part < ref.key_parts; part++) + { + if (ref.cond_guards[part]) + { + eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY); + eta->full_scan_on_null_key= true; + break; + } + } + + if (cache) + { + eta->push_extra(ET_USING_JOIN_BUFFER); + if (cache->save_explain_data(&eta->bka_type)) + return 1; + } + } + + /* + In case this is a derived table, here we remember the number of + subselect that used to produce it. 
+ */ + if (!(table_list && table_list->is_with_table_recursive_reference())) + eta->derived_select_number= table->derived_select_number; + + /* The same for non-merged semi-joins */ + eta->non_merged_sjm_number = get_non_merged_semijoin_select(); + + return 0; +} + + +/* + Walk through join->aggr_tables and save aggregation/grouping query plan into + an Explain_select object + + @retval + 0 ok + 1 error +*/ + +bool save_agg_explain_data(JOIN *join, Explain_select *xpl_sel) +{ + JOIN_TAB *join_tab=join->join_tab + join->exec_join_tab_cnt(); + Explain_aggr_node *prev_node; + Explain_aggr_node *node= xpl_sel->aggr_tree; + bool is_analyze= join->thd->lex->analyze_stmt; + THD *thd= join->thd; + + for (uint i= 0; i < join->aggr_tables; i++, join_tab++) + { + // Each aggregate means a temp.table + prev_node= node; + if (!(node= new (thd->mem_root) Explain_aggr_tmp_table)) + return 1; + node->child= prev_node; + + if (join_tab->window_funcs_step) + { + Explain_aggr_node *new_node= + join_tab->window_funcs_step->save_explain_plan(thd->mem_root, + is_analyze); + if (!new_node) + return 1; + + prev_node=node; + node= new_node; + node->child= prev_node; + } + + /* The below matches execution in join_init_read_record() */ + if (join_tab->distinct) + { + prev_node= node; + if (!(node= new (thd->mem_root) Explain_aggr_remove_dups)) + return 1; + node->child= prev_node; + } + + if (join_tab->filesort) + { + Explain_aggr_filesort *eaf = + new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, is_analyze, join_tab->filesort); + if (!eaf) + return 1; + prev_node= node; + node= eaf; + node->child= prev_node; + } + } + xpl_sel->aggr_tree= node; + return 0; +} + + +/** + Save Query Plan Footprint + + @note + Currently, this function may be called multiple times + + @retval + 0 ok + 1 error +*/ + +int JOIN::save_explain_data_intern(Explain_query *output, + bool need_tmp_table_arg, + bool need_order_arg, bool distinct_arg, + const char *message) +{ + JOIN *join= this; /* Legacy: this 
code used to be a non-member function */ + DBUG_ENTER("JOIN::save_explain_data_intern"); + DBUG_PRINT("info", ("Select %p (%u), type %s, message %s", + join->select_lex, join->select_lex->select_number, + join->select_lex->type, + message ? message : "NULL")); + DBUG_ASSERT(have_query_plan == QEP_AVAILABLE); + /* fake_select_lex is created/printed by Explain_union */ + DBUG_ASSERT(join->select_lex != join->unit->fake_select_lex); + + /* There should be no attempts to save query plans for merged selects */ + DBUG_ASSERT(!join->select_lex->master_unit()->derived || + join->select_lex->master_unit()->derived->is_materialized_derived() || + join->select_lex->master_unit()->derived->is_with_table()); + + /* Don't log this into the slow query log */ + + if (message) + { + if (!(explain= new (output->mem_root) + Explain_select(output->mem_root, + thd->lex->analyze_stmt))) + DBUG_RETURN(1); +#ifndef DBUG_OFF + explain->select_lex= select_lex; +#endif + join->select_lex->set_explain_type(true); + + explain->select_id= join->select_lex->select_number; + explain->select_type= join->select_lex->type; + explain->linkage= select_lex->get_linkage(); + explain->using_temporary= need_tmp; + explain->using_filesort= need_order_arg; + /* Setting explain->message means that all other members are invalid */ + explain->message= message; + + if (select_lex->master_unit()->derived) + explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + if (save_agg_explain_data(this, explain)) + DBUG_RETURN(1); + + output->add_node(explain); + } + else if (pushdown_query) + { + if (!(explain= new (output->mem_root) + Explain_select(output->mem_root, + thd->lex->analyze_stmt))) + DBUG_RETURN(1); + select_lex->set_explain_type(true); + + explain->select_id= select_lex->select_number; + explain->select_type= select_lex->type; + explain->linkage= select_lex->get_linkage(); + explain->using_temporary= need_tmp; + explain->using_filesort= need_order_arg; + explain->message= "Storage engine handles 
GROUP BY"; + + if (select_lex->master_unit()->derived) + explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + output->add_node(explain); + } + else + { + Explain_select *xpl_sel; + explain= xpl_sel= + new (output->mem_root) Explain_select(output->mem_root, + thd->lex->analyze_stmt); + if (!explain) + DBUG_RETURN(1); + + table_map used_tables=0; + + join->select_lex->set_explain_type(true); + xpl_sel->select_id= join->select_lex->select_number; + xpl_sel->select_type= join->select_lex->type; + xpl_sel->linkage= select_lex->get_linkage(); + xpl_sel->is_lateral= ((select_lex->get_linkage() == DERIVED_TABLE_TYPE) && + (select_lex->uncacheable & UNCACHEABLE_DEPENDENT)); + if (select_lex->master_unit()->derived) + xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + + if (save_agg_explain_data(this, xpl_sel)) + DBUG_RETURN(1); + + xpl_sel->exec_const_cond= exec_const_cond; + xpl_sel->outer_ref_cond= outer_ref_cond; + xpl_sel->pseudo_bits_cond= pseudo_bits_cond; + if (tmp_having) + xpl_sel->having= tmp_having; + else + xpl_sel->having= having; + xpl_sel->having_value= having_value; + + JOIN_TAB* const first_top_tab= join->first_breadth_first_tab(); + JOIN_TAB* prev_bush_root_tab= NULL; + + Explain_basic_join *cur_parent= xpl_sel; + + for (JOIN_TAB *tab= first_explain_order_tab(join); tab; + tab= next_explain_order_tab(join, tab)) + { + JOIN_TAB *saved_join_tab= NULL; + TABLE *cur_table= tab->table; + + /* Don't show eliminated tables */ + if (cur_table->map & join->eliminated_tables) + { + used_tables|= cur_table->map; + continue; + } + + + Explain_table_access *eta= (new (output->mem_root) + Explain_table_access(output->mem_root, + thd->lex->analyze_stmt)); + + if (!eta) + DBUG_RETURN(1); + if (tab->bush_root_tab != prev_bush_root_tab) + { + if (tab->bush_root_tab) + { + /* + We've entered an SJ-Materialization nest. Create an object for it. 
+ */ + if (!(cur_parent= + new (output->mem_root) Explain_basic_join(output->mem_root))) + DBUG_RETURN(1); + + JOIN_TAB *first_child= tab->bush_root_tab->bush_children->start; + cur_parent->select_id= + first_child->emb_sj_nest->sj_subq_pred->get_identifier(); + } + else + { + /* + We've just left an SJ-Materialization nest. We are at the join tab + that 'embeds the nest' + */ + DBUG_ASSERT(tab->bush_children); + eta->sjm_nest= cur_parent; + cur_parent= xpl_sel; + } + } + prev_bush_root_tab= tab->bush_root_tab; + + cur_parent->add_table(eta, output); + if (tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab)) + DBUG_RETURN(1); + + if (saved_join_tab) + tab= saved_join_tab; + + // For next iteration + used_tables|= cur_table->map; + } + output->add_node(xpl_sel); + } + + /* + Don't try to add query plans for child selects if this select was pushed + down into a Smart Storage Engine: + - the entire statement was pushed down ("PUSHED SELECT"), or + - this derived table was pushed down ("PUSHED DERIVED") + */ + if (!select_lex->pushdown_select && select_lex->type != pushed_derived_text) + for (SELECT_LEX_UNIT *tmp_unit= join->select_lex->first_inner_unit(); + tmp_unit; + tmp_unit= tmp_unit->next_unit()) + if (tmp_unit->explainable()) + explain->add_child(tmp_unit->first_select()->select_number); + + if (select_lex->is_top_level_node()) + output->query_plan_ready(); + + DBUG_RETURN(0); +} + + +/* + This function serves as "shortcut point" for EXPLAIN queries. + + The EXPLAIN statement executes just like its SELECT counterpart would + execute, except that JOIN::exec() will call select_describe() instead of + actually executing the query. + + Inside select_describe(): + - Query plan is updated with latest QEP choices made at the start of + JOIN::exec(). + - the proces of "almost execution" is invoked for the children subqueries. + + Overall, select_describe() is a legacy of old EXPLAIN implementation and + should be removed. 
+*/ + +static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, + bool distinct,const char *message) +{ + THD *thd=join->thd; + select_result *result=join->result; + DBUG_ENTER("select_describe"); + + if (join->select_lex->pushdown_select) + { + /* + The whole statement was pushed down to a Smart Storage Engine. Do not + attempt to produce a query plan locally. + */ + DBUG_VOID_RETURN; + } + + /* Update the QPF with latest values of using_temporary, using_filesort */ + for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit(); + unit; + unit= unit->next_unit()) + { + /* + This fix_fields() call is to handle an edge case like this: + + SELECT ... UNION SELECT ... ORDER BY (SELECT ...) + + for such queries, we'll get here before having called + subquery_expr->fix_fields(), which will cause failure to + */ + if (unit->item && !unit->item->fixed()) + { + Item *ref= unit->item; + if (unit->item->fix_fields(thd, &ref)) + DBUG_VOID_RETURN; + DBUG_ASSERT(ref == unit->item); + } + + if (unit->explainable()) + { + if (mysql_explain_union(thd, unit, result)) + DBUG_VOID_RETURN; + } + } + DBUG_VOID_RETURN; +} + + +bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result) +{ + DBUG_ENTER("mysql_explain_union"); + bool res= 0; + SELECT_LEX *first= unit->first_select(); + bool is_pushed_union= unit->derived && unit->derived->pushdown_derived; + + for (SELECT_LEX *sl= first; sl; sl= sl->next_select()) + { + sl->set_explain_type(FALSE); + sl->options|= SELECT_DESCRIBE; + } + + if (unit->is_unit_op() || unit->fake_select_lex) + { + ulonglong save_options= 0; + + if (unit->union_needs_tmp_table() && unit->fake_select_lex) + { + save_options= unit->fake_select_lex->options; + unit->fake_select_lex->select_number= FAKE_SELECT_LEX_ID; // just for initialization + unit->fake_select_lex->type= unit_operation_text[unit->common_op()]; + unit->fake_select_lex->options|= SELECT_DESCRIBE; + } + if (!(res= unit->prepare(unit->derived, result, + 
SELECT_NO_UNLOCK | SELECT_DESCRIBE))) + { + if (!is_pushed_union) + res= unit->exec(); + } + + if (unit->union_needs_tmp_table() && unit->fake_select_lex) + unit->fake_select_lex->options= save_options; + } + else + { + thd->lex->current_select= first; + unit->set_limit(unit->global_parameters()); + res= mysql_select(thd, first->table_list.first, first->item_list, + first->where, + first->order_list.elements + first->group_list.elements, + first->order_list.first, first->group_list.first, + first->having, thd->lex->proc_list.first, + first->options | thd->variables.option_bits | SELECT_DESCRIBE, + result, unit, first); + } + + DBUG_RETURN(res || thd->is_error()); +} + + +static void print_table_array(THD *thd, + table_map eliminated_tables, + String *str, TABLE_LIST **table, + TABLE_LIST **end, + enum_query_type query_type) +{ + (*table)->print(thd, eliminated_tables, str, query_type); + + for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++) + { + TABLE_LIST *curr= *tbl; + + /* + The "eliminated_tables &&" check guards againist the case of + printing the query for CREATE VIEW. We do that without having run + JOIN::optimize() and so will have nested_join->used_tables==0. 
+ */ + if (eliminated_tables && + ((curr->table && (curr->table->map & eliminated_tables)) || + (curr->nested_join && !(curr->nested_join->used_tables & + ~eliminated_tables)))) + { + /* as of 5.5, print_join doesnt put eliminated elements into array */ + DBUG_ASSERT(0); + continue; + } + + /* JOIN_TYPE_OUTER is just a marker unrelated to real join */ + if (curr->outer_join & (JOIN_TYPE_LEFT|JOIN_TYPE_RIGHT)) + { + /* MySQL converts right to left joins */ + str->append(STRING_WITH_LEN(" left join ")); + } + else if (curr->straight) + str->append(STRING_WITH_LEN(" straight_join ")); + else if (curr->sj_inner_tables) + str->append(STRING_WITH_LEN(" semi join ")); + else + str->append(STRING_WITH_LEN(" join ")); + + curr->print(thd, eliminated_tables, str, query_type); + if (curr->on_expr) + { + str->append(STRING_WITH_LEN(" on(")); + curr->on_expr->print(str, query_type); + str->append(')'); + } + } +} + + +/* + Check if the passed table is + - a base table which was eliminated, or + - a join nest which only contained eliminated tables (and so was eliminated, + too) +*/ + +bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl) +{ + return eliminated_tables && + ((tbl->table && (tbl->table->map & eliminated_tables)) || + (tbl->nested_join && !(tbl->nested_join->used_tables & + ~eliminated_tables))); +} + +/** + Print joins from the FROM clause. 
+ + @param thd thread handler + @param str string where table should be printed + @param tables list of tables in join + @query_type type of the query is being generated +*/ + +static void print_join(THD *thd, + table_map eliminated_tables, + String *str, + List<TABLE_LIST> *tables, + enum_query_type query_type) +{ + /* List is reversed => we should reverse it before using */ + List_iterator_fast<TABLE_LIST> ti(*tables); + TABLE_LIST **table; + DBUG_ENTER("print_join"); + + /* + If the QT_NO_DATA_EXPANSION flag is specified, we print the + original table list, including constant tables that have been + optimized away, as the constant tables may be referenced in the + expression printed by Item_field::print() when this flag is given. + Otherwise, only non-const tables are printed. + + Example: + + Original SQL: + select * from (select 1) t + + Printed without QT_NO_DATA_EXPANSION: + select '1' AS `1` from dual + + Printed with QT_NO_DATA_EXPANSION: + select `t`.`1` from (select 1 AS `1`) `t` + */ + const bool print_const_tables= (query_type & QT_NO_DATA_EXPANSION); + size_t tables_to_print= 0; + + for (TABLE_LIST *t= ti++; t ; t= ti++) + { + /* See comment in print_table_array() about the second condition */ + if (print_const_tables || !t->optimized_away) + if (!is_eliminated_table(eliminated_tables, t)) + tables_to_print++; + } + if (tables_to_print == 0) + { + str->append(STRING_WITH_LEN("dual")); + DBUG_VOID_RETURN; // all tables were optimized away + } + ti.rewind(); + + if (!(table= static_cast<TABLE_LIST **>(thd->alloc(sizeof(TABLE_LIST*) * + tables_to_print)))) + DBUG_VOID_RETURN; // out of memory + + TABLE_LIST *tmp, **t= table + (tables_to_print - 1); + while ((tmp= ti++)) + { + if (tmp->optimized_away && !print_const_tables) + continue; + if (is_eliminated_table(eliminated_tables, tmp)) + continue; + *t--= tmp; + } + + DBUG_ASSERT(tables->elements >= 1); + /* + Assert that the first table in the list isn't eliminated. 
This comes from + the fact that the first table can't be inner table of an outer join. + */ + DBUG_ASSERT(!eliminated_tables || + !(((*table)->table && ((*table)->table->map & eliminated_tables)) || + ((*table)->nested_join && !((*table)->nested_join->used_tables & + ~eliminated_tables)))); + /* + If the first table is a semi-join nest, swap it with something that is + not a semi-join nest. + */ + if ((*table)->sj_inner_tables) + { + TABLE_LIST **end= table + tables_to_print; + for (TABLE_LIST **t2= table; t2!=end; t2++) + { + if (!(*t2)->sj_inner_tables) + { + tmp= *t2; + *t2= *table; + *table= tmp; + break; + } + } + } + print_table_array(thd, eliminated_tables, str, table, + table + tables_to_print, query_type); + DBUG_VOID_RETURN; +} + +/** + @brief Print an index hint + + @details Prints out the USE|FORCE|IGNORE index hint. + + @param thd the current thread + @param[out] str appends the index hint here + @param hint what the hint is (as string : "USE INDEX"| + "FORCE INDEX"|"IGNORE INDEX") + @param hint_length the length of the string in 'hint' + @param indexes a list of index names for the hint +*/ + +void +Index_hint::print(THD *thd, String *str) +{ + switch (type) + { + case INDEX_HINT_IGNORE: str->append(STRING_WITH_LEN("IGNORE INDEX")); break; + case INDEX_HINT_USE: str->append(STRING_WITH_LEN("USE INDEX")); break; + case INDEX_HINT_FORCE: str->append(STRING_WITH_LEN("FORCE INDEX")); break; + } + str->append(STRING_WITH_LEN(" (")); + if (key_name.length) + { + if (thd && !system_charset_info->strnncoll( + (const uchar *)key_name.str, key_name.length, + (const uchar *)primary_key_name.str, + primary_key_name.length)) + str->append(primary_key_name); + else + append_identifier(thd, str, &key_name); +} + str->append(')'); +} + + +/** + Print table as it should be in join list. 
+ + @param str string where table should be printed +*/ + +void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, + enum_query_type query_type) +{ + if (nested_join) + { + str->append('('); + print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); + str->append(')'); + } + else if (jtbm_subselect) + { + if (jtbm_subselect->engine->engine_type() == + subselect_engine::SINGLE_SELECT_ENGINE) + { + /* + We get here when conversion into materialization didn't finish (this + happens when + - The subquery is a degenerate case which produces 0 or 1 record + - subquery's optimization didn't finish because of @@max_join_size + limits + - ... maybe some other cases like this + */ + str->append(STRING_WITH_LEN(" <materialize> (")); + jtbm_subselect->engine->print(str, query_type); + str->append(')'); + } + else + { + str->append(STRING_WITH_LEN(" <materialize> (")); + subselect_hash_sj_engine *hash_engine; + hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine; + hash_engine->materialize_engine->print(str, query_type); + str->append(')'); + } + } + else + { + const char *cmp_name; // Name to compare with alias + if (view_name.str) + { + // A view + + if (!(belong_to_view && + belong_to_view->compact_view_format) && + !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES)) + { + append_identifier(thd, str, &view_db); + str->append('.'); + } + append_identifier(thd, str, &view_name); + cmp_name= view_name.str; + } + else if (derived) + { + if (!is_with_table()) + { + // A derived table + str->append('('); + derived->print(str, query_type); + str->append(')'); + cmp_name= ""; // Force printing of alias + } + else + { + append_identifier(thd, str, &table_name); + cmp_name= table_name.str; + } + } + else if (table_function) + { + /* A table function. 
*/ + (void) table_function->print(thd, this, str, query_type); + str->append(' '); + append_identifier(thd, str, &alias); + cmp_name= alias.str; + } + else + { + // A normal table + + if (!(belong_to_view && + belong_to_view->compact_view_format) && + !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES)) + { + append_identifier(thd, str, &db); + str->append('.'); + } + if (schema_table) + { + append_identifier(thd, str, &schema_table_name); + cmp_name= schema_table_name.str; + } + else + { + append_identifier(thd, str, &table_name); + cmp_name= table_name.str; + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_names && partition_names->elements) + { + int i, num_parts= partition_names->elements; + List_iterator<String> name_it(*(partition_names)); + str->append(STRING_WITH_LEN(" PARTITION (")); + for (i= 1; i <= num_parts; i++) + { + String *name= name_it++; + append_identifier(thd, str, name->ptr(), name->length()); + if (i != num_parts) + str->append(','); + } + str->append(')'); + } +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + } + if (table && table->versioned()) + vers_conditions.print(str, query_type); + + if (my_strcasecmp(table_alias_charset, cmp_name, alias.str)) + { + char t_alias_buff[MAX_ALIAS_NAME]; + LEX_CSTRING t_alias= alias; + + str->append(' '); + if (lower_case_table_names == 1) + { + if (alias.str && alias.str[0]) + { + strmov(t_alias_buff, alias.str); + t_alias.length= my_casedn_str(files_charset_info, t_alias_buff); + t_alias.str= t_alias_buff; + } + } + + append_identifier(thd, str, &t_alias); + } + + if (index_hints) + { + List_iterator<Index_hint> it(*index_hints); + Index_hint *hint; + + while ((hint= it++)) + { + str->append(' '); + hint->print(thd, str); + } + } + } +} + +enum explainable_cmd_type +{ + SELECT_CMD, INSERT_CMD, REPLACE_CMD, UPDATE_CMD, DELETE_CMD, NO_CMD +}; + +static +const LEX_CSTRING explainable_cmd_name []= +{ + {STRING_WITH_LEN("select ")}, + {STRING_WITH_LEN("insert ")}, + {STRING_WITH_LEN("replace ")}, + 
{STRING_WITH_LEN("update ")}, + {STRING_WITH_LEN("delete ")}, +}; + +static +const LEX_CSTRING* get_explainable_cmd_name(enum explainable_cmd_type cmd) +{ + return explainable_cmd_name + cmd; +} + +static +enum explainable_cmd_type get_explainable_cmd_type(THD *thd) +{ + switch (thd->lex->sql_command) { + case SQLCOM_SELECT: + return SELECT_CMD; + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + return INSERT_CMD; + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + return REPLACE_CMD; + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + return UPDATE_CMD; + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + return DELETE_CMD; + default: + return SELECT_CMD; + } +} + + +void TABLE_LIST::print_leaf_tables(THD *thd, String *str, + enum_query_type query_type) +{ + if (merge_underlying_list) + { + for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local) + tbl->print_leaf_tables(thd, str, query_type); + } + else + print(thd, 0, str, query_type); +} + + +void st_select_lex::print_item_list(THD *thd, String *str, + enum_query_type query_type) +{ + bool first= 1; + /* + outer_select() can not be used here because it is for name resolution + and will return NULL at any end of name resolution chain (view/derived) + */ + bool top_level= is_query_topmost(thd); + List_iterator_fast<Item> it(item_list); + Item *item; + while ((item= it++)) + { + if (first) + first= 0; + else + str->append(','); + + if ((is_subquery_function() && !item->is_explicit_name()) || + !item->name.str) + { + /* + Do not print auto-generated aliases in subqueries. It has no purpose + in a view definition or other contexts where the query is printed. + */ + item->print(str, query_type); + } + else + { + /* + Do not print illegal names (if it is not top level SELECT). + Top level view checked (and correct name are assigned), + other cases of top level SELECT are not important, because + it is not "table field". 
+ */ + if (top_level || + item->is_explicit_name() || + !check_column_name(item->name.str)) + item->print_item_w_name(str, query_type); + else + item->print(str, query_type); + } + } +} + + +void st_select_lex::print_set_clause(THD *thd, String *str, + enum_query_type query_type) +{ + bool first= 1; + /* + outer_select() can not be used here because it is for name resolution + and will return NULL at any end of name resolution chain (view/derived) + */ + List_iterator_fast<Item> it(item_list); + List_iterator_fast<Item> vt(thd->lex->value_list); + Item *item; + Item *val; + while ((item= it++, val= vt++ )) + { + if (first) + { + str->append(STRING_WITH_LEN(" set ")); + first= 0; + } + else + str->append(','); + + item->print(str, (enum_query_type) (query_type | QT_NO_DATA_EXPANSION)); + str->append(STRING_WITH_LEN(" = ")); + val->print(str, query_type); + } +} + + +void st_select_lex::print_on_duplicate_key_clause(THD *thd, String *str, + enum_query_type query_type) +{ + bool first= 1; + List_iterator_fast<Item> it(thd->lex->update_list); + List_iterator_fast<Item> vt(thd->lex->value_list); + Item *item; + Item *val; + while ((item= it++, val= vt++ )) + { + if (first) + { + str->append(STRING_WITH_LEN(" on duplicate key update ")); + first= 0; + } + else + str->append(','); + + item->print(str, query_type); + str->append(STRING_WITH_LEN(" = ")); + val->print(str, query_type); + } +} + +void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) +{ + DBUG_ASSERT(thd); + + if (tvc) + { + tvc->print(thd, str, query_type); + return; + } + + if (is_tvc_wrapper && (query_type & QT_NO_WRAPPERS_FOR_TVC_IN_VIEW)) + { + first_inner_unit()->first_select()->print(thd, str, query_type); + return; + } + + bool top_level= is_query_topmost(thd); + enum explainable_cmd_type sel_type= SELECT_CMD; + if (top_level) + sel_type= get_explainable_cmd_type(thd); + + if (sel_type == INSERT_CMD || sel_type == REPLACE_CMD) + { + 
str->append(get_explainable_cmd_name(sel_type)); + str->append(STRING_WITH_LEN("into ")); + TABLE_LIST *tbl= thd->lex->query_tables; + while (tbl->merge_underlying_list) + tbl= tbl->merge_underlying_list; + tbl->print(thd, 0, str, query_type); + if (thd->lex->field_list.elements) + { + str->append ('('); + List_iterator_fast<Item> it(thd->lex->field_list); + Item *item; + bool first= true; + while ((item= it++)) + { + if (first) + first= false; + else + str->append(','); + str->append(item->name); + } + str->append(')'); + } + + str->append(' '); + + if (thd->lex->sql_command == SQLCOM_INSERT || + thd->lex->sql_command == SQLCOM_REPLACE) + { + str->append(STRING_WITH_LEN("values ")); + bool is_first_elem= true; + List_iterator_fast<List_item> li(thd->lex->many_values); + List_item *list; + + while ((list= li++)) + { + if (is_first_elem) + is_first_elem= false; + else + str->append(','); + + print_list_item(str, list, query_type); + } + if (thd->lex->update_list.elements) + print_on_duplicate_key_clause(thd, str, query_type); + return; + } + } + + if ((query_type & QT_SHOW_SELECT_NUMBER) && + thd->lex->all_selects_list && + thd->lex->all_selects_list->link_next && + select_number != FAKE_SELECT_LEX_ID) + { + str->append(STRING_WITH_LEN("/* select#")); + str->append_ulonglong(select_number); + if (thd->lex->describe & DESCRIBE_EXTENDED2) + { + str->append('/'); + str->append_ulonglong(nest_level); + + if (master_unit()->fake_select_lex && + master_unit()->first_select() == this) + { + str->append(STRING_WITH_LEN(" Filter Select: ")); + master_unit()->fake_select_lex->print(thd, str, query_type); + } + } + str->append(STRING_WITH_LEN(" */ ")); + } + + if (sel_type == SELECT_CMD || + sel_type == INSERT_CMD || + sel_type == REPLACE_CMD) + str->append(STRING_WITH_LEN("select ")); + + if (join && join->cleaned) + { + /* + JOIN already cleaned up so it is dangerous to print items + because temporary tables they pointed on could be freed. 
+ */ + str->append('#'); + str->append(select_number); + return; + } + + /* First add options */ + if (options & SELECT_STRAIGHT_JOIN) + str->append(STRING_WITH_LEN("straight_join ")); + if (options & SELECT_HIGH_PRIORITY) + str->append(STRING_WITH_LEN("high_priority ")); + if (options & SELECT_DISTINCT) + str->append(STRING_WITH_LEN("distinct ")); + if (options & SELECT_SMALL_RESULT) + str->append(STRING_WITH_LEN("sql_small_result ")); + if (options & SELECT_BIG_RESULT) + str->append(STRING_WITH_LEN("sql_big_result ")); + if (options & OPTION_BUFFER_RESULT) + str->append(STRING_WITH_LEN("sql_buffer_result ")); + if (options & OPTION_FOUND_ROWS) + str->append(STRING_WITH_LEN("sql_calc_found_rows ")); + if (this == parent_lex->first_select_lex()) + { + switch (parent_lex->sql_cache) + { + case LEX::SQL_NO_CACHE: + str->append(STRING_WITH_LEN("sql_no_cache ")); + break; + case LEX::SQL_CACHE: + str->append(STRING_WITH_LEN("sql_cache ")); + break; + case LEX::SQL_CACHE_UNSPECIFIED: + break; + default: + DBUG_ASSERT(0); + } + } + + //Item List + if (sel_type == SELECT_CMD || + sel_type == INSERT_CMD || + sel_type == REPLACE_CMD) + print_item_list(thd, str, query_type); + /* + from clause + TODO: support USING/FORCE/IGNORE index + */ + if (table_list.elements) + { + if (sel_type == SELECT_CMD || + sel_type == INSERT_CMD || + sel_type == REPLACE_CMD) + { + str->append(STRING_WITH_LEN(" from ")); + /* go through join tree */ + print_join(thd, join? 
join->eliminated_tables: 0, str, &top_join_list, + query_type); + } + if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) + str->append(get_explainable_cmd_name(sel_type)); + if (sel_type == DELETE_CMD) + { + str->append(STRING_WITH_LEN(" from ")); + bool first= true; + for (TABLE_LIST *target_tbl= thd->lex->auxiliary_table_list.first; + target_tbl; + target_tbl= target_tbl->next_local) + { + if (first) + first= false; + else + str->append(','); + target_tbl->correspondent_table->print_leaf_tables(thd, str, + query_type); + } + + if (!first) + str->append(STRING_WITH_LEN(" using ")); + } + if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) + { + if (join) + print_join(thd, 0, str, &top_join_list, query_type); + else + { + bool first= true; + List_iterator_fast<TABLE_LIST> li(leaf_tables); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (first) + first= false; + else + str->append(','); + tbl->print(thd, 0, str, query_type); + } + } + } + } + else if (where) + { + /* + "SELECT 1 FROM DUAL WHERE 2" should not be printed as + "SELECT 1 WHERE 2": the 1st syntax is valid, but the 2nd is not. + */ + str->append(STRING_WITH_LEN(" from DUAL ")); + } + + if (sel_type == UPDATE_CMD) + print_set_clause(thd, str, query_type); + + // Where + Item *cur_where= where; + if (join) + cur_where= join->conds; + else if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) + cur_where= thd->lex->upd_del_where; + if (cur_where || cond_value != Item::COND_UNDEF) + { + str->append(STRING_WITH_LEN(" where ")); + if (cur_where) + cur_where->print(str, query_type); + else + str->append(cond_value != Item::COND_FALSE ? 
'1' : '0'); + } + + // group by & olap + if (group_list.elements) + { + str->append(STRING_WITH_LEN(" group by ")); + print_order(str, group_list.first, query_type); + switch (olap) + { + case CUBE_TYPE: + str->append(STRING_WITH_LEN(" with cube")); + break; + case ROLLUP_TYPE: + str->append(STRING_WITH_LEN(" with rollup")); + break; + default: + ; //satisfy compiler + } + } + + // having + Item *cur_having= having; + if (join) + cur_having= join->having; + + if (cur_having || having_value != Item::COND_UNDEF) + { + str->append(STRING_WITH_LEN(" having ")); + if (cur_having) + cur_having->print(str, query_type); + else + str->append(having_value != Item::COND_FALSE ? '1' : '0'); + } + + if (order_list.elements) + { + str->append(STRING_WITH_LEN(" order by ")); + print_order(str, order_list.first, query_type); + } + + // limit + print_limit(thd, str, query_type); + + // lock type + if (select_lock == select_lock_type::IN_SHARE_MODE) + str->append(STRING_WITH_LEN(" lock in share mode")); + else if (select_lock == select_lock_type::FOR_UPDATE) + str->append(STRING_WITH_LEN(" for update")); + if (unlikely(skip_locked)) + str->append(STRING_WITH_LEN(" skip locked")); + + if ((sel_type == INSERT_CMD || sel_type == REPLACE_CMD) && + thd->lex->update_list.elements) + print_on_duplicate_key_clause(thd, str, query_type); + + // returning clause + if (sel_type == DELETE_CMD && !item_list.elements) + { + print_item_list(thd, str, query_type); + } + // PROCEDURE unsupported here +} + + +/** + Change the select_result object of the JOIN. + + If old_result is not used, forward the call to the current + select_result in case it is a wrapper around old_result. + + Call prepare() and prepare2() on the new select_result if we decide + to use it. 
+ + @param new_result New select_result object + @param old_result Old select_result object (NULL to force change) + + @retval false Success + @retval true Error +*/ + +bool JOIN::change_result(select_result *new_result, select_result *old_result) +{ + DBUG_ENTER("JOIN::change_result"); + if (old_result == NULL || result == old_result) + { + result= new_result; + if (result->prepare(fields_list, select_lex->master_unit()) || + result->prepare2(this)) + DBUG_RETURN(true); /* purecov: inspected */ + DBUG_RETURN(false); + } + DBUG_RETURN(result->change_result(new_result)); +} + + +/** + @brief + Set allowed types of join caches that can be used for join operations + + @details + The function sets a bitmap of allowed join buffers types in the field + allowed_join_cache_types of this JOIN structure: + bit 1 is set if tjoin buffers are allowed to be incremental + bit 2 is set if the join buffers are allowed to be hashed + but 3 is set if the join buffers are allowed to be used for BKA + join algorithms. + The allowed types are read from system variables. + Besides the function sets maximum allowed join cache level that is + also read from a system variable. 
+*/ + +void JOIN::set_allowed_join_cache_types() +{ + allowed_join_cache_types= 0; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL)) + allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED)) + allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA)) + allowed_join_cache_types|= JOIN_CACHE_BKA_BIT; + allowed_semijoin_with_cache= + optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE); + allowed_outer_join_with_cache= + optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE); + max_allowed_join_cache_level= thd->variables.join_cache_level; +} + + +/** + Save a query execution plan so that the caller can revert to it if needed, + and reset the current query plan so that it can be reoptimized. + + @param save_to The object into which the current query plan state is saved +*/ + +void JOIN::save_query_plan(Join_plan_state *save_to) +{ + DYNAMIC_ARRAY tmp_keyuse; + /* Swap the current and the backup keyuse internal arrays. */ + tmp_keyuse= keyuse; + keyuse= save_to->keyuse; /* keyuse is reset to an empty array. */ + save_to->keyuse= tmp_keyuse; + + for (uint i= 0; i < table_count; i++) + { + save_to->join_tab_keyuse[i]= join_tab[i].keyuse; + join_tab[i].keyuse= NULL; + save_to->join_tab_checked_keys[i]= join_tab[i].checked_keys; + join_tab[i].checked_keys.clear_all(); + } + memcpy((uchar*) save_to->best_positions, (uchar*) best_positions, + sizeof(POSITION) * (table_count + 1)); + memset((uchar*) best_positions, 0, sizeof(POSITION) * (table_count + 1)); + + /* Save SJM nests */ + List_iterator<TABLE_LIST> it(select_lex->sj_nests); + TABLE_LIST *tlist; + SJ_MATERIALIZATION_INFO **p_info= save_to->sj_mat_info; + while ((tlist= it++)) + { + *(p_info++)= tlist->sj_mat_info; + } +} + + +/** + Reset a query execution plan so that it can be reoptimized in-place. 
+*/ +void JOIN::reset_query_plan() +{ + for (uint i= 0; i < table_count; i++) + { + join_tab[i].keyuse= NULL; + join_tab[i].checked_keys.clear_all(); + } +} + + +/** + Restore a query execution plan previously saved by the caller. + + @param The object from which the current query plan state is restored. +*/ + +void JOIN::restore_query_plan(Join_plan_state *restore_from) +{ + DYNAMIC_ARRAY tmp_keyuse; + tmp_keyuse= keyuse; + keyuse= restore_from->keyuse; + restore_from->keyuse= tmp_keyuse; + + for (uint i= 0; i < table_count; i++) + { + join_tab[i].keyuse= restore_from->join_tab_keyuse[i]; + join_tab[i].checked_keys= restore_from->join_tab_checked_keys[i]; + } + + memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions, + sizeof(POSITION) * (table_count + 1)); + /* Restore SJM nests */ + List_iterator<TABLE_LIST> it(select_lex->sj_nests); + TABLE_LIST *tlist; + SJ_MATERIALIZATION_INFO **p_info= restore_from->sj_mat_info; + while ((tlist= it++)) + { + tlist->sj_mat_info= *(p_info++); + } +} + + +/** + Reoptimize a query plan taking into account an additional conjunct to the + WHERE clause. + + @param added_where An extra conjunct to the WHERE clause to reoptimize with + @param join_tables The set of tables to reoptimize + @param save_to If != NULL, save here the state of the current query plan, + otherwise reuse the existing query plan structures. + + @notes + Given a query plan that was already optimized taking into account some WHERE + clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'. + The reoptimization works as follows: + + 1. Call update_ref_and_keys *only* for the new conditions 'added_where' + that are about to be injected into the query. + 2. Expand if necessary the original KEYUSE array JOIN::keyuse to + accommodate the new REF accesses computed for the 'added_where' condition. + 3. Add the new KEYUSEs into JOIN::keyuse. + 4. Re-sort and re-filter the JOIN::keyuse array with the newly added + KEYUSE elements. 
+ + @retval REOPT_NEW_PLAN there is a new plan. + @retval REOPT_OLD_PLAN no new improved plan was produced, use the old one. + @retval REOPT_ERROR an irrecovarable error occurred during reoptimization. +*/ + +JOIN::enum_reopt_result +JOIN::reoptimize(Item *added_where, table_map join_tables, + Join_plan_state *save_to) +{ + DYNAMIC_ARRAY added_keyuse; + SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */ + size_t org_keyuse_elements; + + /* Re-run the REF optimizer to take into account the new conditions. */ + if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count, added_where, + ~outer_join, select_lex, &sargables)) + { + delete_dynamic(&added_keyuse); + return REOPT_ERROR; + } + + if (!added_keyuse.elements) + { + delete_dynamic(&added_keyuse); + return REOPT_OLD_PLAN; + } + + if (save_to) + save_query_plan(save_to); + else + reset_query_plan(); + + if (!keyuse.buffer && + my_init_dynamic_array(thd->mem_root->psi_key, &keyuse, sizeof(KEYUSE), + 20, 64, MYF(MY_THREAD_SPECIFIC))) + { + delete_dynamic(&added_keyuse); + return REOPT_ERROR; + } + + org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements; + allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements); + + /* If needed, add the access methods from the original query plan. */ + if (save_to) + { + DBUG_ASSERT(!keyuse.elements); + keyuse.elements= save_to->keyuse.elements; + if (size_t e= keyuse.elements) + memcpy(keyuse.buffer, + save_to->keyuse.buffer, e * keyuse.size_of_element); + } + + /* Add the new access methods to the keyuse array. */ + memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element, + added_keyuse.buffer, + (size_t) added_keyuse.elements * added_keyuse.size_of_element); + keyuse.elements+= added_keyuse.elements; + /* added_keyuse contents is copied, and it is no longer needed. 
*/ + delete_dynamic(&added_keyuse); + + if (sort_and_filter_keyuse(this, &keyuse, true)) + return REOPT_ERROR; + optimize_keyuse(this, &keyuse); + + if (optimize_semijoin_nests(this, join_tables)) + return REOPT_ERROR; + + /* Re-run the join optimizer to compute a new query plan. */ + if (choose_plan(this, join_tables)) + return REOPT_ERROR; + + return REOPT_NEW_PLAN; +} + + +/** + Cache constant expressions in WHERE, HAVING, ON conditions. +*/ + +void JOIN::cache_const_exprs() +{ + uchar cache_flag= FALSE; + uchar *analyzer_arg= &cache_flag; + + /* No need in cache if all tables are constant. */ + if (const_tables == table_count) + return; + + if (conds) + conds->top_level_compile(thd, &Item::cache_const_expr_analyzer, &analyzer_arg, + &Item::cache_const_expr_transformer, &cache_flag); + cache_flag= FALSE; + if (having) + having->top_level_compile(thd, &Item::cache_const_expr_analyzer, + &analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag); + + for (JOIN_TAB *tab= first_depth_first_tab(this); tab; + tab= next_depth_first_tab(this, tab)) + { + if (*tab->on_expr_ref) + { + cache_flag= FALSE; + (*tab->on_expr_ref)->top_level_compile(thd, &Item::cache_const_expr_analyzer, + &analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag); + } + } +} + + +/* + Get the cost of using index keynr to read #LIMIT matching rows + + @detail + - If there is a quick select, we try to use it. + - if there is a ref(const) access, we try to use it, too. + - quick and ref(const) use different cost formulas, so if both are possible + we should make a cost-based choice. + + rows_limit is the number of rows we would need to read when using a full + index scan. 
  This is generally higher than the N from "LIMIT N" clause,
  because there's a WHERE condition (a part of which is used to construct a
  range access we are considering using here)

  @param  tab            JOIN_TAB with table access (is NULL for single-table
                         UPDATE/DELETE)
  @param  table          Table whose opt_range[], const_key_parts[] and
                         key_info[] entries for keynr are consulted
  @param  table_records  Number of rows in the table; the denominator of the
                         range access selectivity
  @param  keynr          Number of the index being considered
  @param  rows_limit     See explanation above
  @param  read_time      OUT  Cost of reading using quick or ref(const) access.


  @return
    true   There was a possible quick or ref access, its cost is in the OUT
           parameters.
    false  No quick or ref(const) possible (and so, the caller will attempt
           to use a full index scan on this index).
*/

static bool get_range_limit_read_cost(const JOIN_TAB *tab,
                                      const TABLE *table,
                                      ha_rows table_records,
                                      uint keynr,
                                      ha_rows rows_limit,
                                      double *read_time)
{
  bool res= false;
  /*
    We need to adjust the estimates if we had a quick select (or ref(const)) on
    index keynr.
  */
  if (table->opt_range_keys.is_set(keynr))
  {
    /*
      Start from quick select's rows and cost. These are always cheaper than
      full index scan/cost.
    */
    double best_rows= (double) table->opt_range[keynr].rows;
    double best_cost= (double) table->opt_range[keynr].cost;

    /*
      Check if ref(const) access was possible on this index.
    */
    if (tab)
    {
      key_part_map map= 1;
      uint kp;
      /* Find how many key parts would be used by ref(const) */
      for (kp=0; kp < MAX_REF_PARTS; map=map << 1, kp++)
      {
        if (!(table->const_key_parts[keynr] & map))
          break;
      }

      if (kp > 0)
      {
        ha_rows ref_rows;
        /*
          Two possible cases:
          1. ref(const) uses the same #key parts as range access.
          2. ref(const) uses fewer key parts, because there is a
            range_cond(key_part+1).
        */
        if (kp == table->opt_range[keynr].key_parts)
          ref_rows= table->opt_range[keynr].rows;
        else
          ref_rows= (ha_rows) table->key_info[keynr].actual_rec_per_key(kp-1);

        /* A zero estimate is not usable for costing; keep the range figures */
        if (ref_rows > 0)
        {
          double tmp= cost_for_index_read(tab->join->thd, table, keynr,
                                          ref_rows,
                                          (ha_rows) tab->worst_seeks);
          if (tmp < best_cost)
          {
            best_cost= tmp;
            best_rows= (double)ref_rows;
          }
        }
      }
    }

    /*
      Consider an example:

        SELECT *
        FROM t1
        WHERE key1 BETWEEN 10 AND 20 AND col2='foo'
        ORDER BY key1 LIMIT 10

      If we were using a full index scan on key1, we would need to read this
      many rows to get 10 matches:

        10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo')

      This is the number we get in rows_limit.
      But we intend to use range access on key1. The rows returned by quick
      select will satisfy the range part of the condition,
      "key1 BETWEEN 10 and 20".  We will still need to filter them with
      the remainder condition, (col2='foo').

      The selectivity of the range access is (best_rows/table_records). We need
      to discount it from the rows_limit:
    */
    double rows_limit_for_quick= rows_limit * (best_rows / table_records);

    if (best_rows > rows_limit_for_quick)
    {
      /*
        LIMIT clause specifies that we will need to read fewer records than
        quick select will return. Assume that quick select's cost is
        proportional to the number of records we need to return (e.g. if we
        only need 1/3rd of records, it will cost us 1/3rd of quick select's
        read time)
      */
      best_cost *= rows_limit_for_quick / best_rows;
    }
    *read_time= best_cost;
    res= true;
  }
  return res;
}


/**
  Find a cheaper access key than a given @a key

  @param          tab                 NULL or JOIN_TAB of the accessed table
  @param          order               Linked list of ORDER BY arguments
  @param          table               Table if tab == NULL or tab->table
  @param          usable_keys         Key map to find a cheaper key in
  @param          ref_key
                    0 <= key < MAX_KEY   - Key that is currently used for finding
                                           row
                    MAX_KEY              - means index_merge is used
                    -1                   - means we're currently not using an
                                           index to find rows.

  @param          select_limit_arg    LIMIT value
  @param [out]    new_key             Key number if success, otherwise undefined
  @param [out]    new_key_direction   Return -1 (reverse) or +1 if success,
                                      otherwise undefined
  @param [out]    new_select_limit    Return adjusted LIMIT
  @param [out]    new_used_key_parts  NULL by default, otherwise return number
                                      of new_key prefix columns if success
                                      or undefined if the function fails
  @param [out]  saved_best_key_parts  NULL by default, otherwise preserve the
                                      value for further use in QUICK_SELECT_DESC

  @return
    TRUE   a key different from ref_key was chosen; the [out] parameters
           are set
    FALSE  no key was chosen, or the best key is ref_key itself

  @note
    This function takes into account table->opt_range_condition_rows statistic
    (that is calculated by the make_join_statistics function).
    However, single table procedures such as mysql_update() and mysql_delete()
    never call make_join_statistics, so they have to update it manually
    (@see get_index_for_order()).
*/

static bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
                         key_map usable_keys,  int ref_key,
                         ha_rows select_limit_arg,
                         int *new_key, int *new_key_direction,
                         ha_rows *new_select_limit, uint *new_used_key_parts,
                         uint *saved_best_key_parts)
{
  DBUG_ENTER("test_if_cheaper_ordering");
  /*
    Check whether there is an index compatible with the given order
    usage of which is cheaper than usage of the ref_key index (ref_key>=0)
    or a table scan.
    It may be the case if ORDER/GROUP BY is used with LIMIT.
  */
  ha_rows best_select_limit= HA_POS_ERROR;
  JOIN *join= tab ? tab->join : NULL;
  uint nr;
  key_map keys;
  uint best_key_parts= 0;
  int best_key_direction= 0;
  ha_rows best_records= 0;
  double read_time;
  int best_key= -1;
  bool is_best_covering= FALSE;
  double fanout= 1;
  ha_rows table_records= table->stat_records();
  /* True when 'order' is the GROUP BY list of a grouping join */
  bool group= join && join->group && order == join->group_list;
  ha_rows refkey_rows_estimate= table->opt_range_condition_rows;
  const bool has_limit= (select_limit_arg != HA_POS_ERROR);
  THD* thd= join ? join->thd : table->in_use;

  Json_writer_object trace_wrapper(thd);
  Json_writer_object trace_cheaper_ordering(
      thd, "reconsidering_access_paths_for_index_ordering");
  trace_cheaper_ordering.add("clause", group ? "GROUP BY" : "ORDER BY");

  /*
    If not used with LIMIT, only use keys if the whole query can be
    resolved with a key;  This is because filesort() is usually faster than
    retrieving all rows through an index.
  */
  if (select_limit_arg >= table_records)
  {
    keys= *table->file->keys_to_use_for_scanning();
    keys.merge(table->covering_keys);

    /*
      We are adding here also the index specified in FORCE INDEX clause,
      if any.
      This is to allow users to use index in ORDER BY.
    */
    if (table->force_index)
      keys.merge(group ? table->keys_in_use_for_group_by :
                         table->keys_in_use_for_order_by);
    keys.intersect(usable_keys);
  }
  else
    keys= usable_keys;

  if (join)
  {
    /*
      Take the cost of the current access method from the chosen plan and
      accumulate the fanout of the tables that follow 'tab' in the join order.
    */
    uint tablenr= (uint)(tab - join->join_tab);
    read_time= join->best_positions[tablenr].read_time;
    for (uint i= tablenr+1; i < join->table_count; i++)
    {
      fanout*= join->best_positions[i].records_read; // fanout is always >= 1
      // But selectivity is =< 1 :
      fanout*= join->best_positions[i].cond_selectivity;
    }
  }
  else
    read_time= table->file->scan_time();

  trace_cheaper_ordering.add("fanout", fanout);
  /*
    TODO: add cost of sorting here.
  */
  read_time += COST_EPS;
  trace_cheaper_ordering.add("read_time", read_time);
  /*
    Calculate the selectivity of the ref_key for REF_ACCESS. For
    RANGE_ACCESS we use table->opt_range_condition_rows.
  */
  if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
  {
    /*
      If ref access uses keypart=const for all its key parts,
      and quick select uses the same # of key parts, then they are equivalent.
      Reuse #rows estimate from quick select as it is more precise.
    */
    if (tab->ref.const_ref_part_map ==
        make_prev_keypart_map(tab->ref.key_parts) &&
        table->opt_range_keys.is_set(ref_key) &&
        table->opt_range[ref_key].key_parts == tab->ref.key_parts)
      refkey_rows_estimate= table->opt_range[ref_key].rows;
    else
    {
      const KEY *ref_keyinfo= table->key_info + ref_key;
      refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
    }
    set_if_bigger(refkey_rows_estimate, 1);
  }

  if (tab)
    trace_cheaper_ordering.add_table_name(tab);
  else
    trace_cheaper_ordering.add_table_name(table);
  trace_cheaper_ordering.add("rows_estimation", refkey_rows_estimate);

  Json_writer_array possible_keys(thd,"possible_keys");
  /* Examine every index of the table as a candidate for the ordering */
  for (nr=0; nr < table->s->keys ; nr++)
  {
    int direction;
    ha_rows select_limit= select_limit_arg;
    uint used_key_parts= 0;
    Json_writer_object possible_key(thd);
    possible_key.add("index", table->key_info[nr].name);

    if (keys.is_set(nr) &&
        (direction= test_if_order_by_key(join, order, table, nr,
                                         &used_key_parts)))
    {
      /*
        At this point we are sure that ref_key is a non-ordering
        key (where "ordering key" is a key that will return rows
        in the order required by ORDER BY).
      */
      DBUG_ASSERT (ref_key != (int) nr);

      possible_key.add("can_resolve_order", true);
      possible_key.add("direction", direction);
      bool is_covering= (table->covering_keys.is_set(nr) ||
                         (table->file->index_flags(nr, 0, 1) &
                          HA_CLUSTERED_INDEX));
      /*
        Don't use an index scan with ORDER BY without limit.
        For GROUP BY without limit always use index scan
        if there is a suitable index.
        Why we hold to this asymmetry hardly can be explained
        rationally. It's easy to demonstrate that using
        temporary table + filesort could be cheaper for grouping
        queries too.
      */
      if (is_covering ||
          select_limit != HA_POS_ERROR ||
          (ref_key < 0 && (group || table->force_index)))
      {
        double rec_per_key;
        double index_scan_time;
        KEY *keyinfo= table->key_info+nr;
        if (select_limit == HA_POS_ERROR)
          select_limit= table_records;
        if (group)
        {
          /*
            Used_key_parts can be larger than keyinfo->user_defined_key_parts
            when using a secondary index clustered with a primary
            key (e.g. as in Innodb).
            See Bug #28591 for details.
          */
          uint used_index_parts= keyinfo->user_defined_key_parts;
          uint used_pk_parts= 0;
          if (used_key_parts > used_index_parts)
            used_pk_parts= used_key_parts-used_index_parts;
          rec_per_key= used_key_parts ?
                       keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
          /* Take into account the selectivity of the used pk prefix */
          if (used_pk_parts)
          {
            KEY *pkinfo=tab->table->key_info+table->s->primary_key;
            /*
              If the values of records per key for the prefixes
              of the primary key are considered unknown we assume
              they are equal to 1.
            */
            if (used_key_parts == pkinfo->user_defined_key_parts ||
                pkinfo->rec_per_key[0] == 0)
              rec_per_key= 1;
            if (rec_per_key > 1)
            {
              rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
              rec_per_key/= pkinfo->actual_rec_per_key(0);
              /*
                The value of rec_per_key for the extended key has
                to be adjusted accordingly if some components of
                the secondary key are included in the primary key.
              */
              for(uint i= 1; i < used_pk_parts; i++)
              {
                if (pkinfo->key_part[i].field->key_start.is_set(nr))
                {
                  /*
                    We presume here that for any index rec_per_key[i] != 0
                    if rec_per_key[0] != 0.
                  */
                  DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
                  rec_per_key*= pkinfo->actual_rec_per_key(i-1);
                  rec_per_key/= pkinfo->actual_rec_per_key(i);
                }
              }
            }
          }
          set_if_bigger(rec_per_key, 1);
          /*
            With a grouping query each group containing on average
            rec_per_key records produces only one row that will
            be included into the result set.
          */
          if (select_limit > table_records/rec_per_key)
            select_limit= table_records;
          else
            select_limit= (ha_rows) (select_limit*rec_per_key);
        } /* group */

        /*
          If tab=tk is not the last joined table tn then to get first
          L records from the result set we can expect to retrieve
          only L/fanout(tk,tn) where fanout(tk,tn) says how many
          rows in the record set on average will match each row tk.
          Usually our estimates for fanouts are too pessimistic.
          So the estimate for L/fanout(tk,tn) will be too optimistic
          and as result we'll choose an index scan when using ref/range
          access + filesort will be cheaper.
        */
        select_limit= (ha_rows) (select_limit < fanout ?
                                 1 : select_limit/fanout);

        /*
          refkey_rows_estimate is E(#rows) produced by the table access
          strategy that was picked without regard to ORDER BY ... LIMIT.

          It will be used as the source of selectivity data.
          Use table->cond_selectivity as a better estimate which includes
          condition selectivity too.
        */
        {
          // we use MIN(...), because "Using LooseScan" queries have
          // cond_selectivity=1 while refkey_rows_estimate has a better
          // estimate.
          refkey_rows_estimate= MY_MIN(refkey_rows_estimate,
                                       ha_rows(table_records *
                                               table->cond_selectivity));
        }

        /*
          We assume that each of the tested indexes is not correlated
          with ref_key. Thus, to select first N records we have to scan
          N/selectivity(ref_key) index entries.
          selectivity(ref_key) = #scanned_records/#table_records =
          refkey_rows_estimate/table_records.
          In any case we can't select more than #table_records.
          N/(refkey_rows_estimate/table_records) > table_records
          <=> N > refkey_rows_estimate.
         */

        if (select_limit > refkey_rows_estimate)
          select_limit= table_records;
        else
          select_limit= (ha_rows) (select_limit *
                                   (double) table_records /
                                    refkey_rows_estimate);
        possible_key.add("updated_limit", select_limit);
        rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1);
        set_if_bigger(rec_per_key, 1);
        /*
          Here we take into account the fact that rows are
          accessed in sequences rec_per_key records in each.
          Rows in such a sequence are supposed to be ordered
          by rowid/primary key. When reading the data
          in a sequence we'll touch not more pages than the
          table file contains.
          TODO. Use the formula for a disk sweep sequential access
          to calculate the cost of accessing data rows for one
          index entry.
        */
        index_scan_time= select_limit/rec_per_key *
                         MY_MIN(rec_per_key, table->file->scan_time());
        double range_scan_time;
        /* A range or ref(const) access on this index may beat the full scan */
        if (get_range_limit_read_cost(tab, table, table_records, nr,
                                      select_limit, &range_scan_time))
        {
          possible_key.add("range_scan_time", range_scan_time);
          if (range_scan_time < index_scan_time)
            index_scan_time= range_scan_time;
        }
        possible_key.add("index_scan_time", index_scan_time);

        if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
            index_scan_time < read_time)
        {
          ha_rows quick_records= table_records;
          /*
            Rows the current ref_key would read, but only when ref_key is a
            real (non hash join) covering key; HA_POS_ERROR otherwise.
          */
          ha_rows refkey_select_limit= (ref_key >= 0 &&
                                        !is_hash_join_key_no(ref_key) &&
                                        table->covering_keys.is_set(ref_key)) ?
                                        refkey_rows_estimate :
                                        HA_POS_ERROR;
          if (is_best_covering && !is_covering)
          {
            possible_key.add("chosen", false);
            possible_key.add("cause", "covering index already found");
            continue;
          }

          if (is_covering && refkey_select_limit < select_limit)
          {
            possible_key.add("chosen", false);
            possible_key.add("cause", "ref estimates better");
            continue;
          }
          if (table->opt_range_keys.is_set(nr))
            quick_records= table->opt_range[nr].rows;
          possible_key.add("records", quick_records);
          /*
            Take this key if it is the first candidate, if it compares
            favourably to the current best key (fewer key parts when the
            limit is within both row estimates, fewer rows otherwise), or if
            it is the first covering candidate.
          */
          if (best_key < 0 ||
              (select_limit <= MY_MIN(quick_records,best_records) ?
               keyinfo->user_defined_key_parts < best_key_parts :
               quick_records < best_records) ||
              (!is_best_covering && is_covering))
          {
            possible_key.add("chosen", true);
            best_key= nr;
            best_key_parts= keyinfo->user_defined_key_parts;
            if (saved_best_key_parts)
              *saved_best_key_parts= used_key_parts;
            best_records= quick_records;
            is_best_covering= is_covering;
            best_key_direction= direction;
            best_select_limit= select_limit;
          }
          else
          {
            char const *cause;
            possible_key.add("chosen", false);
            if (is_covering)
              cause= "covering index already found";
            else
            {
              if (select_limit <= MY_MIN(quick_records,best_records))
                cause= "keyparts greater than the current best keyparts";
              else
                cause= "rows estimation greater";
            }
            possible_key.add("cause", cause);
          }
        }
        else
        {
          possible_key.add("usable", false);
          possible_key.add("cause", "cost");
        }
      }
      else
      {
        possible_key.add("usable", false);
        if (!group && select_limit == HA_POS_ERROR)
          possible_key.add("cause", "order by without limit");
      }
    }
    else
    {
      if (keys.is_set(nr))
      {
        possible_key.add("can_resolve_order", false);
        possible_key.add("cause", "order can not be resolved by key");
      }
      else
      {
        possible_key.add("can_resolve_order", false);
        possible_key.add("cause", "not usable index for the query");
      }
    }
  }

  /* Nothing found, or the best key is the one that is used already */
  if (best_key < 0 || best_key == ref_key)
    DBUG_RETURN(FALSE);

  *new_key= best_key;
  *new_key_direction= best_key_direction;
  *new_select_limit= has_limit ? best_select_limit : table_records;
  if (new_used_key_parts != NULL)
    *new_used_key_parts= best_key_parts;
  DBUG_RETURN(TRUE);
}


/**
  Find a key to apply single table UPDATE/DELETE by a given ORDER

  @param       order           Linked list of ORDER BY arguments
  @param       table           Table to find a key
  @param       select          Pointer to access/update select->quick (if any)
  @param       limit           LIMIT clause parameter
  @param [out] scanned_limit   How many records we expect to scan
                               Valid if *need_sort=FALSE.
  @param [out] need_sort       TRUE if filesort needed
  @param [out] reverse
    TRUE if the key is reversed again given ORDER (undefined if key == MAX_KEY)

  @return
    - MAX_KEY if no key found                        (need_sort == TRUE)
    - MAX_KEY if quick select result order is OK     (need_sort == FALSE)
    - key number (either index scan or quick select) (need_sort == FALSE)

  @note
    Side effects:
    - may deallocate or deallocate and replace select->quick;
    - may set table->opt_range_condition_rows and table->quick_rows[...]
      to table->file->stats.records.
+*/ + +uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, + ha_rows limit, ha_rows *scanned_limit, + bool *need_sort, bool *reverse) +{ + if (!order) + { + *need_sort= FALSE; + if (select && select->quick) + return select->quick->index; // index or MAX_KEY, use quick select as is + else + return table->file->key_used_on_scan; // MAX_KEY or index for some engines + } + + if (!is_simple_order(order)) // just to cut further expensive checks + { + *need_sort= TRUE; + return MAX_KEY; + } + + if (select && select->quick) + { + if (select->quick->index == MAX_KEY) + { + *need_sort= TRUE; + return MAX_KEY; + } + + uint used_key_parts; + switch (test_if_order_by_key(NULL, order, table, select->quick->index, + &used_key_parts)) { + case 1: // desired order + *need_sort= FALSE; + *scanned_limit= MY_MIN(limit, select->quick->records); + return select->quick->index; + case 0: // unacceptable order + *need_sort= TRUE; + return MAX_KEY; + case -1: // desired order, but opposite direction + { + QUICK_SELECT_I *reverse_quick; + if ((reverse_quick= + select->quick->make_reverse(used_key_parts))) + { + select->set_quick(reverse_quick); + *need_sort= FALSE; + *scanned_limit= MY_MIN(limit, select->quick->records); + return select->quick->index; + } + else + { + *need_sort= TRUE; + return MAX_KEY; + } + } + } + DBUG_ASSERT(0); + } + else if (limit != HA_POS_ERROR) + { // check if some index scan & LIMIT is more efficient than filesort + + /* + Update opt_range_condition_rows since single table UPDATE/DELETE + procedures don't call make_join_statistics() and leave this + variable uninitialized. 
+ */ + table->opt_range_condition_rows= table->stat_records(); + + int key, direction; + if (test_if_cheaper_ordering(NULL, order, table, + table->keys_in_use_for_order_by, -1, + limit, + &key, &direction, &limit) && + !is_key_used(table, key, table->write_set)) + { + *need_sort= FALSE; + *scanned_limit= limit; + *reverse= (direction < 0); + return key; + } + } + *need_sort= TRUE; + return MAX_KEY; +} + + +/* + Count how many times the specified conditions are true for first rows_to_read + rows of the table. + + @param thd Thread handle + @param rows_to_read How many rows to sample + @param table Table to use + @conds conds INOUT List of conditions and counters for them + + @return Number of we've checked. It can be equal or less than rows_to_read. + 0 is returned for error or when the table had no rows. +*/ + +ulong check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List<COND_STATISTIC> *conds) +{ + ulong count= 0; + COND_STATISTIC *cond; + List_iterator_fast<COND_STATISTIC> it(*conds); + handler *file= table->file; + uchar *record= table->record[0]; + int error= 0; + DBUG_ENTER("check_selectivity"); + + DBUG_ASSERT(rows_to_read > 0); + while ((cond= it++)) + { + DBUG_ASSERT(cond->cond); + DBUG_ASSERT(cond->cond->used_tables() == table->map); + cond->positive= 0; + } + it.rewind(); + + if (unlikely(file->ha_rnd_init_with_error(1))) + DBUG_RETURN(0); + do + { + error= file->ha_rnd_next(record); + + if (unlikely(thd->killed)) + { + thd->send_kill_message(); + count= 0; + goto err; + } + if (unlikely(error)) + { + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + + count++; + while ((cond= it++)) + { + if (cond->cond->val_bool()) + cond->positive++; + } + it.rewind(); + + } while (count < rows_to_read); + + file->ha_rnd_end(); + DBUG_RETURN(count); + +err: + DBUG_PRINT("error", ("error %d", error)); + file->ha_rnd_end(); + DBUG_RETURN(0); +} + +/**************************************************************************** + AGGR_OP 
implementation +****************************************************************************/ + +/** + @brief Instantiate tmp table for aggregation and start index scan if needed + @todo Tmp table always would be created, even for empty result. Extend + executor to avoid tmp table creation when no rows were written + into tmp table. + @return + true error + false ok +*/ + +bool +AGGR_OP::prepare_tmp_table() +{ + TABLE *table= join_tab->table; + JOIN *join= join_tab->join; + int rc= 0; + + if (!join_tab->table->is_created()) + { + if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + join->select_options)) + return true; + (void) table->file->extra(HA_EXTRA_WRITE_CACHE); + } + /* If it wasn't already, start index scan for grouping using table index. */ + if (!table->file->inited && table->group && + join_tab->tmp_table_param->sum_func_count && table->s->keys) + rc= table->file->ha_index_init(0, 0); + else + { + /* Start index scan in scanning mode */ + rc= table->file->ha_rnd_init(true); + } + if (rc) + { + table->file->print_error(rc, MYF(0)); + return true; + } + return false; +} + + +/** + @brief Prepare table if necessary and call write_func to save record + + @param end_of_records the end_of_record signal to pass to the writer + + @return return one of enum_nested_loop_state. +*/ + +enum_nested_loop_state +AGGR_OP::put_record(bool end_of_records) +{ + // Lasy tmp table creation/initialization + if (!join_tab->table->file->inited) + if (prepare_tmp_table()) + return NESTED_LOOP_ERROR; + enum_nested_loop_state rc= (*write_func)(join_tab->join, join_tab, + end_of_records); + return rc; +} + + +/** + @brief Finish rnd/index scan after accumulating records, switch ref_array, + and send accumulated records further. + @return return one of enum_nested_loop_state. 
+*/ + +enum_nested_loop_state +AGGR_OP::end_send() +{ + enum_nested_loop_state rc= NESTED_LOOP_OK; + TABLE *table= join_tab->table; + JOIN *join= join_tab->join; + + // All records were stored, send them further + int tmp, new_errno= 0; + + if ((rc= put_record(true)) < NESTED_LOOP_OK) + return rc; + + if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE))) + { + DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed")); + new_errno= tmp; + } + if ((tmp= table->file->ha_index_or_rnd_end())) + { + DBUG_PRINT("error",("ha_index_or_rnd_end() failed")); + new_errno= tmp; + } + if (new_errno) + { + table->file->print_error(new_errno,MYF(0)); + return NESTED_LOOP_ERROR; + } + + // Update ref array + join_tab->join->set_items_ref_array(*join_tab->ref_array); + bool keep_last_filesort_result = join_tab->filesort ? false : true; + if (join_tab->window_funcs_step) + { + if (join_tab->window_funcs_step->exec(join, keep_last_filesort_result)) + return NESTED_LOOP_ERROR; + } + + table->reginfo.lock_type= TL_UNLOCK; + + bool in_first_read= true; + + /* + Reset the counter before copying rows from internal temporary table to + INSERT table. 
+ */ + join_tab->join->thd->get_stmt_da()->reset_current_row_for_warning(1); + while (rc == NESTED_LOOP_OK) + { + int error; + if (in_first_read) + { + in_first_read= false; + error= join_init_read_record(join_tab); + } + else + error= join_tab->read_record.read_record(); + + if (unlikely(error > 0 || (join->thd->is_error()))) // Fatal error + rc= NESTED_LOOP_ERROR; + else if (error < 0) + break; + else if (unlikely(join->thd->killed)) // Aborted by user + { + join->thd->send_kill_message(); + rc= NESTED_LOOP_KILLED; + } + else + { + rc= evaluate_join_record(join, join_tab, 0); + } + } + + if (keep_last_filesort_result) + { + delete join_tab->filesort_result; + join_tab->filesort_result= NULL; + } + + // Finish rnd scn after sending records + if (join_tab->table->file->inited) + join_tab->table->file->ha_rnd_end(); + + return rc; +} + + +/** + @brief + Remove marked top conjuncts of a condition + + @param thd The thread handle + @param cond The condition which subformulas are to be removed + + @details + The function removes all top conjuncts marked with the flag + MARKER_FULL_EXTRACTION from the condition 'cond'. The resulting + formula is returned a the result of the function + If 'cond' s marked with such flag the function returns 0. 
+ The function clear the extraction flags for the removed + formulas + + @retval + condition without removed subformulas + 0 if the whole 'cond' is removed +*/ + +Item *remove_pushed_top_conjuncts(THD *thd, Item *cond) +{ + if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + cond->clear_extraction_flag(); + return 0; + } + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (item->get_extraction_flag() == MARKER_FULL_EXTRACTION) + { + item->clear_extraction_flag(); + li.remove(); + } + } + switch (((Item_cond*) cond)->argument_list()->elements) + { + case 0: + return 0; + case 1: + return ((Item_cond*) cond)->argument_list()->head(); + default: + return cond; + } + } + } + return cond; +} + + +/* + There are 5 cases in which we shortcut the join optimization process as we + conclude that the join would be a degenerate one + 1) IMPOSSIBLE WHERE + 2) MIN/MAX optimization (@see opt_sum_query) + 3) EMPTY CONST TABLE + If a window function is present in any of the above cases then to get the + result of the window function, we need to execute it. So we need to + create a temporary table for its execution. Here we need to take in mind + that aggregate functions and non-aggregate function need not be executed. + +*/ + +void JOIN::handle_implicit_grouping_with_window_funcs() +{ + if (select_lex->have_window_funcs() && send_row_on_empty_set()) + { + const_tables= top_join_tab_count= table_count= 0; + } +} + + + +/* + @brief + Perform a partial cleanup for the JOIN_TAB structure + + @note + this is used to cleanup resources for the re-execution of correlated + subqueries. 
+*/ +void JOIN_TAB::partial_cleanup() +{ + if (!table) + return; + + if (table->is_created()) + { + table->file->ha_index_or_rnd_end(); + DBUG_PRINT("info", ("close index: %s.%s alias: %s", + table->s->db.str, + table->s->table_name.str, + table->alias.c_ptr())); + if (aggr) + { + int tmp= 0; + if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE))) + table->file->print_error(tmp, MYF(0)); + } + } + delete filesort_result; + filesort_result= NULL; + free_cache(&read_record); +} + +/** + @brief + Construct not null conditions for provingly not nullable fields + + @details + For each non-constant joined table the function creates a conjunction + of IS NOT NULL predicates containing a predicate for each field used + in the WHERE clause or an OR expression such that + - is declared as nullable + - for which it can proved be that it is null-rejected + - is a part of some index. + This conjunction could be anded with either the WHERE condition or with + an ON expression and the modified join query would produce the same + result set as the original one. + If a conjunction of IS NOT NULL predicates is constructed for an inner + table of an outer join OJ that is not an inner table of embedded outer + joins then it is to be anded with the ON expression of OJ. + The constructed conjunctions of IS NOT NULL predicates are attached + to the corresponding tables. They used for range analysis complementary + to other sargable range conditions. + + @note + Let f be a field of the joined table t. 
In the context of the upper + paragraph field f is called null-rejected if any the following holds: + + - t is a table of a top inner join and a conjunctive formula that rejects + rows with null values for f can be extracted from the WHERE condition + + - t is an outer table of a top outer join operation and a conjunctive + formula over the outer tables of the outer join that rejects rows with + null values for can be extracted from the WHERE condition + + - t is an outer table of a non-top outer join operation and a conjunctive + formula over the outer tables of the outer join that rejects rows with + null values for f can be extracted from the ON expression of the + embedding outer join + + - the joined table is an inner table of a outer join operation and + a conjunctive formula over inner tables of the outer join that rejects + rows with null values for f can be extracted from the ON expression of + the outer join operation. + + It is assumed above that all inner join nests have been eliminated and + that all possible conversions of outer joins into inner joins have been + already done. 
+*/ + +void JOIN::make_notnull_conds_for_range_scans() +{ + DBUG_ENTER("JOIN::make_notnull_conds_for_range_scans"); + + if (impossible_where || + !optimizer_flag(thd, OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN)) + { + /* Complementary range analysis is not needed */ + DBUG_VOID_RETURN; + } + + if (conds && build_notnull_conds_for_range_scans(this, conds, + conds->used_tables())) + { + /* + Found a IS NULL conjunctive predicate for a null-rejected field + in the WHERE clause + */ + conds= (Item*) Item_false; + cond_equal= 0; + impossible_where= true; + DBUG_VOID_RETURN; + } + + List_iterator<TABLE_LIST> li(*join_list); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (tbl->on_expr) + { + if (tbl->nested_join) + { + build_notnull_conds_for_inner_nest_of_outer_join(this, tbl); + } + else if (build_notnull_conds_for_range_scans(this, tbl->on_expr, + tbl->table->map)) + { + /* + Found a IS NULL conjunctive predicate for a null-rejected field + of the inner table of an outer join with ON expression tbl->on_expr + */ + tbl->on_expr= (Item*) Item_false; + } + } + } + DBUG_VOID_RETURN; +} + + +/** + @brief + Build not null conditions for range scans of given join tables + + @param join the join for whose tables not null conditions are to be built + @param cond the condition from which not null predicates are to be inferred + @param allowed the bit map of join tables to be taken into account + + @details + For each join table t from the 'allowed' set of tables the function finds + all fields whose null-rejectedness can be inferred from null-rejectedness + of the condition cond. For each found field f from table t such that it + participates at least in one index on table t a NOT NULL predicate is + constructed and a conjunction of all such predicates is attached to t. 
+ If when looking for null-rejecting fields of t it is discovered one of its + fields has to be null-rejected and there is IS NULL conjunctive top level + predicate for this field then the function immediately returns true. + The function uses the bitmap TABLE::tmp_set to mark found null-rejected + fields of table t. + + @note + Currently only top level conjuncts without disjunctive sub-formulas are + are taken into account when looking for null-rejected fields. + + @retval + true if a contradiction is inferred + false otherwise +*/ + +static +bool build_notnull_conds_for_range_scans(JOIN *join, Item *cond, + table_map allowed) +{ + THD *thd= join->thd; + DBUG_ENTER("build_notnull_conds_for_range_scans"); + + for (JOIN_TAB *s= join->join_tab; + s < join->join_tab + join->table_count ; s++) + { + /* Clear all needed bitmaps to mark found fields */ + if ((allowed & s->table->map) && + !(s->table->map & join->const_table_map)) + bitmap_clear_all(&s->table->tmp_set); + } + + /* + Find all null-rejected fields assuming that cond is null-rejected and + only formulas over tables from 'allowed' are to be taken into account + */ + if (cond->find_not_null_fields(allowed)) + DBUG_RETURN(true); + + /* + For each table t from 'allowed' build a conjunction of NOT NULL predicates + constructed for all found fields if they are included in some indexes. + If the construction of the conjunction succeeds attach the formula to + t->table->notnull_cond. The condition will be used to look for + complementary range scans. 
+ */ + for (JOIN_TAB *s= join->join_tab ; + s < join->join_tab + join->table_count ; s++) + { + TABLE *tab= s->table; + List<Item> notnull_list; + Item *notnull_cond= 0; + + if (!(allowed & tab->map) || + (s->table->map && join->const_table_map)) + continue; + + for (Field** field_ptr= tab->field; *field_ptr; field_ptr++) + { + Field *field= *field_ptr; + if (field->part_of_key.is_clear_all()) + continue; + if (!bitmap_is_set(&tab->tmp_set, field->field_index)) + continue; + Item_field *field_item= new (thd->mem_root) Item_field(thd, field); + if (!field_item) + continue; + Item *isnotnull_item= + new (thd->mem_root) Item_func_isnotnull(thd, field_item); + if (!isnotnull_item) + continue; + if (notnull_list.push_back(isnotnull_item, thd->mem_root)) + continue; + s->const_keys.merge(field->part_of_key); + } + + switch (notnull_list.elements) { + case 0: + break; + case 1: + notnull_cond= notnull_list.head(); + break; + default: + notnull_cond= + new (thd->mem_root) Item_cond_and(thd, notnull_list); + } + if (notnull_cond && !notnull_cond->fix_fields(thd, 0)) + { + tab->notnull_cond= notnull_cond; + } + } + DBUG_RETURN(false); +} + + +/** + @brief + Build not null conditions for inner nest tables of an outer join + + @param join the join for whose table nest not null conditions are to be built + @param nest_tbl the nest of the inner tables of an outer join + + @details + The function assumes that nest_tbl is the nest of the inner tables of an + outer join and so an ON expression for this outer join is attached to + nest_tbl. + The function selects the tables of the nest_tbl that are not inner tables of + embedded outer joins and then it calls build_notnull_conds_for_range_scans() + for nest_tbl->on_expr and the bitmap for the selected tables. This call + finds all fields belonging to the selected tables whose null-rejectedness + can be inferred from the null-rejectedness of nest_tbl->on_expr. 
After this + the function recursively finds all null_rejected fields for the remaining + tables from the nest of nest_tbl. +*/ + +static +void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, + TABLE_LIST *nest_tbl) +{ + TABLE_LIST *tbl; + table_map used_tables= 0; + List_iterator<TABLE_LIST> li(nest_tbl->nested_join->join_list); + + while ((tbl= li++)) + { + if (!tbl->on_expr) + used_tables|= tbl->table->map; + } + if (used_tables && + build_notnull_conds_for_range_scans(join, nest_tbl->on_expr, used_tables)) + { + nest_tbl->on_expr= (Item*) Item_false; + } + + li.rewind(); + while ((tbl= li++)) + { + if (tbl->on_expr) + { + if (tbl->nested_join) + { + build_notnull_conds_for_inner_nest_of_outer_join(join, tbl); + } + else if (build_notnull_conds_for_range_scans(join, tbl->on_expr, + tbl->table->map)) + tbl->on_expr= (Item*) Item_false; + } + } +} + + +/* + @brief + Initialize join cache and enable keyread +*/ +void JOIN::init_join_cache_and_keyread() +{ + JOIN_TAB *tab; + for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); + tab; + tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + { + TABLE *table= tab->table; + switch (tab->type) { + case JT_SYSTEM: + case JT_CONST: + case JT_FT: + case JT_UNKNOWN: + case JT_MAYBE_REF: + break; + case JT_EQ_REF: + case JT_REF_OR_NULL: + case JT_REF: + if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) + table->file->ha_start_keyread(tab->ref.key); + break; + case JT_HASH: + case JT_ALL: + SQL_SELECT *select; + select= tab->select ? tab->select : + (tab->filesort ? 
tab->filesort->select : NULL); + if (select && select->quick && select->quick->index != MAX_KEY && + table->covering_keys.is_set(select->quick->index) && + !table->no_keyread) + table->file->ha_start_keyread(select->quick->index); + break; + case JT_HASH_NEXT: + case JT_NEXT: + if ((tab->read_first_record == join_read_first || + tab->read_first_record == join_read_last) && + table->covering_keys.is_set(tab->index) && + !table->no_keyread) + { + DBUG_ASSERT(!tab->filesort); + table->file->ha_start_keyread(tab->index); + } + break; + default: + break; + /* purecov: end */ + } + + if (table->file->keyread_enabled()) + { + /* + Here we set the read_set bitmap for all covering keys + except CLUSTERED indexes, with all the key-parts inside the key. + This is needed specifically for an index that contains virtual column. + + Example: + Lets say we have this query + SELECT b FROM t1; + + and the table definition is like + CREATE TABLE t1( + a varchar(10) DEFAULT NULL, + b varchar(255) GENERATED ALWAYS AS (a) VIRTUAL, + KEY key1 (b)); + + So we a virtual column b and an index key1 defined on the virtual + column. So if a query uses a vcol, base columns that it + depends on are automatically added to the read_set - because they're + needed to calculate the vcol. + But if we're doing keyread, vcol is taken + from the index, not calculated, and base columns do not need to be + in the read set. To ensure this we try to set the read_set to only + the key-parts of the indexes. + + Another side effect of this is + Lets say you have a query + select a, b from t1 + and there is an index key1 (a,b,c) + then as key1 is covering and we would have the keyread enable for + this key, so the below call will also set the read_set for column + c, which is not a problem as we read all the columns from the index + tuple. 
+ */ + if (!(table->file->index_flags(table->file->keyread, 0, 1) & HA_CLUSTERED_INDEX)) + table->mark_index_columns(table->file->keyread, table->read_set); + } + if (tab->cache && tab->cache->init(select_options & SELECT_DESCRIBE)) + revise_cache_usage(tab); + else + tab->remove_redundant_bnl_scan_conds(); + } +} + + +/* + @brief + Unpack temp table fields to base table fields. +*/ + +void unpack_to_base_table_fields(TABLE *table) +{ + JOIN_TAB *tab= table->reginfo.join_tab; + for (Copy_field *cp= tab->read_record.copy_field; + cp != tab->read_record.copy_field_end; cp++) + (*cp->do_copy)(cp); +} + +/* + Call item->fix_after_optimize for all items registered in + lex->fix_after_optimize + + This is needed for items like ROWNUM(), which needs access to structures + created by the early optimizer pass, like JOIN +*/ + +static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex) +{ + List_iterator<Item> li(select_lex->fix_after_optimize); + + while (Item *item= li++) + item->fix_after_optimize(thd); +} + + +/* + Set a limit for the SELECT_LEX_UNIT based on ROWNUM usage. 
+ The limit is shown in EXPLAIN +*/ + +static bool set_limit_for_unit(THD *thd, SELECT_LEX_UNIT *unit, ha_rows lim) +{ + SELECT_LEX *gpar= unit->global_parameters(); + if (gpar->limit_params.select_limit != 0 && + // limit can not be an expression but can be parameter + (!gpar->limit_params.select_limit->basic_const_item() || + ((ha_rows)gpar->limit_params.select_limit->val_int()) < lim)) + return false; + + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + gpar->limit_params.select_limit= + new (thd->mem_root) Item_int(thd, lim, MAX_BIGINT_WIDTH); + if (gpar->limit_params.select_limit == 0) + return true; // EOM + + unit->set_limit(gpar); + + gpar->limit_params.explicit_limit= true; // to show in EXPLAIN + + if (arena) + thd->restore_active_arena(arena, &backup); + + return false; +} + + +/** + Check possibility of LIMIT setting by rownum() of upper SELECT and do it + + @note Ideal is to convert something like + SELECT ... + FROM (SELECT ...) table + WHERE rownum() < <CONSTANT>; + to + SELECT ... + FROM (SELECT ... 
LIMIT <CONSTANT>) table + WHERE rownum() < <CONSTANT>; + + @retval true EOM + @retval false no errors +*/ + +bool JOIN::optimize_upper_rownum_func() +{ + DBUG_ASSERT(select_lex->master_unit()->derived); + + if (select_lex->master_unit()->first_select() != select_lex) + return false; // first will set parameter + + if (select_lex->master_unit()->global_parameters()-> + limit_params.offset_limit != NULL) + return false; // offset is set, we cannot set limit + + SELECT_LEX *outer_select= select_lex->master_unit()->outer_select(); + /* + Check that it is safe to use rownum-limit from the outer query + (the one that has 'WHERE rownum()...') + */ + if (outer_select == NULL || + !outer_select->with_rownum || + (outer_select->options & SELECT_DISTINCT) || + outer_select->table_list.elements != 1 || + outer_select->where == NULL || + outer_select->where->type() != Item::FUNC_ITEM) + return false; + + return process_direct_rownum_comparison(thd, unit, outer_select->where); +} + + +/** + Test if the predicate compares rownum() with a constant + + @return 1 No or invalid rownum() compare + @return 0 rownum() is compared with a constant. + In this case *args contains the constant and + *inv_order constains 1 if the rownum() was the right + argument, like in 'WHERE 2 >= rownum()'. 
+*/ + +static bool check_rownum_usage(Item_func *func_item, longlong *limit, + bool *inv_order) +{ + Item *arg1, *arg2; + *inv_order= 0; + DBUG_ASSERT(func_item->argument_count() == 2); + + /* 'rownum op const' or 'const op field' */ + arg1= func_item->arguments()[0]->real_item(); + if (arg1->type() == Item::FUNC_ITEM && + ((Item_func*) arg1)->functype() == Item_func::ROWNUM_FUNC) + { + arg2= func_item->arguments()[1]->real_item(); + if (arg2->can_eval_in_optimize()) + { + *limit= arg2->val_int(); + return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR; + } + } + else if (arg1->can_eval_in_optimize()) + { + arg2= func_item->arguments()[1]->real_item(); + if (arg2->type() == Item::FUNC_ITEM && + ((Item_func*) arg2)->functype() == Item_func::ROWNUM_FUNC) + { + *limit= arg1->val_int(); + *inv_order= 1; + return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR; + } + } + return 1; +} + + +/* + Limit optimization for ROWNUM() + + Go through the WHERE clause and find out if there are any of the following + constructs on the top level: + rownum() <= integer_constant + rownum() < integer_constant + rownum() = 1 + + If yes, then threat the select as if 'LIMIT integer_constant' would + have been used +*/ + +static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, + Item *cond) +{ + DBUG_ENTER("optimize_rownum"); + + if (cond->type() == Item::COND_ITEM) + { + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + optimize_rownum(thd, unit, item); + } + DBUG_VOID_RETURN; + } + + process_direct_rownum_comparison(thd, unit, cond); + DBUG_VOID_RETURN; +} + + +static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, + Item *cond) +{ + DBUG_ENTER("process_direct_rownum_comparison"); + if (cond->real_type() == Item::FUNC_ITEM) + { + Item_func *pred= (Item_func*) cond; + longlong limit; + bool inv; + + if (pred->argument_count() != 2) + 
DBUG_RETURN(false); // Not a compare functions + if (check_rownum_usage(pred, &limit, &inv)) + DBUG_RETURN(false); + + Item_func::Functype pred_type= pred->functype(); + + if (inv && pred_type != Item_func::EQ_FUNC) + { + if (pred_type == Item_func::GT_FUNC) // # > rownum() + pred_type= Item_func::LT_FUNC; + else if (pred_type == Item_func::GE_FUNC) // # >= rownum() + pred_type= Item_func::LE_FUNC; + else + DBUG_RETURN(false); + } + switch (pred_type) { + case Item_func::LT_FUNC: // rownum() < # + { + if (limit <= 0) + DBUG_RETURN(false); + DBUG_RETURN(set_limit_for_unit(thd, unit, limit - 1)); + case Item_func::LE_FUNC: + DBUG_RETURN(set_limit_for_unit(thd, unit, limit)); + case Item_func::EQ_FUNC: + if (limit == 1) + DBUG_RETURN(set_limit_for_unit(thd, unit, limit)); + break; + default: + break; + } + } + } + DBUG_RETURN(false); +} + +/** + @brief + Transform IN predicates having equal constant elements to equalities + + @param thd The context of the statement + + @details + If all elements in an IN predicate are constant and equal to each other + then clause + - "a IN (e1,..,en)" can be transformed to "a = e1" + - "a NOT IN (e1,..,en)" can be transformed to "a != e1". + This means an object of Item_func_in can be replaced with an object of + Item_func_eq for IN (e1,..,en) clause or Item_func_ne for + NOT IN (e1,...,en). + Such a replacement allows the optimizer to choose a better execution plan. 
+ + This methods applies such transformation for each IN predicate of the WHERE + condition and ON expressions of this join where possible + + @retval + false success + true failure +*/ +bool JOIN::transform_in_predicates_into_equalities(THD *thd) +{ + DBUG_ENTER("JOIN::transform_in_predicates_into_equalities"); + DBUG_RETURN(transform_all_conds_and_on_exprs( + thd, &Item::in_predicate_to_equality_transformer)); +} + + +/** + @brief + Transform all items in WHERE and ON expressions using a given transformer + + @param thd The context of the statement + transformer Pointer to the transformation function + + @details + For each item of the WHERE condition and ON expressions of the SELECT + for this join the method performs the intransformation using the given + transformation function + + @retval + false success + true failure +*/ +bool JOIN::transform_all_conds_and_on_exprs(THD *thd, + Item_transformer transformer) +{ + if (conds) + { + conds= conds->top_level_transform(thd, transformer, (uchar *) 0); + if (!conds) + return true; + } + if (join_list) + { + if (transform_all_conds_and_on_exprs_in_join_list(thd, join_list, + transformer)) + return true; + } + return false; +} + + +bool JOIN::transform_all_conds_and_on_exprs_in_join_list( + THD *thd, List<TABLE_LIST> *join_list, Item_transformer transformer) +{ + TABLE_LIST *table; + List_iterator<TABLE_LIST> li(*join_list); + + while ((table= li++)) + { + if (table->nested_join) + { + if (transform_all_conds_and_on_exprs_in_join_list( + thd, &table->nested_join->join_list, transformer)) + return true; + } + if (table->on_expr) + { + table->on_expr= table->on_expr->top_level_transform(thd, transformer, 0); + if (!table->on_expr) + return true; + } + } + return false; +} + + +/** + @} (end of group Query_Optimizer) +*/ |