author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-07-01 18:15:00 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-07-01 18:15:00 +0000
commit     a2a2e32c02643a0cec111511220227703fda1cd5 (patch)
tree       69cc2b631234c2a8e026b9cd4d72676c61c594df /sql/ha_partition.cc
parent     Releasing progress-linux version 1:10.11.8-1~progress7.99u1. (diff)
Merging upstream version 1:11.4.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sql/ha_partition.cc')
-rw-r--r--  sql/ha_partition.cc  223
1 file changed, 152 insertions(+), 71 deletions(-)
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 8cd1cb47..177f49fb 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -75,8 +75,7 @@
                                         HA_REC_NOT_IN_SEQ | \
                                         HA_CAN_REPAIR | \
                                         HA_REUSES_FILE_NAMES)
-#define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
-                                        HA_DUPLICATE_POS | \
+#define PARTITION_DISABLED_TABLE_FLAGS (HA_DUPLICATE_POS | \
                                         HA_CAN_INSERT_DELAYED | \
                                         HA_READ_BEFORE_WRITE_REMOVAL |\
                                         HA_CAN_TABLES_WITHOUT_ROLLBACK)
@@ -99,19 +98,19 @@ int ha_partition::notify_tabledef_changed(LEX_CSTRING *db,
                                           LEX_CUSTRING *frm,
                                           LEX_CUSTRING *version)
 {
-  char from_buff[FN_REFLEN + 1], from_lc_buff[FN_REFLEN + 1];
-  const char *from_path, *name_buffer_ptr, *from;
+  char from_buff[FN_REFLEN + 1];
+  Table_path_buffer from_lc_buff;
+  const char *from_path, *name_buffer_ptr;
   int res= 0;
   handler **file= m_file;
   DBUG_ENTER("ha_partition::notify_tabledef_changed");
 
-  from= table->s->normalized_path.str;
-
   /* setup m_name_buffer_ptr */
   if (read_par_file(table->s->normalized_path.str))
     DBUG_RETURN(1);
 
-  from_path= get_canonical_filename(*file, from, from_lc_buff);
+  from_path= file[0]->get_canonical_filename(table->s->normalized_path,
+                                             &from_lc_buff).str;
   name_buffer_ptr= m_name_buffer_ptr;
   do
   {
@@ -771,7 +770,8 @@ int ha_partition::create(const char *name, TABLE *table_arg,
 {
   int error;
   THD *thd= ha_thd();
-  char name_buff[FN_REFLEN + 1], name_lc_buff[FN_REFLEN];
+  char name_buff[FN_REFLEN + 1];
+  Table_path_buffer name_lc_buff;
   char *name_buffer_ptr;
   const char *path;
   uint i;
@@ -821,7 +821,8 @@ int ha_partition::create(const char *name, TABLE *table_arg,
     The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
     Using the first partitions handler, since mixing handlers is not allowed.
   */
-  path= get_canonical_filename(*file, name, name_lc_buff);
+  path= file[0]->get_canonical_filename(Lex_cstring_strlen(name),
+                                        &name_lc_buff).str;
   for (i= 0; i < m_part_info->num_parts; i++)
   {
     part_elem= part_it++;
@@ -913,8 +914,8 @@ int ha_partition::drop_partitions(const char *path)
     Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
     We use m_file[0] as long as all partitions have the same storage engine.
   */
-  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
-                                                   part_name_buff)));
+  DBUG_ASSERT(m_file[0]->is_canonical_filename(Lex_cstring_strlen(path)));
+
   do
   {
     partition_element *part_elem= part_it++;
@@ -1018,8 +1019,7 @@ int ha_partition::rename_partitions(const char *path)
     Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
     We use m_file[0] as long as all partitions have the same storage engine.
   */
-  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
-                                                   norm_name_buff)));
+  DBUG_ASSERT(m_file[0]->is_canonical_filename(Lex_cstring_strlen(path)));
   DEBUG_SYNC(ha_thd(), "before_rename_partitions");
 
   if (temp_partitions)
@@ -1857,8 +1857,8 @@ int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
     Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
     We use m_file[0] as long as all partitions have the same storage engine.
   */
-  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
-                                                   part_name_buff)));
+  DBUG_ASSERT(m_file[0]->is_canonical_filename(Lex_cstring_strlen(path)));
+
   m_reorged_parts= 0;
   if (!m_part_info->is_sub_partitioned())
     num_subparts= 1;
@@ -2441,8 +2441,8 @@ uint ha_partition::del_ren_table(const char *from, const char *to)
 {
   int save_error= 0;
   int error;
-  char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1],
-       from_lc_buff[FN_REFLEN], to_lc_buff[FN_REFLEN];
+  char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1];
+  Table_path_buffer from_lc_buff, to_lc_buff;
   char *name_buffer_ptr;
   const char *from_path;
   const char *to_path= NULL;
@@ -2482,9 +2482,11 @@ uint ha_partition::del_ren_table(const char *from, const char *to)
     The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
     Using the first partitions handler, since mixing handlers is not allowed.
   */
-  from_path= get_canonical_filename(*file, from, from_lc_buff);
+  from_path= file[0]->get_canonical_filename(Lex_cstring_strlen(from),
+                                             &from_lc_buff).str;
   if (to != NULL)
-    to_path= get_canonical_filename(*file, to, to_lc_buff);
+    to_path= file[0]->get_canonical_filename(Lex_cstring_strlen(to),
+                                             &to_lc_buff).str;
   do
   {
     if (unlikely((error= create_partition_name(from_buff, sizeof(from_buff),
@@ -4775,8 +4777,8 @@ exit:
       part_share->next_auto_inc_val if needed.
       (not to be used if auto_increment on secondary field in a multi-column
       index)
-      mysql_update does not set table->next_number_field, so we use
-      table->found_next_number_field instead.
+      Sql_cmd_update::update_single_table() does not set table->next_number_field,
+      so we use table->found_next_number_field instead.
       Also checking that the field is marked in the write set.
     */
     if (table->found_next_number_field &&
@@ -4889,7 +4891,7 @@ int ha_partition::delete_row(const uchar *buf)
   Called from item_sum.cc by Item_func_group_concat::clear(),
   Item_sum_count::clear(), and Item_func_group_concat::clear().
-  Called from sql_delete.cc by mysql_delete().
+  Called from sql_delete.cc by Sql_cmd_delete::delete_single_table().
   Called from sql_select.cc by JOIN::reset().
   Called from sql_union.cc by st_select_lex_unit::exec().
 */
@@ -6655,7 +6657,7 @@ ha_rows ha_partition::multi_range_read_info_const(uint keyno,
                                                   RANGE_SEQ_IF *seq,
                                                   void *seq_init_param,
                                                   uint n_ranges, uint *bufsz,
-                                                  uint *mrr_mode,
+                                                  uint *mrr_mode, ha_rows limit,
                                                   Cost_estimate *cost)
 {
   int error;
@@ -6709,14 +6711,14 @@ ha_rows ha_partition::multi_range_read_info_const(uint keyno,
       ha_rows tmp_rows;
       uint tmp_mrr_mode;
       m_mrr_buffer_size[i]= 0;
-      part_cost.reset();
+      part_cost.reset(*file);
       tmp_mrr_mode= *mrr_mode;
       tmp_rows= (*file)->
         multi_range_read_info_const(keyno, &m_part_seq_if,
                                     &m_partition_part_key_multi_range_hld[i],
                                     m_part_mrr_range_length[i],
                                     &m_mrr_buffer_size[i],
-                                    &tmp_mrr_mode, &part_cost);
+                                    &tmp_mrr_mode, limit, &part_cost);
       if (tmp_rows == HA_POS_ERROR)
       {
         m_part_spec= save_part_spec;
@@ -6760,7 +6762,7 @@ ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges,
     {
       ha_rows tmp_rows;
       m_mrr_buffer_size[i]= 0;
-      part_cost.reset();
+      part_cost.reset(*file);
       if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys,
                                                     key_parts,
                                                     &m_mrr_buffer_size[i],
@@ -9860,16 +9862,28 @@ uint ha_partition::get_biggest_used_partition(uint *part_index)
     time for scan
 */
 
-double ha_partition::scan_time()
+IO_AND_CPU_COST ha_partition::scan_time()
 {
-  double scan_time= 0;
+  IO_AND_CPU_COST scan_time= {0,0};
   uint i;
   DBUG_ENTER("ha_partition::scan_time");
 
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
-    scan_time+= m_file[i]->scan_time();
+  {
+    IO_AND_CPU_COST cost= m_file[i]->scan_time();
+    scan_time.io+= cost.io;
+    scan_time.cpu+= cost.cpu;
+  }
+  if (m_tot_parts)
+  {
+    /*
+      Add TABLE_SCAN_SETUP_COST for partitions to make cost similar to
+      in ha_scan_time()
+    */
+    scan_time.cpu+= TABLE_SCAN_SETUP_COST * (m_tot_parts - 1);
+  }
   DBUG_RETURN(scan_time);
 }
@@ -9883,34 +9897,78 @@ double ha_partition::scan_time()
   @return time for scanning index inx
 */
 
-double ha_partition::key_scan_time(uint inx)
+IO_AND_CPU_COST ha_partition::key_scan_time(uint inx, ha_rows rows)
 {
-  double scan_time= 0;
+  IO_AND_CPU_COST scan_time= {0,0};
   uint i;
+  uint partitions= bitmap_bits_set(&m_part_info->read_partitions);
+  ha_rows rows_per_part;
   DBUG_ENTER("ha_partition::key_scan_time");
+
+  if (partitions == 0)
+    DBUG_RETURN(scan_time);
+  set_if_bigger(rows, 1);
+  rows_per_part= (rows + partitions - 1)/partitions;
+
   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
        i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
-    scan_time+= m_file[i]->key_scan_time(inx);
+  {
+    IO_AND_CPU_COST cost= m_file[i]->key_scan_time(inx, rows_per_part);
+    scan_time.io+= cost.io;
+    scan_time.cpu+= cost.cpu;
+  }
   DBUG_RETURN(scan_time);
 }
 
-double ha_partition::keyread_time(uint inx, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_partition::keyread_time(uint inx, ulong ranges, ha_rows rows,
+                                           ulonglong blocks)
 {
-  double read_time= 0;
+  IO_AND_CPU_COST read_time= {0,0};
   uint i;
+  uint partitions= bitmap_bits_set(&m_part_info->read_partitions);
   DBUG_ENTER("ha_partition::keyread_time");
-  if (!ranges)
-    DBUG_RETURN(handler::keyread_time(inx, ranges, rows));
+  if (partitions == 0)
+    DBUG_RETURN(read_time);
+
+  ha_rows rows_per_part= (rows + partitions - 1)/partitions;
   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
        i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
-    read_time+= m_file[i]->keyread_time(inx, ranges, rows);
+  {
+    IO_AND_CPU_COST cost= m_file[i]->keyread_time(inx, ranges, rows_per_part,
+                                                  blocks);
+    read_time.io+= cost.io;
+    read_time.cpu+= cost.cpu;
+  }
+  /* Add that we have to do a key lookup for all ranges in all partitions */
+  read_time.cpu= (partitions-1) * ranges * KEY_LOOKUP_COST;
   DBUG_RETURN(read_time);
 }
 
+IO_AND_CPU_COST ha_partition::rnd_pos_time(ha_rows rows)
+{
+  IO_AND_CPU_COST read_time= {0,0};
+  uint i;
+  uint partitions= bitmap_bits_set(&m_part_info->read_partitions);
+  if (partitions == 0)
+    return read_time;
+
+  ha_rows rows_per_part= (rows + partitions - 1)/partitions;
+  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
+       i < m_tot_parts;
+       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
+  {
+    IO_AND_CPU_COST cost= m_file[i]->rnd_pos_time(rows_per_part);
+    read_time.io+= cost.io;
+    read_time.cpu+= cost.cpu;
+  }
+  return read_time;
+}
+
+
 /**
   Find number of records in a range.
   @param inx Index number
@@ -9967,6 +10025,8 @@ ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key,
     if (estimated_rows && checked_rows &&
        checked_rows >= min_rows_to_check)
    {
+      /* We cannot use page ranges when there is more than one partition */
+      *pages= unused_page_range;
      DBUG_PRINT("info",
                 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
                  inx,
@@ -9980,6 +10040,8 @@ ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key,
    DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
                        inx,
                        (ulong) estimated_rows));
+    /* We cannot use page ranges when there is more than one partition */
+    *pages= unused_page_range;
    DBUG_RETURN(estimated_rows);
  }
@@ -10010,33 +10072,6 @@ ha_rows ha_partition::estimate_rows_upper_bound()
 }
 
 
-/*
-  Get time to read
-
-  SYNOPSIS
-    read_time()
-    index                Index number used
-    ranges               Number of ranges
-    rows                 Number of rows
-
-  RETURN VALUE
-    time for read
-
-  DESCRIPTION
-    This will be optimised later to include whether or not the index can
-    be used with partitioning. To achieve we need to add another parameter
-    that specifies how many of the index fields that are bound in the ranges.
-    Possibly added as a new call to handlers.
-*/
-
-double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
-{
-  DBUG_ENTER("ha_partition::read_time");
-
-  DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows));
-}
-
-
 /**
   Number of rows in table.  see handler.h
@@ -10874,13 +10909,6 @@ int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
     DBUG_RETURN(0);
   }
 
-  /*
-    In Innodb we compare with either primary key value or global DB_ROW_ID so
-    it is not possible that the two references are equal and are in different
-    partitions, but in myisam it is possible since we are comparing offsets.
-    Remove this assert if DB_ROW_ID is changed to be per partition.
-  */
-  DBUG_ASSERT(!m_innodb);
   DBUG_RETURN(diff2 > diff1 ? -1 : 1);
 }
@@ -12256,6 +12284,59 @@ ha_partition::can_convert_nocopy(const Field &field,
   return true;
 }
 
+/*
+  Get table costs for the current statement that should be stored in
+  handler->cost variables.
+
+  When we want to support many different table handlers, we should set
+  m_file[i]->costs to point to a unique cost structure per open
+  instance and call something similar as
+  TABLE_SHARE::update_optimizer_costs(handlerton *hton) and
+  handler::update_optimizer_costs(&costs) on it.
+*/
+
+
+void ha_partition::set_optimizer_costs(THD *thd)
+{
+  handler::set_optimizer_costs(thd);
+  for (uint i= bitmap_get_first_set(&m_part_info->read_partitions);
+       i < m_tot_parts;
+       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
+    m_file[i]->set_optimizer_costs(thd);
+}
+
+/*
+  Get unique table costs for the first instance of the handler and store
+  in table->share
+*/
+
+void ha_partition::update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+  uint i= bitmap_get_first_set(&m_part_info->read_partitions);
+  m_file[i]->update_optimizer_costs(costs);
+}
+
+ulonglong ha_partition::row_blocks()
+{
+  ulonglong active_partitions= bitmap_bits_set(&m_part_info->read_partitions);
+  return handler::row_blocks() + MY_MAX(active_partitions,1)-1;
+}
+
+ulonglong ha_partition::index_blocks(uint index, uint ranges, ha_rows rows)
+{
+  ulonglong blocks= 0;
+  ulonglong active_partitions= bitmap_bits_set(&m_part_info->read_partitions);
+  /* Assume rows are evenly divided among partitions */
+  rows= (rows + active_partitions - 1) / MY_MAX(active_partitions, 1);
+
+  for (uint i= bitmap_get_first_set(&m_part_info->read_partitions);
+       i < m_tot_parts;
+       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
+    blocks+= m_file[i]->index_blocks(index, ranges, rows);
+  return blocks;
+}
+
+
 struct st_mysql_storage_engine partition_storage_engine=
 { MYSQL_HANDLERTON_INTERFACE_VERSION };
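A note on the cost-model changes above: scan_time(), key_scan_time(), keyread_time() and rnd_pos_time() now all follow the same aggregation pattern. The sketch below summarizes that pattern outside the handler framework. It is a minimal standalone illustration, not server code: IO_AND_CPU_COST mirrors the shape of the server's struct, while per_part_cost() and its unit costs are hypothetical stand-ins for the per-partition m_file[i] handler calls and the real KEY_LOOKUP_COST constant.

#include <cstdint>
#include <cstdio>

// Same shape as the server's IO_AND_CPU_COST: IO and CPU are tracked
// separately so the optimizer can weight them independently.
struct IO_AND_CPU_COST { double io; double cpu; };

// Hypothetical stand-in for one partition's handler cost call,
// e.g. m_file[i]->keyread_time(...).
static IO_AND_CPU_COST per_part_cost(uint64_t rows)
{
  return { rows * 0.010, rows * 0.001 };          // made-up unit costs
}

// The pattern used by the new ha_partition cost methods:
// 1) bail out early when no partition will be read,
// 2) split the row estimate evenly across the read partitions with a
//    ceiling division, so the per-partition estimate never rounds to zero,
// 3) sum io and cpu over all read partitions,
// 4) charge the extra per-partition work (here: key lookups per range
//    for every partition beyond the first, as keyread_time() does).
static IO_AND_CPU_COST aggregate_cost(uint64_t rows, unsigned partitions,
                                      unsigned ranges, double key_lookup_cost)
{
  IO_AND_CPU_COST total= {0, 0};
  if (partitions == 0)
    return total;

  uint64_t rows_per_part= (rows + partitions - 1) / partitions;
  for (unsigned i= 0; i < partitions; i++)
  {
    IO_AND_CPU_COST cost= per_part_cost(rows_per_part);
    total.io+= cost.io;
    total.cpu+= cost.cpu;
  }
  total.cpu+= (partitions - 1) * ranges * key_lookup_cost;
  return total;
}

int main()
{
  // 100 estimated rows over 3 read partitions: rows_per_part is
  // (100 + 2) / 3 = 34, so each partition is charged for 34 rows.
  IO_AND_CPU_COST c= aggregate_cost(100, 3, 1, 0.000435);
  std::printf("io=%f cpu=%f\n", c.io, c.cpu);
  return 0;
}

The constants here are arbitrary; what matters is the split/sum/surcharge structure, which is why records_in_range() above must also discard page ranges (*pages= unused_page_range) once estimates are blended across more than one partition.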