summaryrefslogtreecommitdiffstats
path: root/sql/multi_range_read.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-07-01 18:15:00 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-07-01 18:15:00 +0000
commita2a2e32c02643a0cec111511220227703fda1cd5 (patch)
tree69cc2b631234c2a8e026b9cd4d72676c61c594df /sql/multi_range_read.cc
parentReleasing progress-linux version 1:10.11.8-1~progress7.99u1. (diff)
downloadmariadb-a2a2e32c02643a0cec111511220227703fda1cd5.tar.xz
mariadb-a2a2e32c02643a0cec111511220227703fda1cd5.zip
Merging upstream version 1:11.4.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r--sql/multi_range_read.cc255
1 files changed, 146 insertions, 109 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 2180cbb6..a7e14811 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -20,6 +20,74 @@
#include "key.h"
#include "sql_statistics.h"
#include "rowid_filter.h"
+#include "optimizer_defaults.h"
+
+static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+ Cost_estimate *cost);
+
+
+
+/*
+ The following calculation is the same as in multi_range_read_info()
+
+ @param cost Total cost is stored here
+ @param keyno Key number
+ @param n_ranges Number of different ranges
+ @param multi_row_ranges Number of ranges that are not EQ_REF
+ @param flags Flags. Only HA_MRR_INDEX_ONLY is used.
+ @param total_rows Number of rows expected to be read.
+ @param io_blocks Number of blocks we expect to read for
+ a not clustered index.
+ @param unassigned_single_point_ranges
+ Number of blocks we have not yet read for
+ a clustered index.
+*/
+
+void handler::calculate_costs(Cost_estimate *cost, uint keyno,
+ uint n_ranges, uint multi_row_ranges,
+ uint flags,
+ ha_rows total_rows,
+ ulonglong io_blocks,
+ ulonglong unassigned_single_point_ranges)
+{
+ cost->reset(this);
+
+ if (!is_clustering_key(keyno))
+ {
+ cost->index_cost= ha_keyread_time(keyno, n_ranges,
+ total_rows + multi_row_ranges,
+ io_blocks);
+
+ if (!(flags & HA_MRR_INDEX_ONLY))
+ {
+ /* ha_rnd_pos_time includes ROW_COPY_COST */
+ cost->row_cost= ha_rnd_pos_time(total_rows);
+ /* Adjust io cost to data size */
+ cost->row_cost.io= MY_MIN(cost->row_cost.io, row_blocks());
+ }
+ else
+ {
+ /* Index only read */
+ cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
+ }
+ }
+ else
+ {
+ /* Clustered index */
+ io_blocks= unassigned_single_point_ranges;
+ cost->index_cost= ha_keyread_time(keyno, n_ranges,
+ total_rows + multi_row_ranges,
+ io_blocks);
+ cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
+ }
+ /* Adjust io cost to data size */
+ cost->index_cost.io= MY_MIN(cost->index_cost.io, index_blocks(keyno));
+
+ cost->comp_cost= rows2double(total_rows) * WHERE_COST;
+ cost->setup_cost= MULTI_RANGE_READ_SETUP_COST;
+}
+
+
/****************************************************************************
* Default MRR implementation (MRR to non-MRR converter)
@@ -37,8 +105,8 @@
@param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
can't efficiently determine it
@param bufsz INOUT IN: Size of the buffer available for use
- OUT: Size of the buffer that is expected to be actually
- used, or 0 if buffer is not needed.
+ OUT: Size of the buffer that is expected to be
+ actually used, or 0 if buffer is not needed.
@param flags INOUT A combination of HA_MRR_* flags
@param cost OUT Estimated cost of MRR access
@@ -56,10 +124,12 @@
contain scan parameters.
*/
+
ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param, uint n_ranges_arg,
uint *bufsz, uint *flags,
+ ha_rows top_limit,
Cost_estimate *cost)
{
KEY_MULTI_RANGE range;
@@ -286,11 +356,15 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
(single_point_ranges - assigned_single_point_ranges).
We don't add these to io_blocks as we don't want to penalize equal
- readss (if we did, a range that would read 5 rows would be
+ reads (if we did, a range that would read 5 rows would be
regarded as better than one equal read).
Better to assume we have done a records_in_range() for the equal
range and it's also cached.
+
+ One effect of this is that io_blocks for simple ranges are often 0,
+ as the blocks where already read by records_in_range and we assume
+ that we don't have to read it again.
*/
io_blocks= (range_blocks_cnt - edge_blocks_cnt);
unassigned_single_point_ranges+= (single_point_ranges -
@@ -299,41 +373,33 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
if (total_rows != HA_POS_ERROR)
{
set_if_smaller(total_rows, max_rows);
-
- /* The following calculation is the same as in multi_range_read_info(): */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
- cost->reset();
- cost->avg_io_cost= cost->idx_avg_io_cost= avg_io_cost();
-
- if (!is_clustering_key(keyno))
- {
- cost->idx_io_count= (double) io_blocks;
- cost->idx_cpu_cost= (keyread_time(keyno, 0, total_rows) +
- n_ranges * IDX_LOOKUP_COST);
- if (!(*flags & HA_MRR_INDEX_ONLY))
- cost->cpu_cost= read_time(keyno, 0, total_rows);
- }
- else
+ calculate_costs(cost, keyno, n_ranges,
+ n_ranges - (uint) single_point_ranges,
+ *flags, total_rows,
+ io_blocks, unassigned_single_point_ranges);
+ if (top_limit < total_rows)
{
/*
- Clustered index
- If all index dives are to a few blocks, then limit the
- ranges used by read_time to the number of dives.
+ Calculate what the cost would be if we only have to read 'top_limit'
+ rows. This is the lowest possible cost when using the range
+ when we find the 'accepted rows' at once.
*/
- io_blocks+= unassigned_single_point_ranges;
- cost->idx_cpu_cost= n_ranges * IDX_LOOKUP_COST;
- uint limited_ranges= (uint) MY_MIN((ulonglong) n_ranges, io_blocks);
- cost->cpu_cost= read_time(keyno, limited_ranges, total_rows);
+ Cost_estimate limit_cost;
+ calculate_costs(&limit_cost, keyno, n_ranges,
+ n_ranges - (uint)single_point_ranges,
+ *flags, top_limit, io_blocks,
+ unassigned_single_point_ranges);
+ cost->limit_cost= limit_cost.total_cost();
}
- cost->cpu_cost+= (rows2double(total_rows) / TIME_FOR_COMPARE +
- MULTI_RANGE_READ_SETUP_COST);
+ DBUG_PRINT("statistics",
+ ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
+ "cpu_cost: %.3f",
+ table->s->keynames.type_names[keyno],
+ (ulonglong) total_rows, cost->total_cost(),
+ (ulonglong) (cost->row_cost.io + cost->index_cost.io),
+ (double) (cost->row_cost.cpu + cost->index_cost.cpu)));
}
- DBUG_PRINT("statistics",
- ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
- "idx_io_count: %.3f cpu_cost: %.3f io_count: %.3f",
- table->s->keynames.type_names[keyno],
- (ulonglong) total_rows, cost->total_cost(), (ulonglong) io_blocks,
- cost->idx_io_count, cost->cpu_cost, cost->io_count));
DBUG_RETURN(total_rows);
}
@@ -357,7 +423,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
@param keyno Index number
@param n_ranges Estimated number of ranges (i.e. intervals) in the
range sequence.
- @param n_rows Estimated total number of records contained within all
+ @param total_rows Estimated total number of records contained within all
of the ranges
@param bufsz INOUT IN: Size of the buffer available for use
OUT: Size of the buffer that will be actually used, or
@@ -372,7 +438,8 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
other Error or can't perform the requested scan
*/
-ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges,
+ uint total_rows,
uint key_parts, uint *bufsz,
uint *flags, Cost_estimate *cost)
{
@@ -385,27 +452,32 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
*bufsz= 0; /* Default implementation doesn't need a buffer */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
- cost->reset();
+ cost->reset(this);
+
/* Produce the same cost as non-MRR code does */
if (!is_clustering_key(keyno))
{
- /*
- idx_io_count could potentially be increased with the number of
- index leaf blocks we have to read for finding n_rows.
- */
- cost->idx_io_count= n_ranges;
- cost->idx_cpu_cost= (keyread_time(keyno, 0, n_rows) +
- n_ranges * IDX_LOOKUP_COST);
+ cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
+
if (!(*flags & HA_MRR_INDEX_ONLY))
{
- cost->cpu_cost= read_time(keyno, 0, n_rows);
+ /* ha_rnd_pos_time includes ROW_COPY_COST */
+ cost->row_cost= ha_rnd_pos_time(total_rows);
+ }
+ else
+ {
+ /* Index only read */
+ cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
}
}
else
{
- cost->cpu_cost= read_time(keyno, n_ranges, (uint)n_rows);
+ /* Clustering key */
+ cost->index_cost= ha_keyread_clustered_time(keyno, n_ranges, total_rows,
+ 0);
+ cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
}
- cost->cpu_cost+= rows2double(n_rows) / TIME_FOR_COMPARE;
+ cost->comp_cost= rows2double(total_rows) * WHERE_COST;
return 0;
}
@@ -1700,8 +1772,9 @@ ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
*/
ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param, uint n_ranges,
- uint *bufsz, uint *flags, Cost_estimate *cost)
+ void *seq_init_param, uint n_ranges,
+ uint *bufsz, uint *flags, ha_rows limit,
+ Cost_estimate *cost)
{
ha_rows rows;
uint def_flags= *flags;
@@ -1711,7 +1784,9 @@ ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
seq_init_param,
n_ranges,
&def_bufsz,
- &def_flags, cost);
+ &def_flags,
+ limit,
+ cost);
if (rows == HA_POS_ERROR)
{
/* Default implementation can't perform MRR scan => we can't either */
@@ -1918,7 +1993,8 @@ int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
}
-static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost);
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
+ Cost_estimate *cost);
/**
@@ -1949,7 +2025,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
ha_rows rows_in_full_step;
ha_rows rows_in_last_step;
uint n_full_steps;
- double index_read_cost;
elem_size= primary_file->ref_length +
sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
@@ -1982,6 +2057,8 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
rows_in_full_step= max_buff_entries;
rows_in_last_step= rows % max_buff_entries;
+ cost->reset(primary_file);
+
/* Adjust buffer size if we expect to use only part of the buffer */
if (n_full_steps)
{
@@ -1990,27 +2067,21 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
}
else
{
- cost->reset();
- *buffer_size= (uint)MY_MAX(*buffer_size,
- (size_t)(1.2*rows_in_last_step) * elem_size +
- primary_file->ref_length + table->key_info[keynr].key_length);
+ *buffer_size= ((uint) MY_MAX(*buffer_size,
+ (size_t)(1.2*rows_in_last_step) * elem_size +
+ primary_file->ref_length +
+ table->key_info[keynr].key_length));
}
Cost_estimate last_step_cost;
+ last_step_cost.avg_io_cost= cost->avg_io_cost;
get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
cost->add(&last_step_cost);
- if (n_full_steps != 0)
- cost->mem_cost= *buffer_size;
- else
- cost->mem_cost= (double)rows_in_last_step * elem_size;
-
/* Total cost of all index accesses */
- index_read_cost= primary_file->keyread_time(keynr, 1, rows);
- cost->add_io(index_read_cost, 1 /* Random seeks */);
-
- cost->cpu_cost+= (rows2double(rows) / TIME_FOR_COMPARE +
- MULTI_RANGE_READ_SETUP_COST);
+ cost->index_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0);
+ cost->comp_cost= rows2double(rows) * primary_file->WHERE_COST;
+ cost->setup_cost= primary_file->MULTI_RANGE_READ_SETUP_COST;
return FALSE;
}
@@ -2034,55 +2105,17 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
{
get_sweep_read_cost(table, nrows, FALSE, cost);
/* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
- double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+ double cmp_op= rows2double(nrows) * ROWID_COMPARE_COST_THD(table->in_use);
if (cmp_op < 3)
cmp_op= 3;
cost->cpu_cost += cmp_op * log2(cmp_op);
}
- else
- cost->reset();
}
/**
Get cost of reading nrows table records in a "disk sweep"
- A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
- for an ordered sequence of rowids.
-
- We assume hard disk IO. The read is performed as follows:
-
- 1. The disk head is moved to the needed cylinder
- 2. The controller waits for the plate to rotate
- 3. The data is transferred
-
- Time to do #3 is insignificant compared to #2+#1.
-
- Time to move the disk head is proportional to head travel distance.
-
- Time to wait for the plate to rotate depends on whether the disk head
- was moved or not.
-
- If disk head wasn't moved, the wait time is proportional to distance
- between the previous block and the block we're reading.
-
- If the head was moved, we don't know how much we'll need to wait for the
- plate to rotate. We assume the wait time to be a variate with a mean of
- 0.5 of full rotation time.
-
- Our cost units are "random disk seeks". The cost of random disk seek is
- actually not a constant, it depends one range of cylinders we're going
- to access. We make it constant by introducing a fuzzy concept of "typical
- datafile length" (it's fuzzy as it's hard to tell whether it should
- include index file, temp.tables etc). Then random seek cost is:
-
- 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
-
- We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
-
- If handler::avg_io_cost() < 1.0, then we will trust the handler
- when it comes to the average cost (this is for example true for HEAP).
-
@param table Table to be accessed
@param nrows Number of rows to retrieve
@param interrupted TRUE <=> Assume that the disk sweep will be
@@ -2090,16 +2123,18 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
@param cost OUT The cost.
*/
-void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
- Cost_estimate *cost)
+static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+ Cost_estimate *cost)
{
DBUG_ENTER("get_sweep_read_cost");
- cost->reset();
+#ifndef OLD_SWEEP_COST
+ cost->row_cost= table->file->ha_rnd_pos_call_time(nrows);
+#else
if (table->file->pk_is_clustering_key(table->s->primary_key))
{
- cost->cpu_cost= table->file->read_time(table->s->primary_key,
- (uint) nrows, nrows);
+ cost->cpu_cost= table->file->ha_read_and_copy_time(table->s->primary_key,
+ (uint) nrows, nrows);
}
else if ((cost->avg_io_cost= table->file->avg_io_cost()) >= 0.999)
{
@@ -2121,7 +2156,9 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
}
}
- DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
+ cost->cpu_cost+= rows2double(n_rows) * ROW_COPY_COST;
+#endif
+ DBUG_PRINT("info",("returning cost: %g", cost->total_cost()));
DBUG_VOID_RETURN;
}