summaryrefslogtreecommitdiffstats
path: root/sql/opt_sum.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--sql/opt_sum.cc1096
1 files changed, 1096 insertions, 0 deletions
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
new file mode 100644
index 00000000..794ec40f
--- /dev/null
+++ b/sql/opt_sum.cc
@@ -0,0 +1,1096 @@
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+ Copyright (c) 2008, 2021, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+
+ Optimising of MIN(), MAX() and COUNT(*) queries without 'group by' clause
+ by replacing the aggregate expression with a constant.
+
+ Given a table with a compound key on columns (a,b,c), the following
+ types of queries are optimised (assuming the table handler supports
+ the required methods)
+
+ @verbatim
+ SELECT COUNT(*) FROM t1[,t2,t3,...]
+ SELECT MIN(b) FROM t1 WHERE a=const
+ SELECT MAX(c) FROM t1 WHERE a=const AND b=const
+ SELECT MAX(b) FROM t1 WHERE a=const AND b<const
+ SELECT MIN(b) FROM t1 WHERE a=const AND b>const
+ SELECT MIN(b) FROM t1 WHERE a=const AND b BETWEEN const AND const
+ SELECT MAX(b) FROM t1 WHERE a=const AND b BETWEEN const AND const
+ @endverbatim
+
+ Instead of '<' one can use '<=', '>', '>=' and '=' as well.
+ Instead of 'a=const' the condition 'a IS NULL' can be used.
+
+ If all selected fields are replaced then we will also remove all
+ involved tables and return the answer without any join. Thus, the
+ following query will be replaced with a row of two constants:
+ @verbatim
+ SELECT MAX(b), MIN(d) FROM t1,t2
+ WHERE a=const AND b<const AND d>const
+ @endverbatim
+ (assuming a index for column d of table t2 is defined)
+*/
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "key.h" // key_cmp_if_same
+#include "sql_select.h"
+
+static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, Field* field,
+ COND *cond, uint *range_fl,
+ uint *key_prefix_length);
+static int reckey_in_range(bool max_fl, TABLE_REF *ref, Field* field,
+ COND *cond, uint range_fl, uint prefix_len);
+static int maxmin_in_range(bool max_fl, Field* field, COND *cond);
+
+
+/*
+ Get exact count of rows in all tables
+
+ SYNOPSIS
+ get_exact_records()
+ tables List of tables
+
+ NOTES
+ When this is called, we know all table handlers supports HA_HAS_RECORDS
+ or HA_STATS_RECORDS_IS_EXACT
+
+ RETURN
+ ULONGLONG_MAX Error: Could not calculate number of rows
+ # Multiplication of number of rows in all tables
+*/
+
+static ulonglong get_exact_record_count(List<TABLE_LIST> &tables)
+{
+ ulonglong count= 1;
+ TABLE_LIST *tl;
+ List_iterator<TABLE_LIST> ti(tables);
+ while ((tl= ti++))
+ {
+ ha_rows tmp= tl->table->file->records();
+ if (tmp == HA_POS_ERROR)
+ return ULONGLONG_MAX;
+ count*= tmp;
+ }
+ return count;
+}
+
+
+/**
+ Use index to read MIN(field) value.
+
+ @param table Table object
+ @param ref Reference to the structure where we store the key value
+ @item_field Field used in MIN()
+ @range_fl Whether range endpoint is strict less than
+ @prefix_len Length of common key part for the range
+
+ @retval
+ 0 No errors
+ HA_ERR_... Otherwise
+*/
+
+static int get_index_min_value(TABLE *table, TABLE_REF *ref,
+ Item_field *item_field, uint range_fl,
+ uint prefix_len)
+{
+ int error;
+
+ if (!ref->key_length)
+ error= table->file->ha_index_first(table->record[0]);
+ else
+ {
+ /*
+ Use index to replace MIN/MAX functions with their values
+ according to the following rules:
+
+ 1) Insert the minimum non-null values where the WHERE clause still
+ matches, or
+ 2) a NULL value if there are only NULL values for key_part_k.
+ 3) Fail, producing a row of nulls
+
+ Implementation: Read the smallest value using the search key. If
+ the interval is open, read the next value after the search
+ key. If read fails, and we're looking for a MIN() value for a
+ nullable column, test if there is an exact match for the key.
+ */
+ if (!(range_fl & NEAR_MIN))
+ /*
+ Closed interval: Either The MIN argument is non-nullable, or
+ we have a >= predicate for the MIN argument.
+ */
+ error= table->file->ha_index_read_map(table->record[0],
+ ref->key_buff,
+ make_prev_keypart_map(ref->key_parts),
+ HA_READ_KEY_OR_NEXT);
+ else
+ {
+ /*
+ Open interval: There are two cases:
+ 1) We have only MIN() and the argument column is nullable, or
+ 2) there is a > predicate on it, nullability is irrelevant.
+ We need to scan the next bigger record first.
+ Open interval is not used if the search key involves the last keypart,
+ and it would not work.
+ */
+ DBUG_ASSERT(prefix_len < ref->key_length);
+ error= table->file->ha_index_read_map(table->record[0],
+ ref->key_buff,
+ make_prev_keypart_map(ref->key_parts),
+ HA_READ_AFTER_KEY);
+ /*
+ If the found record is outside the group formed by the search
+ prefix, or there is no such record at all, check if all
+ records in that group have NULL in the MIN argument
+ column. If that is the case return that NULL.
+
+ Check if case 1 from above holds. If it does, we should read
+ the skipped tuple.
+ */
+ if (item_field->field->real_maybe_null() &&
+ ref->key_buff[prefix_len] == 1 &&
+ /*
+ Last keypart (i.e. the argument to MIN) is set to NULL by
+ find_key_for_maxmin only if all other keyparts are bound
+ to constants in a conjunction of equalities. Hence, we
+ can detect this by checking only if the last keypart is
+ NULL.
+ */
+ (error == HA_ERR_KEY_NOT_FOUND ||
+ key_cmp_if_same(table, ref->key_buff, ref->key, prefix_len)))
+ {
+ DBUG_ASSERT(item_field->field->real_maybe_null());
+ error= table->file->ha_index_read_map(table->record[0],
+ ref->key_buff,
+ make_prev_keypart_map(ref->key_parts),
+ HA_READ_KEY_EXACT);
+ }
+ }
+ }
+ return error;
+}
+
+
+/**
+ Use index to read MAX(field) value.
+
+ @param table Table object
+ @param ref Reference to the structure where we store the key value
+ @range_fl Whether range endpoint is strict greater than
+
+ @retval
+ 0 No errors
+ HA_ERR_... Otherwise
+*/
+
+static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl)
+{
+ return (ref->key_length ?
+ table->file->ha_index_read_map(table->record[0], ref->key_buff,
+ make_prev_keypart_map(ref->key_parts),
+ range_fl & NEAR_MAX ?
+ HA_READ_BEFORE_KEY :
+ HA_READ_PREFIX_LAST_OR_PREV) :
+ table->file->ha_index_last(table->record[0]));
+}
+
+
+
+/**
+ Substitutes constants for some COUNT(), MIN() and MAX() functions.
+
+ @param thd thread handler
+ @param tables list of leaves of join table tree
+ @param all_fields All fields to be returned
+ @param conds WHERE clause
+
+ @note
+ This function is only called for queries with aggregate functions and no
+ GROUP BY part. This means that the result set shall contain a single
+ row only
+
+ @retval
+ 0 no errors
+ @retval
+ 1 if all items were resolved
+ @retval
+ HA_ERR_KEY_NOT_FOUND on impossible conditions
+ @retval
+ HA_ERR_... if a deadlock or a lock wait timeout happens, for example
+ @retval
+ ER_... e.g. ER_SUBQUERY_NO_1_ROW
+*/
+
+int opt_sum_query(THD *thd,
+ List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds)
+{
+ List_iterator_fast<Item> it(all_fields);
+ List_iterator<TABLE_LIST> ti(tables);
+ TABLE_LIST *tl;
+ int const_result= 1;
+ bool recalc_const_item= 0;
+ ulonglong count= 1;
+ bool is_exact_count= TRUE, maybe_exact_count= TRUE;
+ table_map removed_tables= 0, outer_tables= 0, used_tables= 0;
+ table_map where_tables= 0;
+ Item *item;
+ int error= 0;
+ DBUG_ENTER("opt_sum_query");
+
+ thd->lex->current_select->min_max_opt_list.empty();
+
+ if (conds)
+ where_tables= conds->used_tables();
+
+ /*
+ Analyze outer join dependencies, and, if possible, compute the number
+ of returned rows.
+ */
+ while ((tl= ti++))
+ {
+ TABLE_LIST *embedded;
+ for (embedded= tl ; embedded; embedded= embedded->embedding)
+ {
+ if (embedded->on_expr)
+ break;
+ }
+ if (embedded)
+ /* Don't replace expression on a table that is part of an outer join */
+ {
+ outer_tables|= tl->table->map;
+
+ /*
+ We can't optimise LEFT JOIN in cases where the WHERE condition
+ restricts the table that is used, like in:
+ SELECT MAX(t1.a) FROM t1 LEFT JOIN t2 join-condition
+ WHERE t2.field IS NULL;
+ */
+ if (tl->table->map & where_tables)
+ DBUG_RETURN(0);
+ }
+ else
+ used_tables|= tl->table->map;
+
+ /*
+ If the storage manager of 'tl' gives exact row count as part of
+ statistics (cheap), compute the total number of rows. If there are
+ no outer table dependencies, this count may be used as the real count.
+ Schema tables are filled after this function is invoked, so we can't
+ get row count
+ */
+ if (!(tl->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) ||
+ tl->schema_table)
+ {
+ maybe_exact_count&= MY_TEST(!tl->schema_table &&
+ (tl->table->file->ha_table_flags() &
+ HA_HAS_RECORDS));
+ is_exact_count= FALSE;
+ count= 1; // ensure count != 0
+ }
+ else if (tl->is_materialized_derived() ||
+ tl->jtbm_subselect)
+ {
+ /*
+ Can't remove a derived table as it's number of rows is just an
+ estimate.
+ */
+ DBUG_RETURN(0);
+ }
+ else
+ {
+ error= tl->table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ if (unlikely(error))
+ {
+ tl->table->file->print_error(error, MYF(ME_FATAL));
+ DBUG_RETURN(error);
+ }
+ count*= tl->table->file->stats.records;
+ }
+ }
+
+ /*
+ Iterate through all items in the SELECT clause and replace
+ COUNT(), MIN() and MAX() with constants (if possible).
+ */
+
+ while ((item= it++))
+ {
+ if (item->type() == Item::SUM_FUNC_ITEM)
+ {
+ Item_sum *item_sum= (((Item_sum*) item));
+ switch (item_sum->sum_func()) {
+ case Item_sum::COUNT_FUNC:
+ /*
+ If the expr in COUNT(expr) can never be null we can change this
+ to the number of rows in the tables if this number is exact and
+ there are no outer joins.
+ */
+ if (!conds && !((Item_sum_count*) item)->get_arg(0)->maybe_null() &&
+ !outer_tables && maybe_exact_count &&
+ ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0))
+ {
+ if (!is_exact_count)
+ {
+ if ((count= get_exact_record_count(tables)) == ULONGLONG_MAX)
+ {
+ /* Error from handler in counting rows. Don't optimize count() */
+ const_result= 0;
+ continue;
+ }
+ is_exact_count= 1; // count is now exact
+ }
+ ((Item_sum_count*) item)->make_const((longlong) count);
+ recalc_const_item= 1;
+ }
+ else
+ const_result= 0;
+ break;
+ case Item_sum::MIN_FUNC:
+ case Item_sum::MAX_FUNC:
+ {
+ int is_max= MY_TEST(item_sum->sum_func() == Item_sum::MAX_FUNC);
+ /*
+ If MIN/MAX(expr) is the first part of a key or if all previous
+ parts of the key is found in the COND, then we can use
+ indexes to find the key.
+ */
+ Item *expr=item_sum->get_arg(0);
+ if (((expr->used_tables() & OUTER_REF_TABLE_BIT) == 0) &&
+ expr->real_item()->type() == Item::FIELD_ITEM)
+ {
+ uchar key_buff[MAX_KEY_LENGTH];
+ TABLE_REF ref;
+ uint range_fl, prefix_len;
+
+ ref.key_buff= key_buff;
+ Item_field *item_field= (Item_field*) (expr->real_item());
+ TABLE *table= item_field->field->table;
+
+ /*
+ Look for a partial key that can be used for optimization.
+ If we succeed, ref.key_length will contain the length of
+ this key, while prefix_len will contain the length of
+ the beginning of this key without field used in MIN/MAX().
+ Type of range for the key part for this field will be
+ returned in range_fl.
+ */
+ if (table->file->inited || (outer_tables & table->map) ||
+ !find_key_for_maxmin(is_max, &ref, item_field->field, conds,
+ &range_fl, &prefix_len))
+ {
+ const_result= 0;
+ break;
+ }
+ longlong info_limit= 1;
+ error= 0;
+
+ table->file->info_push(INFO_KIND_FORCE_LIMIT_BEGIN, &info_limit);
+ if (!table->const_table)
+ {
+ if (likely(!(error= table->file->ha_index_init((uint) ref.key,
+ 1))))
+ error= (is_max ?
+ get_index_max_value(table, &ref, range_fl) :
+ get_index_min_value(table, &ref, item_field, range_fl,
+ prefix_len));
+ }
+ /* Verify that the read tuple indeed matches the search key */
+ if (!error &&
+ reckey_in_range(is_max, &ref, item_field->field,
+ conds, range_fl, prefix_len))
+ error= HA_ERR_KEY_NOT_FOUND;
+ if (!table->const_table)
+ {
+ table->file->ha_end_keyread();
+ table->file->ha_index_end();
+ }
+ table->file->info_push(INFO_KIND_FORCE_LIMIT_END, NULL);
+ if (error)
+ {
+ if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
+ DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); // No rows matching WHERE
+ /* HA_ERR_LOCK_DEADLOCK or some other error */
+ table->file->print_error(error, MYF(0));
+ DBUG_RETURN(error);
+ }
+ removed_tables|= table->map;
+ }
+ else if (!expr->const_item() || !is_exact_count || conds)
+ {
+ /*
+ The optimization is not applicable in both cases:
+ (a) 'expr' is a non-constant expression. Then we can't
+ replace 'expr' by a constant.
+ (b) 'expr' is a constant. According to ANSI, MIN/MAX must return
+ NULL if the query does not return any rows. Thus, if we are not
+ able to determine if the query returns any rows, we can't apply
+ the optimization and replace MIN/MAX with a constant.
+ (c) there is a WHERE clause. The WHERE conditions may result in
+ an empty result, but the clause cannot be taken into account here.
+ */
+ const_result= 0;
+ break;
+ }
+ item_sum->set_aggregator(thd,
+ item_sum->has_with_distinct() ?
+ Aggregator::DISTINCT_AGGREGATOR :
+ Aggregator::SIMPLE_AGGREGATOR);
+ /*
+ If count == 0 (so is_exact_count == TRUE) and
+ there're no outer joins, set to NULL,
+ otherwise set to the constant value.
+ */
+ if (!count && !outer_tables)
+ {
+ item_sum->aggregator_clear();
+ }
+ else
+ {
+ item_sum->reset_and_add();
+ /*
+ Save a reference to the item for possible rollback
+ of the min/max optimizations for this select
+ */
+ thd->lex->current_select->min_max_opt_list.push_back(item_sum);
+ }
+ item_sum->make_const();
+ recalc_const_item= 1;
+ break;
+ }
+ default:
+ const_result= 0;
+ break;
+ }
+ }
+ else if (const_result)
+ {
+ if (recalc_const_item)
+ item->update_used_tables();
+ if (!item->const_item() && item->type() != Item::WINDOW_FUNC_ITEM)
+ const_result= 0;
+ }
+ }
+
+ if (unlikely(thd->is_error()))
+ DBUG_RETURN(thd->get_stmt_da()->sql_errno());
+
+ /*
+ If we have a where clause, we can only ignore searching in the
+ tables if MIN/MAX optimisation replaced all used tables
+ We do not use replaced values in case of:
+ SELECT MIN(key) FROM table_1, empty_table
+ removed_tables is != 0 if we have used MIN() or MAX().
+ */
+ if (removed_tables && used_tables != removed_tables)
+ const_result= 0; // We didn't remove all tables
+ DBUG_RETURN(const_result);
+}
+
+
+/*
+ Check if both item1 and item2 are strings, and item1 has fewer characters
+ than item2.
+*/
+
+static bool check_item1_shorter_item2(Item *item1, Item *item2)
+{
+ if (item1->cmp_type() == STRING_RESULT &&
+ item2->cmp_type() == STRING_RESULT)
+ {
+ int len1= item1->max_length / item1->collation.collation->mbmaxlen;
+ int len2= item2->max_length / item2->collation.collation->mbmaxlen;
+ return len1 < len2;
+ }
+ return false; /* When the check is not applicable, it means "not bigger" */
+}
+
+
+/**
+ Test if the predicate compares a field with constants.
+
+ @param func_item Predicate item
+ @param[out] args Here we store the field followed by constants
+ @param[out] inv_order Is set to 1 if the predicate is of the form
+ 'const op field'
+
+ @retval
+ 0 func_item is a simple predicate: a field is compared with a constant
+ whose length does not exceed the max length of the field values
+ @retval
+ 1 Otherwise
+*/
+
+bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
+{
+ Item *item;
+ *inv_order= 0;
+ switch (func_item->argument_count()) {
+ case 0:
+ /* MULT_EQUAL_FUNC */
+ {
+ Item_equal *item_equal= (Item_equal *) func_item;
+ if (!(args[1]= item_equal->get_const()))
+ return 0;
+ Item_equal_fields_iterator it(*item_equal);
+ if (!(item= it++))
+ return 0;
+ args[0]= item->real_item();
+ if (check_item1_shorter_item2(args[0], args[1]))
+ return 0;
+ if (it++)
+ return 0;
+ }
+ break;
+ case 1:
+ /* field IS NULL */
+ item= func_item->arguments()[0]->real_item();
+ if (item->type() != Item::FIELD_ITEM)
+ return 0;
+ args[0]= item;
+ break;
+ case 2:
+ /* 'field op const' or 'const op field' */
+ item= func_item->arguments()[0]->real_item();
+ if (item->type() == Item::FIELD_ITEM)
+ {
+ args[0]= item;
+ item= func_item->arguments()[1]->real_item();
+ if (!item->const_item())
+ return 0;
+ args[1]= item;
+ }
+ else if (item->const_item())
+ {
+ args[1]= item;
+ item= func_item->arguments()[1]->real_item();
+ if (item->type() != Item::FIELD_ITEM)
+ return 0;
+ args[0]= item;
+ *inv_order= 1;
+ }
+ else
+ return 0;
+ if (check_item1_shorter_item2(args[0], args[1]))
+ return 0;
+ break;
+ case 3:
+ /* field BETWEEN const AND const */
+ item= func_item->arguments()[0]->real_item();
+ if (item->type() == Item::FIELD_ITEM)
+ {
+ args[0]= item;
+ for (int i= 1 ; i <= 2; i++)
+ {
+ item= func_item->arguments()[i]->real_item();
+ if (!item->const_item())
+ return 0;
+ args[i]= item;
+ if (check_item1_shorter_item2(args[0], args[i]))
+ return 0;
+ }
+ }
+ else
+ return 0;
+ }
+ return 1;
+}
+
+
+/**
+ Check whether a condition matches a key to get {MAX|MIN}(field):.
+
+ For the index specified by the keyinfo parameter and an index that
+ contains the field as its component (field_part), the function
+ checks whether
+
+ - the condition cond is a conjunction,
+ - all of its conjuncts refer to columns of the same table, and
+ - each conjunct is on one of the following forms:
+ - f_i = const_i or const_i = f_i or f_i IS NULL,
+ where f_i is part of the index
+ - field {<|<=|>=|>|=} const
+ - const {<|<=|>=|>|=} field
+ - field BETWEEN const_1 AND const_2
+
+ As a side-effect, the key value to be used for looking up the MIN/MAX value
+ is actually stored inside the Field object. An interesting feature is that
+ the function will find the most restrictive endpoint by over-eager
+ evaluation of the @c WHERE condition. It continually stores the current
+ endpoint inside the Field object. For a query such as
+
+ @code
+ SELECT MIN(a) FROM t1 WHERE a > 3 AND a > 5;
+ @endcode
+
+ the algorithm will recurse over the conjuction, storing first a 3 in the
+ field. In the next recursive invocation the expression a > 5 is evaluated
+ as 3 > 5 (Due to the dual nature of Field objects as value carriers and
+ field identifiers), which will obviously fail, leading to 5 being stored in
+ the Field object.
+
+ @param[in] max_fl Set to true if we are optimizing MAX(),
+ false means we are optimizing %MIN()
+ @param[in, out] ref Reference to the structure where the function
+ stores the key value
+ @param[in] keyinfo Reference to the key info
+ @param[in] field_part Pointer to the key part for the field
+ @param[in] cond WHERE condition
+ @param[in,out] key_part_used Map of matchings parts. The function will output
+ the set of key parts actually being matched in
+ this set, yet it relies on the caller to
+ initialize the value to zero. This is due
+ to the fact that this value is passed
+ recursively.
+ @param[in,out] range_fl Says whether endpoints use strict greater/less
+ than.
+ @param[out] prefix_len Length of common key part for the range
+ where MAX/MIN is searched for
+
+ @retval
+ false Index can't be used.
+ @retval
+ true We can use the index to get MIN/MAX value
+*/
+
+static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
+ KEY_PART_INFO *field_part, COND *cond,
+ key_part_map *key_part_used, uint *range_fl,
+ uint *prefix_len)
+{
+ DBUG_ENTER("matching_cond");
+ if (!cond)
+ DBUG_RETURN(TRUE);
+ Field *field= field_part->field;
+ table_map cond_used_tables= cond->used_tables();
+ if (cond_used_tables & OUTER_REF_TABLE_BIT)
+ {
+ DBUG_RETURN(FALSE);
+ }
+ if (!(cond_used_tables & field->table->map) &&
+ MY_TEST(cond_used_tables & ~PSEUDO_TABLE_BITS))
+ {
+ /* Condition doesn't restrict the used table */
+ DBUG_RETURN(!cond->const_item());
+ }
+ else if (cond->is_expensive())
+ DBUG_RETURN(FALSE);
+ if (cond->type() == Item::COND_ITEM)
+ {
+ if (((Item_cond*) cond)->functype() == Item_func::COND_OR_FUNC)
+ DBUG_RETURN(FALSE);
+
+ /* AND */
+ List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item= li++))
+ {
+ if (!matching_cond(max_fl, ref, keyinfo, field_part, item,
+ key_part_used, range_fl, prefix_len))
+ DBUG_RETURN(FALSE);
+ }
+ DBUG_RETURN(TRUE);
+ }
+
+ if (cond->type() != Item::FUNC_ITEM)
+ DBUG_RETURN(FALSE); // Not operator, can't optimize
+
+ bool eq_type= 0; // =, <=> or IS NULL
+ bool is_null_safe_eq= FALSE; // The operator is NULL safe, e.g. <=>
+ bool noeq_type= 0; // < or >
+ bool less_fl= 0; // < or <=
+ bool is_null= 0; // IS NULL
+ bool between= 0; // BETWEEN ... AND ...
+
+ switch (((Item_func*) cond)->functype()) {
+ case Item_func::ISNULL_FUNC:
+ is_null= 1; /* fall through */
+ case Item_func::EQ_FUNC:
+ eq_type= TRUE;
+ break;
+ case Item_func::EQUAL_FUNC:
+ eq_type= is_null_safe_eq= TRUE;
+ break;
+ case Item_func::LT_FUNC:
+ noeq_type= 1; /* fall through */
+ case Item_func::LE_FUNC:
+ less_fl= 1;
+ break;
+ case Item_func::GT_FUNC:
+ noeq_type= 1; /* fall through */
+ case Item_func::GE_FUNC:
+ break;
+ case Item_func::BETWEEN:
+ if (((Item_func_between*) cond)->negated)
+ DBUG_RETURN(FALSE);
+ between= 1;
+ break;
+ case Item_func::MULT_EQUAL_FUNC:
+ eq_type= 1;
+ break;
+ default:
+ DBUG_RETURN(FALSE); // Can't optimize function
+ }
+
+ Item *args[3];
+ bool inv;
+
+ /* Test if this is a comparison of a field and constant */
+ if (!simple_pred((Item_func*) cond, args, &inv))
+ DBUG_RETURN(FALSE);
+
+ if (!is_null_safe_eq && !is_null &&
+ (args[1]->is_null() || (between && args[2]->is_null())))
+ DBUG_RETURN(FALSE);
+
+ if (inv && !eq_type)
+ less_fl= 1-less_fl; // Convert '<' -> '>' (etc)
+
+ /* Check if field is part of the tested partial key */
+ uchar *key_ptr= ref->key_buff;
+ KEY_PART_INFO *part;
+ for (part= keyinfo->key_part; ; key_ptr+= part++->store_length)
+
+ {
+ if (part > field_part)
+ DBUG_RETURN(FALSE); // Field is beyond the tested parts
+ if (part->field->eq(((Item_field*) args[0])->field))
+ break; // Found a part of the key for the field
+ }
+
+ bool is_field_part= part == field_part;
+ if (!(is_field_part || eq_type))
+ DBUG_RETURN(FALSE);
+
+ key_part_map org_key_part_used= *key_part_used;
+ if (eq_type || between || max_fl == less_fl)
+ {
+ uint length= (uint)(key_ptr-ref->key_buff)+part->store_length;
+ if (ref->key_length < length)
+ {
+ /* Ultimately ref->key_length will contain the length of the search key */
+ ref->key_length= length;
+ ref->key_parts= (uint)(part - keyinfo->key_part) + 1;
+ }
+ if (!*prefix_len && part+1 == field_part)
+ *prefix_len= length;
+ if (is_field_part && eq_type)
+ *prefix_len= ref->key_length;
+
+ *key_part_used|= (key_part_map) 1 << (part - keyinfo->key_part);
+ }
+
+ if (org_key_part_used == *key_part_used &&
+ /*
+ The current search key is not being extended with a new key part. This
+ means that the a condition is added a key part for which there was a
+ previous condition. We can only overwrite such key parts in some special
+ cases, e.g. a > 2 AND a > 1 (here range_fl must be set to something). In
+ all other cases the WHERE condition is always false anyway.
+ */
+ (eq_type || *range_fl == 0))
+ DBUG_RETURN(FALSE);
+
+ if (org_key_part_used != *key_part_used ||
+ (is_field_part &&
+ (between || eq_type || max_fl == less_fl) && !cond->val_int()))
+ {
+ /*
+ It's the first predicate for this part or a predicate of the
+ following form that moves upper/lower bounds for max/min values:
+ - field BETWEEN const AND const
+ - field = const
+ - field {<|<=} const, when searching for MAX
+ - field {>|>=} const, when searching for MIN
+ */
+
+ if (is_null || (is_null_safe_eq && args[1]->is_null()))
+ {
+ /*
+ If we have a non-nullable index, we cannot use it,
+ since set_null will be ignored, and we will compare uninitialized data.
+ */
+ if (!part->field->real_maybe_null())
+ DBUG_RETURN(FALSE);
+ part->field->set_null();
+ *key_ptr= (uchar) 1;
+ }
+ else
+ {
+ /* Update endpoints for MAX/MIN, see function comment. */
+ Item *value= args[between && max_fl ? 2 : 1];
+ value->save_in_field_no_warnings(part->field, 1);
+ if (part->null_bit)
+ *key_ptr++= (uchar) MY_TEST(part->field->is_null());
+ part->field->get_key_image(key_ptr, part->length, Field::itRAW);
+ }
+ if (is_field_part)
+ {
+ if (between || eq_type)
+ {
+ *range_fl&= ~(NO_MAX_RANGE | NO_MIN_RANGE);
+ *range_fl&= ~(max_fl ? NEAR_MAX : NEAR_MIN);
+ }
+ else
+ {
+ *range_fl&= ~(max_fl ? NO_MAX_RANGE : NO_MIN_RANGE);
+ if (noeq_type)
+ *range_fl|= (max_fl ? NEAR_MAX : NEAR_MIN);
+ else
+ *range_fl&= ~(max_fl ? NEAR_MAX : NEAR_MIN);
+ }
+ }
+ }
+ else if (eq_type)
+ {
+ if ((!is_null && !cond->val_int()) ||
+ (is_null && !MY_TEST(part->field->is_null())))
+ DBUG_RETURN(FALSE); // Impossible test
+ }
+ else if (is_field_part)
+ *range_fl&= ~(max_fl ? NO_MIN_RANGE : NO_MAX_RANGE);
+ DBUG_RETURN(TRUE);
+}
+
+
+/**
+ Check whether we can get value for {max|min}(field) by using a key.
+
+ If where-condition is not a conjunction of 0 or more conjuct the
+ function returns false, otherwise it checks whether there is an
+ index including field as its k-th component/part such that:
+
+ -# for each previous component f_i there is one and only one conjunct
+ of the form: f_i= const_i or const_i= f_i or f_i is null
+ -# references to field occur only in conjucts of the form:
+ field {<|<=|>=|>|=} const or const {<|<=|>=|>|=} field or
+ field BETWEEN const1 AND const2
+ -# all references to the columns from the same table as column field
+ occur only in conjucts mentioned above.
+ -# each of k first components the index is not partial, i.e. is not
+ defined on a fixed length proper prefix of the field.
+
+ If such an index exists the function through the ref parameter
+ returns the key value to find max/min for the field using the index,
+ the length of first (k-1) components of the key and flags saying
+ how to apply the key for the search max/min value.
+ (if we have a condition field = const, prefix_len contains the length
+ of the whole search key)
+
+ @param[in] max_fl 0 for MIN(field) / 1 for MAX(field)
+ @param[in,out] ref Reference to the structure we store the key value
+ @param[in] field Field used inside MIN() / MAX()
+ @param[in] cond WHERE condition
+ @param[out] range_fl Bit flags for how to search if key is ok
+ @param[out] prefix_len Length of prefix for the search range
+
+ @note
+ This function may set field->table->key_read to true,
+ which must be reset after index is used!
+ (This can only happen when function returns 1)
+
+ @retval
+ 0 Index can not be used to optimize MIN(field)/MAX(field)
+ @retval
+ 1 Can use key to optimize MIN()/MAX().
+ In this case ref, range_fl and prefix_len are updated
+*/
+
+static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
+ Field* field, COND *cond,
+ uint *range_fl, uint *prefix_len)
+{
+ if (!(field->flags & PART_KEY_FLAG))
+ return FALSE; // Not key field
+
+ DBUG_ENTER("find_key_for_maxmin");
+
+ TABLE *table= field->table;
+ uint idx= 0;
+
+ KEY *keyinfo,*keyinfo_end;
+ for (keyinfo= table->key_info, keyinfo_end= keyinfo+table->s->keys ;
+ keyinfo != keyinfo_end;
+ keyinfo++,idx++)
+ {
+ KEY_PART_INFO *part,*part_end;
+ key_part_map key_part_to_use= 0;
+ /*
+ Perform a check if index is not disabled by ALTER TABLE
+ or IGNORE INDEX.
+ */
+ if (!table->keys_in_use_for_query.is_set(idx))
+ continue;
+ uint jdx= 0;
+ *prefix_len= 0;
+ part_end= keyinfo->key_part+table->actual_n_key_parts(keyinfo);
+ for (part= keyinfo->key_part ;
+ part != part_end ;
+ part++, jdx++, key_part_to_use= (key_part_to_use << 1) | 1)
+ {
+ if (!(table->file->index_flags(idx, jdx, 0) & HA_READ_ORDER))
+ DBUG_RETURN(FALSE);
+
+ /* Check whether the index component is partial */
+ Field *part_field= table->field[part->fieldnr-1];
+ if ((part_field->flags & BLOB_FLAG) ||
+ part->length < part_field->key_length())
+ break;
+
+ if (part->key_part_flag & HA_REVERSE_SORT)
+ break; // TODO MDEV-27576
+
+ if (field->eq(part->field))
+ {
+ ref->key= idx;
+ ref->key_length= 0;
+ ref->key_parts= 0;
+ key_part_map key_part_used= 0;
+ *range_fl= NO_MIN_RANGE | NO_MAX_RANGE;
+ if (matching_cond(max_fl, ref, keyinfo, part, cond,
+ &key_part_used, range_fl, prefix_len) &&
+ !(key_part_to_use & ~key_part_used))
+ {
+ if (!max_fl && key_part_used == key_part_to_use && part->null_bit)
+ {
+ /*
+ The query is on this form:
+
+ SELECT MIN(key_part_k)
+ FROM t1
+ WHERE key_part_1 = const and ... and key_part_k-1 = const
+
+ If key_part_k is nullable, we want to find the first matching row
+ where key_part_k is not null. The key buffer is now {const, ...,
+ NULL}. This will be passed to the handler along with a flag
+ indicating open interval. If a tuple is read that does not match
+ these search criteria, an attempt will be made to read an exact
+ match for the key buffer.
+ */
+ /* Set the first byte of key_part_k to 1, that means NULL */
+ ref->key_buff[ref->key_length]= 1;
+ ref->key_length+= part->store_length;
+ ref->key_parts++;
+ DBUG_ASSERT(ref->key_parts == jdx+1);
+ *range_fl&= ~NO_MIN_RANGE;
+ *range_fl|= NEAR_MIN; // Open interval
+ }
+ /*
+ The following test is false when the key in the key tree is
+ converted (for example to upper case)
+ */
+ if (field->part_of_key.is_set(idx))
+ table->file->ha_start_keyread(idx);
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+ }
+ DBUG_RETURN(FALSE);
+}
+
+
+/**
+ Check whether found key is in range specified by conditions.
+
+ @param[in] max_fl 0 for MIN(field) / 1 for MAX(field)
+ @param[in] ref Reference to the key value and info
+ @param[in] field Field used the MIN/MAX expression
+ @param[in] cond WHERE condition
+ @param[in] range_fl Says whether there is a condition to to be checked
+ @param[in] prefix_len Length of the constant part of the key
+
+ @retval
+ 0 ok
+ @retval
+ 1 WHERE was not true for the found row
+*/
+
+static int reckey_in_range(bool max_fl, TABLE_REF *ref, Field* field,
+ COND *cond, uint range_fl, uint prefix_len)
+{
+ if (key_cmp_if_same(field->table, ref->key_buff, ref->key, prefix_len))
+ return 1;
+ if (!cond || (range_fl & (max_fl ? NO_MIN_RANGE : NO_MAX_RANGE)))
+ return 0;
+ return maxmin_in_range(max_fl, field, cond);
+}
+
+
+/**
+ Check whether {MAX|MIN}(field) is in range specified by conditions.
+
+ @param[in] max_fl 0 for MIN(field) / 1 for MAX(field)
+ @param[in] field Field used the MIN/MAX expression
+ @param[in] cond WHERE condition
+
+ @retval
+ 0 ok
+ @retval
+ 1 WHERE was not true for the found row
+*/
+
+static int maxmin_in_range(bool max_fl, Field* field, COND *cond)
+{
+ /* If AND/OR condition */
+ if (cond->type() == Item::COND_ITEM)
+ {
+ List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item= li++))
+ {
+ if (maxmin_in_range(max_fl, field, item))
+ return 1;
+ }
+ return 0;
+ }
+
+ if (cond->used_tables() != field->table->map)
+ return 0;
+ bool less_fl= 0;
+ switch (((Item_func*) cond)->functype()) {
+ case Item_func::BETWEEN:
+ return cond->val_int() == 0; // Return 1 if WHERE is false
+ case Item_func::LT_FUNC:
+ case Item_func::LE_FUNC:
+ less_fl= 1;
+ /* fall through */
+ case Item_func::GT_FUNC:
+ case Item_func::GE_FUNC:
+ {
+ Item *item= ((Item_func*) cond)->arguments()[1];
+ /* In case of 'const op item' we have to swap the operator */
+ if (!item->const_item())
+ less_fl= 1-less_fl;
+ /*
+ We only have to check the expression if we are using an expression like
+ SELECT MAX(b) FROM t1 WHERE a=const AND b>const
+ not for
+ SELECT MAX(b) FROM t1 WHERE a=const AND b<const
+ */
+ if (max_fl != less_fl)
+ return cond->val_int() == 0; // Return 1 if WHERE is false
+ return 0;
+ }
+ default:
+ break; // Ignore
+ }
+ return 0;
+}
+