# # MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK # set @save_optimizer_trace = @@optimizer_trace; SET optimizer_trace='enabled=on'; CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL); INSERT INTO t1 VALUES (1,1), (2,1), (3,1); # Optimization is applied (aggregator=simple): SELECT COUNT(DISTINCT a) FROM t1; COUNT(DISTINCT a) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.a)", "aggregator_type": "simple" } ] SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1; AVG(DISTINCT a) SUM(DISTINCT b) 2.0000 1 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "avg(distinct t1.a)", "aggregator_type": "simple" }, { "function": "sum(distinct t1.b)", "aggregator_type": "distinct" } ] # Only `a` is unique but it's enough to eliminate DISTINCT: SELECT COUNT(DISTINCT b, a) FROM t1; COUNT(DISTINCT b, a) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.b,t1.a)", "aggregator_type": "simple" } ] SELECT COUNT(DISTINCT a, a + b) FROM t1; COUNT(DISTINCT a, a + b) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.a,t1.a + t1.b)", "aggregator_type": "simple" } ] SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1; SUM(DISTINCT a) AVG(DISTINCT a) COUNT(DISTINCT a) 5 2.5000 2 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "sum(distinct t1.a)", "aggregator_type": "simple" }, { "function": "avg(distinct t1.a)", "aggregator_type": "simple" }, { "function": "count(distinct t1.a)", "aggregator_type": "simple" } ] # Optimization is not applied 'cause function argument is not a field # (aggregator=distinct): SELECT SUM(DISTINCT a + b) FROM t1; SUM(DISTINCT a + b) 9 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "sum(distinct t1.a + t1.b)", "aggregator_type": "distinct" } ] SELECT COUNT(DISTINCT b) FROM t1; COUNT(DISTINCT b) 1 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.b)", "aggregator_type": "distinct" } ] SELECT AVG(DISTINCT b / a) FROM t1; AVG(DISTINCT b / a) 0.61110000 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "avg(distinct t1.b / t1.a)", "aggregator_type": "distinct" } ] EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 index NULL PRIMARY 4 NULL 3 Using index 2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct (/* select#2 */ select t1.a))", "aggregator_type": "distinct" } ] CREATE TABLE t2 (a INT); INSERT INTO t2 VALUES (1), (2); # Optimization is not applied 'cause there is more than one table SELECT COUNT(DISTINCT t1.a) FROM t1, t2; COUNT(DISTINCT t1.a) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.a)", "aggregator_type": "distinct" } ] SELECT AVG(DISTINCT t1.a) FROM t1, t2; AVG(DISTINCT t1.a) 2.0000 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "avg(distinct t1.a)", "aggregator_type": "distinct" } ] # Const tables, optimization is applied SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2; COUNT(DISTINCT a) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.a)", "aggregator_type": "simple" } ] SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3; AVG(DISTINCT t1.a) 2.0000 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "avg(distinct t1.a)", "aggregator_type": "simple" } ] SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2; COUNT(DISTINCT a) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.a)", "aggregator_type": "distinct" } ] # Unique index on two columns CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL); INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2); CREATE UNIQUE INDEX t3_a_b ON t3 (a, b); # Optimization is applied: SELECT COUNT(DISTINCT a, b) FROM t3; COUNT(DISTINCT a, b) 7 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t3.a,t3.b)", "aggregator_type": "simple" } ] SELECT COUNT(DISTINCT b, a) FROM t3; COUNT(DISTINCT b, a) 7 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t3.b,t3.a)", "aggregator_type": "simple" } ] SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3; COUNT(DISTINCT b, a) 5 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t3.b,t3.a)", "aggregator_type": "simple" } ] # Optimization is applied to one of the functions: SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a; COUNT(DISTINCT b) SUM(DISTINCT a) SUM(DISTINCT a + b) 3 1 9 2 2 7 2 3 9 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t3.b)", "aggregator_type": "simple" }, { "function": "sum(distinct t3.a)", "aggregator_type": "distinct" }, { "function": "sum(distinct t3.a + t3.b)", "aggregator_type": "distinct" } ] # Can't apply optimization 'cause GROUP BY argument is not a field: SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b; COUNT(DISTINCT b) 1 2 3 1 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t3.b)", "aggregator_type": "distinct" } ] # Test merged view CREATE VIEW v1 AS SELECT * FROM t1; # Optimization is applied SELECT COUNT(DISTINCT a, b) FROM v1; COUNT(DISTINCT a, b) 3 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "count(distinct t1.a,t1.b)", "aggregator_type": "simple" } ] # GROUP_CONCAT implements non-standard distinct aggregator SELECT GROUP_CONCAT(b) FROM t1; GROUP_CONCAT(b) 1,1,1 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "group_concat(t1.b separator ',')", "aggregator_type": "simple" } ] SELECT GROUP_CONCAT(DISTINCT b) FROM t1; GROUP_CONCAT(DISTINCT b) 1 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JS [ { "function": "group_concat(distinct t1.b separator ',')", "aggregator_type": "distinct" } ] DROP TABLE t1, t2, t3; DROP VIEW v1; SET optimizer_trace = @save_optimizer_trace; # # end of 10.5 tests #