diff options
Diffstat (limited to 'mysql-test/main/statistics_json.test')
-rw-r--r-- | mysql-test/main/statistics_json.test | 483 |
1 files changed, 483 insertions, 0 deletions
diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test new file mode 100644 index 00000000..5263a98f --- /dev/null +++ b/mysql-test/main/statistics_json.test @@ -0,0 +1,483 @@ +--echo # +--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON +--echo # + +let $histogram_type_override='JSON_HB'; +--source statistics.test + +--source include/have_innodb.inc +--source include/have_stat_tables.inc +--source include/have_sequence.inc +--disable_warnings +drop table if exists t1; +--enable_warnings + +set @save_histogram_type=@@histogram_type; +set @save_histogram_size=@@histogram_size; + +call mtr.add_suppression("Failed to parse histogram for table .*"); +create table ten(a int primary key); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +set histogram_size=100; +set histogram_type='double_prec_hb'; +create table t1_bin (a varchar(255)); +insert into t1_bin select concat('a-', a) from ten; +analyze table t1_bin persistent for all; +select hex(histogram) from mysql.column_stats where table_name='t1_bin'; +explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; +analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; + +set histogram_type=json_hb; +create table t1_json (a varchar(255)); +insert into t1_json select concat('a-', a) from ten; +analyze table t1_json persistent for all; +--source include/json_hb_histogram.inc +select * from mysql.column_stats where table_name='t1_json'; +explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; +analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; +explain extended select * from t1_json where a < 'b-1a'; +analyze select * from t1_json where a > 'zzzzzzzzz'; + +drop table ten; + +# +# Test different valid JSON strings that are invalid histograms. +# +UPDATE mysql.column_stats +SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":"not-histogram"}' WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":["not-a-bucket"]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":[{"no-expected-members":1}]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":[{"start":{}}]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":[{"start":"aaa", "size":"not-an-integer"}]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":[{"start":"aaa", "size":0.25}]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":[{"start":"aaa", "size":0.25, "ndv":1}]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +UPDATE mysql.column_stats +SET histogram='{"histogram_hb":[]}' +WHERE table_name='t1_json'; +FLUSH TABLES; +explain select * from t1_json limit 1; + +--source include/have_sequence.inc +create table t2 ( + city varchar(100) +); +set histogram_size=50; +insert into t2 select 'Moscow' from seq_1_to_99; +insert into t2 select 'Helsinki' from seq_1_to_2; +set histogram_type=json_hb; +analyze table t2 persistent for all; +explain extended select * from t2 where city = 'Moscow'; +analyze select * from t2 where city = 'Moscow'; +explain extended select * from t2 where city = 'Helsinki'; +analyze select * from t2 where city = 'helsinki'; +explain extended select * from t2 where city < 'Lagos'; + +drop table t1_bin; +drop table t1_json; +drop table t2; + +DELETE FROM mysql.column_stats; + +--disable_service_connection + +create schema world; +use world; +--disable_query_log +--disable_result_log +--disable_warnings +--source include/world_schema_utf8.inc +--source include/world.inc +--enable_warnings +--enable_result_log +--enable_query_log + +set histogram_type='JSON_HB'; +set histogram_size=50; +--disable_result_log +ANALYZE TABLE Country, City, CountryLanguage persistent for all; +--enable_result_log + +--source include/histogram_replaces.inc +SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats; +analyze select * from Country use index () where Code between 'BBC' and 'GGG'; +analyze select * from Country use index () where Code < 'BBC'; + +set histogram_type=@save_histogram_type; +set histogram_size=@save_histogram_size; + +DROP SCHEMA world; + +--enable_service_connection + +use test; + +create table t10 ( + a varchar(10) +); + +--echo # +--echo # Histograms are not collected for empty tables: +--echo # +analyze table t10 persistent for all; +select histogram +from mysql.column_stats where table_name='t10' and db_name=database(); + +--echo # +--echo # Try with n_buckets > n_rows +--echo # +insert into t10 values ('Berlin'),('Paris'),('Rome'); +set histogram_size=10, histogram_type='json_hb'; +analyze table t10 persistent for all; +--source include/histogram_replaces.inc +select histogram +from mysql.column_stats where table_name='t10' and db_name=database(); +drop table t10; + +--echo # +--echo # MDEV-26590: Stack smashing/buffer overflow in Histogram_json_hb::parse upon UPDATE on table with long VARCHAR +--echo # + +CREATE TABLE t1 (b INT, a VARCHAR(3176)); +INSERT INTO t1 VALUES (1,'foo'),(2,'bar'); +SET histogram_type= JSON_HB; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +SELECT * FROM t1; +drop table t1; + +--echo # +--echo # MDEV-26589: Assertion failure upon DECODE_HISTOGRAM with NULLs in first column +--echo # + +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 VALUES (NULL,1), (NULL,2); +SET histogram_type = JSON_HB; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +--source include/histogram_replaces.inc +SELECT DECODE_HISTOGRAM(hist_type, histogram) from mysql.column_stats; +drop table t1; + +--echo # +--echo # MDEV-26711: Values in JSON histograms are not properly quoted +--echo # + +create table t1 (a varchar(32)); +insert into t1 values ('this is "quoted" text'); +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +select * from t1 where a = 'foo'; +drop table t1; + +--echo # +--echo # MDEV-26724 Endless loop in json_escape_to_string upon ... empty string +--echo # +CREATE TABLE t1 (f VARCHAR(8)); +INSERT INTO t1 VALUES ('a'),(''),('b'); +SET histogram_type=JSON_HB; +ANALYZE TABLE t PERSISTENT FOR ALL; +select * from t1; +drop table t1; + +create table t1 (a char(1)) character set latin1; +insert into t1 values (0xD1); +select hex(a) from t1; +set histogram_type='json_hb'; +analyze table t1 persistent for all; + +--source include/histogram_replaces.inc +select decode_histogram(hist_type, histogram) +from mysql.column_stats +where db_name=database() and table_name='t1'; + +select * from t1; +drop table t1; + +--echo # +--echo # Another testcase: use a character that cannot be represented in utf8: +--echo # Also, now it's testcase for: +--echo # MDEV-26764: JSON_HB Histograms: handle BINARY and unassigned characters +--echo # +create table t1 ( a varchar(100) character set cp1251); +insert into t1 values ( _cp1251 x'88'),( _cp1251 x'88'), ( _cp1251 x'88'); +insert into t1 values ( _cp1251 x'98'),( _cp1251 x'98'); +analyze table t1 persistent for all; + +--source include/histogram_replaces.inc +select hist_type, histogram +from mysql.column_stats +where db_name=database() and table_name='t1'; + +analyze select * from t1 where a=_cp1251 x'88'; + +drop table t1; + +--echo # +--echo # ASAN use-after-poison my_strnxfrm_simple_internal / Histogram_json_hb::range_selectivity ... +--echo # (Just the testcase) +--echo # + +CREATE TABLE t1 (f CHAR(8)); +INSERT INTO t1 VALUES ('foo'),('bar'); + +SET histogram_type = JSON_HB; +ANALYZE TABLE t1 PERSISTENT FOR ALL; + +SELECT * FROM t1 WHERE f > 'qux'; +DROP TABLE t1; + +--echo # +--echo # MDEV-26737: Outdated VARIABLE_COMMENT for HISTOGRAM_TYPE in I_S.SYSTEM_VARIABLES +--echo # +select variable_comment from information_schema.system_variables where VARIABLE_NAME='HISTOGRAM_TYPE'; + +--echo # +--echo # MDEV-26709: JSON histogram may contain bucketS than histogram_size allows +--echo # +create table t1 (a int); +insert into t1 values (1),(3),(5),(7); +insert into t1 select 2 from seq_1_to_25; +insert into t1 select 4 from seq_1_to_25; +insert into t1 select 6 from seq_1_to_25; + +set histogram_size=4, histogram_type=JSON_HB; +analyze table t1 persistent for all; + +--source include/json_hb_histogram.inc +select histogram from mysql.column_stats where table_name = 't1'; + +drop table t1; + +--echo # +--echo # MDEV-26750: Estimation for filtered rows is far off with JSON_HB histogram +--echo # +create table t1 (c char(8)); + +insert into t1 values ('1x'); +insert into t1 values ('1x'); +insert into t1 values ('1xx'); +insert into t1 values ('0xx'); +insert into t1 select * from t1; +insert into t1 select * from t1; + +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +analyze +select c from t1 where c > '1'; + +drop table t1; + +--echo # +--echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values +--echo # + +create table t0(a int); +insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a int); +insert into t1 select 100*A.a from t0 A, t0 B, t0 C; +select a, count(*) from t1 group by a order by a; +set histogram_type=json_hb, histogram_size=default; +analyze table t1 persistent for all; +--source include/json_hb_histogram.inc +select * from mysql.column_stats where table_name='t1'; +analyze select * from t1 where a=0; +analyze select * from t1 where a=50; +analyze select * from t1 where a=70; +analyze select * from t1 where a=100; +analyze select * from t1 where a=150; +analyze select * from t1 where a=200; + +drop table t0,t1; + +--echo # +--echo # MDEV-26892: JSON histograms become invalid with a specific (corrupt) value in t +--echo # +create table t1 (a varchar(32)) DEFAULT CHARSET=cp1257; +set histogram_type= JSON_HB, histogram_size= 1; +insert into t1 values ('foo'),(unhex('9C')); +analyze table t1 persistent for all; + +select * from t1; +drop table t1; + +--echo # +--echo # MDEV-26911: Unexpected ER_DUP_KEY, ASAN errors, double free detected in tcache with JSON_HB histogram +--echo # + +SET histogram_type= JSON_HB; + +CREATE TABLE t1 (pk INT AUTO_INCREMENT, f VARCHAR(8), PRIMARY KEY (pk)); +INSERT INTO t1 (f) VALUES ('foo'); +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk; +INSERT INTO t1 (f) VALUES ('bar'); +DROP TABLE t1; + +--echo # +--echo # MDEV-26886: Estimation for filtered rows less precise with JSON histogram +--echo # +create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100; +select count(*) from t1 where a <= 0; + +set histogram_type = JSON_HB, histogram_size=default; +analyze table t1 persistent for all; +analyze select * from t1 where a <= 0; +analyze select * from t1 where a < 0; +analyze select * from t1 where a > 0; +analyze select * from t1 where a >= 0; +drop table t1; + + +--echo # +--echo # More test coverage +--echo # +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t1(a int); +insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; + +create table t2 (a int); +insert into t2 select 1 from t1; +insert into t2 select (a+1)*10 from t0; +insert into t2 values (0); + +analyze table t2 persistent for all; +analyze select * from t2 where a < 1; +analyze select * from t2 where a =100; + +drop table t0,t1,t2; + +--echo # +--echo # MDEV-27230: Estimation for filtered rows less precise ... +--echo # +create table t1 (a char(1)); +insert into t1 select chr(seq%26+97) from seq_1_to_50; +insert into t1 select ':' from t1; +analyze table t1 persistent for all; +analyze select COUNT(*) FROM t1 WHERE a <> 'a'; +analyze select COUNT(*) FROM t1 WHERE a < 'a'; +drop table t1; + +--echo # +--echo # MDEV-27229: Estimation for filtered rows less precise ... #5 +--echo # +create table t1 (id int, a varchar(8)); +insert into t1 select seq, 'bar' from seq_1_to_100; +insert into t1 select id, 'qux' from t1; + +set histogram_type=JSON_HB; +analyze table t1 persistent for all; +analyze select COUNT(*) FROM t1 WHERE a > 'foo'; + +analyze select COUNT(*) FROM t1 WHERE a > 'aaa'; +analyze select COUNT(*) FROM t1 WHERE a >='aaa'; + +analyze select COUNT(*) FROM t1 WHERE a > 'bar'; +analyze select COUNT(*) FROM t1 WHERE a >='bar'; + +# Can enable these after get_avg_frequency issue is resolved: +analyze select COUNT(*) FROM t1 WHERE a < 'aaa'; +analyze select COUNT(*) FROM t1 WHERE a <='aaa'; +analyze select COUNT(*) FROM t1 WHERE a < 'bar'; + +analyze select COUNT(*) FROM t1 WHERE a <='bar'; + +drop table t1; + +--echo # +--echo # MDEV-27243: Estimation for filtered rows less precise ... #7 +--echo # (Testcase only) + +CREATE TABLE t1 (f TIME); +INSERT INTO t1 SELECT IF(seq%2,'00:00:00',SEC_TO_TIME(seq+7200)) FROM seq_1_to_1000; + +SET histogram_type= JSON_HB; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ANALYZE SELECT * FROM t1 WHERE f > '00:01:00'; +drop table t1; + +--echo # +--echo # MDEV-26901: Estimation for filtered rows less precise ... #4 +--echo # +create table t1 (f int); +insert into t1 values + (7),(5),(0),(5),(112),(9),(9),(7),(5),(9), + (1),(7),(0),(6),(6),(2),(1),(6),(169),(7); + +select f from t1 where f in (77, 1, 144, 73, 14, 12); + +set histogram_type= JSON_HB; +analyze table t1 persistent for all; + +analyze select f from t1 where f in (77, 1, 144, 73, 14, 12); +drop table t1; + + +--echo # +--echo # Test that histograms over BIT fields use hex +--echo # +create table t1 (a BIT(64)); +insert into t1 values + (x'01'),(x'10'),(x'BE562B1A99001918'); +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +--source include/json_hb_histogram.inc +select histogram +from mysql.column_stats where table_name='t1' and db_name=database(); + +drop table t1; + +--echo # +--echo # MDEV-28882: Assertion `tmp >= 0' failed in best_access_path +--echo # + +CREATE TABLE t1 (a varchar(1)); + +INSERT INTO t1 VALUES ('o'),('s'),('j'),('s'),('y'),('s'),('l'), + ('q'),('x'),('m'),('t'),('d'),('v'),('j'),('p'),('t'),('b'),('q'); + +set histogram_type=json_hb; +analyze table t1 persistent for all; +--echo # filtered must not be negative: +explain format=json select * from t1 where a > 'y'; + +drop table t1; + |