--echo #
--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON
--echo #

let $histogram_type_override='JSON_HB';
--source statistics.test

--source include/have_innodb.inc
--source include/have_stat_tables.inc
--source include/have_sequence.inc
--disable_warnings
drop table if exists t1;
--enable_warnings

set @save_histogram_type=@@histogram_type;
set @save_histogram_size=@@histogram_size;

call mtr.add_suppression("Failed to parse histogram for table .*");
create table ten(a int primary key);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

set histogram_size=100;
set histogram_type='double_prec_hb';
create table t1_bin (a varchar(255));
insert into t1_bin select concat('a-', a) from ten;
analyze table t1_bin persistent for all;
select hex(histogram) from mysql.column_stats where table_name='t1_bin';
explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';

set histogram_type=json_hb;
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
analyze table t1_json persistent for all;
--source include/json_hb_histogram.inc
select * from mysql.column_stats where table_name='t1_json';
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
explain extended select * from t1_json where a < 'b-1a';
analyze select * from t1_json where a > 'zzzzzzzzz';

drop table ten;

#
# Test different valid JSON strings that are invalid histograms.
#
UPDATE mysql.column_stats 
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":["not-a-bucket"]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":[{"no-expected-members":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":[{"start":{}}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

UPDATE mysql.column_stats 
SET histogram='{"histogram_hb":[]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;

--source include/have_sequence.inc
create table t2 (
  city varchar(100)
);
set histogram_size=50;
insert into t2 select 'Moscow' from seq_1_to_99;
insert into t2 select 'Helsinki' from seq_1_to_2;
set histogram_type=json_hb;
analyze table t2 persistent for all;
explain extended select * from t2 where city = 'Moscow';
analyze select * from t2 where city = 'Moscow';
explain extended select * from t2 where city = 'Helsinki';
analyze select * from t2 where city = 'helsinki';
explain extended select * from t2 where city < 'Lagos';

drop table t1_bin;
drop table t1_json;
drop table t2;

DELETE FROM mysql.column_stats;

--disable_service_connection

create schema world;
use world;
--disable_query_log
--disable_result_log
--disable_warnings
--source include/world_schema_utf8.inc
--source include/world.inc
--enable_warnings
--enable_result_log
--enable_query_log

set histogram_type='JSON_HB';
set histogram_size=50;
--disable_result_log
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
--enable_result_log

--source include/histogram_replaces.inc
SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats;
analyze select * from Country use index () where Code between 'BBC' and 'GGG';
analyze select * from Country use index () where Code < 'BBC';

set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;

DROP SCHEMA world;

--enable_service_connection

use test;

create table t10 (
  a varchar(10)
);

--echo #
--echo # Histograms are not collected for empty tables:
--echo #
analyze table t10 persistent for all;
select histogram
from mysql.column_stats where table_name='t10' and db_name=database();

--echo #
--echo # Try with n_buckets > n_rows
--echo #
insert into t10 values ('Berlin'),('Paris'),('Rome');
set histogram_size=10, histogram_type='json_hb';
analyze table t10 persistent for all;
--source include/histogram_replaces.inc
select histogram
from mysql.column_stats where table_name='t10' and db_name=database();
drop table t10;

--echo #
--echo #  MDEV-26590: Stack smashing/buffer overflow in Histogram_json_hb::parse upon UPDATE on table with long VARCHAR
--echo #

CREATE TABLE t1 (b INT, a VARCHAR(3176));
INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
SET histogram_type= JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
SELECT * FROM t1;
drop table t1;

--echo #
--echo # MDEV-26589: Assertion failure upon DECODE_HISTOGRAM with NULLs in first column
--echo #

CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (NULL,1), (NULL,2);
SET histogram_type = JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
--source include/histogram_replaces.inc
SELECT DECODE_HISTOGRAM(hist_type, histogram) from mysql.column_stats;
drop table t1;

--echo #
--echo # MDEV-26711: Values in JSON histograms are not properly quoted
--echo #

create table t1 (a varchar(32));
insert into t1 values ('this is "quoted" text');
set histogram_type= JSON_HB;
analyze table t1 persistent for all;
select * from t1 where a = 'foo';
drop table t1;

--echo #
--echo # MDEV-26724 Endless loop in json_escape_to_string upon ... empty string
--echo #
CREATE TABLE t1 (f VARCHAR(8));
INSERT INTO t1 VALUES ('a'),(''),('b');
SET histogram_type=JSON_HB;
ANALYZE TABLE t PERSISTENT FOR ALL;
select * from t1;
drop table t1;

create table t1 (a char(1)) character set latin1;
insert into t1 values (0xD1);
select hex(a) from t1;
set histogram_type='json_hb';
analyze table t1 persistent for all;

--source include/histogram_replaces.inc
select decode_histogram(hist_type, histogram)
from mysql.column_stats
where db_name=database() and table_name='t1';

select * from t1;
drop table t1;

--echo #
--echo # Another testcase: use a character that cannot be represented in utf8:
--echo # Also, now it's testcase for:
--echo #  MDEV-26764: JSON_HB Histograms: handle BINARY and unassigned characters
--echo #
create table t1 ( a varchar(100) character set cp1251);
insert into t1 values ( _cp1251 x'88'),( _cp1251 x'88'), ( _cp1251 x'88');
insert into t1 values ( _cp1251 x'98'),( _cp1251 x'98');
analyze table t1 persistent for all;

--source include/histogram_replaces.inc
select hist_type, histogram
from mysql.column_stats
where db_name=database() and table_name='t1';

analyze select * from t1 where a=_cp1251 x'88';

drop table t1;

--echo #
--echo # ASAN use-after-poison my_strnxfrm_simple_internal / Histogram_json_hb::range_selectivity ...
--echo # (Just the testcase)
--echo #

CREATE TABLE t1 (f CHAR(8));
INSERT INTO t1 VALUES ('foo'),('bar');

SET histogram_type = JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL;

SELECT * FROM t1 WHERE f > 'qux';
DROP TABLE t1;

--echo #
--echo # MDEV-26737: Outdated VARIABLE_COMMENT for HISTOGRAM_TYPE in I_S.SYSTEM_VARIABLES
--echo #
select variable_comment from information_schema.system_variables where VARIABLE_NAME='HISTOGRAM_TYPE';

--echo #
--echo # MDEV-26709: JSON histogram may contain bucketS than histogram_size allows
--echo #
create table t1 (a int);
insert into t1 values (1),(3),(5),(7);
insert into t1 select 2 from seq_1_to_25;
insert into t1 select 4 from seq_1_to_25;
insert into t1 select 6 from seq_1_to_25;

set histogram_size=4, histogram_type=JSON_HB;
analyze table t1 persistent for all;

--source include/json_hb_histogram.inc
select histogram from mysql.column_stats where table_name = 't1';

drop table t1;

--echo #
--echo # MDEV-26750: Estimation for filtered rows is far off with JSON_HB histogram
--echo #
create table t1 (c char(8));

insert into t1 values ('1x');
insert into t1 values ('1x');
insert into t1 values ('1xx');
insert into t1 values ('0xx');
insert into t1 select * from t1;
insert into t1 select * from t1;

set histogram_type= JSON_HB;
analyze table t1 persistent for all;
analyze
select c from t1 where c > '1';

drop table t1;

--echo #
--echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
--echo #

create table t0(a int);
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
select a, count(*) from t1 group by a order by a;
set histogram_type=json_hb, histogram_size=default;
analyze table t1 persistent for all;
--source include/json_hb_histogram.inc
select * from mysql.column_stats where table_name='t1';
analyze select * from t1 where a=0;
analyze select * from t1 where a=50;
analyze select * from t1 where a=70;
analyze select * from t1 where a=100;
analyze select * from t1 where a=150;
analyze select * from t1 where a=200;

drop table t0,t1;

--echo #
--echo # MDEV-26892: JSON histograms become invalid with a specific (corrupt) value in t
--echo #
create table t1 (a varchar(32)) DEFAULT CHARSET=cp1257;
set histogram_type= JSON_HB, histogram_size= 1;
insert into t1 values ('foo'),(unhex('9C'));
analyze table t1 persistent for all;

select * from t1;
drop table t1;

--echo #
--echo # MDEV-26911: Unexpected ER_DUP_KEY, ASAN errors, double free detected in tcache with JSON_HB histogram
--echo #

SET histogram_type= JSON_HB;
 
CREATE TABLE t1 (pk INT AUTO_INCREMENT, f VARCHAR(8), PRIMARY KEY (pk));
INSERT INTO t1 (f) VALUES ('foo');
ANALYZE TABLE t1 PERSISTENT FOR ALL;
ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk;
INSERT INTO t1 (f) VALUES ('bar');
DROP TABLE t1;

--echo #
--echo # MDEV-26886: Estimation for filtered rows less precise with JSON histogram
--echo #
create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100;
select count(*) from t1 where a <= 0;

set histogram_type = JSON_HB, histogram_size=default;
analyze table t1 persistent for all;
analyze select * from t1 where a <= 0;
analyze select * from t1 where a < 0;
analyze select * from t1 where a > 0;
analyze select * from t1 where a >= 0;
drop table t1;


--echo #
--echo # More test coverage
--echo #
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;

create table t2 (a int);
insert into t2 select 1 from t1;
insert into t2 select (a+1)*10 from t0;
insert into t2 values (0);

analyze table t2 persistent for all;
analyze select * from t2 where a < 1;
analyze select * from t2 where a =100;

drop table t0,t1,t2;

--echo #
--echo # MDEV-27230: Estimation for filtered rows less precise ...
--echo #
create table t1 (a char(1));
insert into t1 select chr(seq%26+97) from seq_1_to_50;
insert into t1 select ':' from t1;
analyze table t1 persistent for all;
analyze select COUNT(*) FROM t1 WHERE a <> 'a';
analyze select COUNT(*) FROM t1 WHERE a < 'a';
drop table t1;

--echo #
--echo # MDEV-27229: Estimation for filtered rows less precise ... #5
--echo #
create table t1 (id int, a varchar(8));
insert into t1 select seq, 'bar' from seq_1_to_100;
insert into t1 select id, 'qux' from t1;

set histogram_type=JSON_HB;
analyze table t1 persistent for all;
analyze select COUNT(*) FROM t1 WHERE a > 'foo';

analyze select COUNT(*) FROM t1 WHERE a > 'aaa';
analyze select COUNT(*) FROM t1 WHERE a >='aaa';

analyze select COUNT(*) FROM t1 WHERE a > 'bar';
analyze select COUNT(*) FROM t1 WHERE a >='bar';

# Can enable these after get_avg_frequency issue is resolved:
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
analyze select COUNT(*) FROM t1 WHERE a < 'bar';

analyze select COUNT(*) FROM t1 WHERE a <='bar';

drop table t1;

--echo #
--echo # MDEV-27243: Estimation for filtered rows less precise ... #7
--echo # (Testcase only)

CREATE TABLE t1 (f TIME);
INSERT INTO t1 SELECT IF(seq%2,'00:00:00',SEC_TO_TIME(seq+7200)) FROM seq_1_to_1000;

SET histogram_type= JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
ANALYZE SELECT * FROM t1 WHERE f > '00:01:00';
drop table t1;

--echo #
--echo # MDEV-26901: Estimation for filtered rows less precise ... #4
--echo #
create table t1 (f int);
insert into t1 values
  (7),(5),(0),(5),(112),(9),(9),(7),(5),(9),
  (1),(7),(0),(6),(6),(2),(1),(6),(169),(7);

select f from t1 where f in (77, 1, 144, 73, 14, 12);

set histogram_type= JSON_HB;
analyze table t1 persistent for all;

analyze select f from t1 where f in (77, 1, 144, 73, 14, 12);
drop table t1;


--echo #
--echo # Test that histograms over BIT fields use hex
--echo #
create table t1 (a BIT(64));
insert into t1 values 
  (x'01'),(x'10'),(x'BE562B1A99001918');
set histogram_type= JSON_HB;
analyze table t1 persistent for all;
--source include/json_hb_histogram.inc
select histogram
from mysql.column_stats where table_name='t1' and db_name=database();

drop table t1;

--echo #
--echo # MDEV-28882: Assertion `tmp >= 0' failed in best_access_path
--echo #

CREATE TABLE t1 (a varchar(1));

INSERT INTO t1 VALUES ('o'),('s'),('j'),('s'),('y'),('s'),('l'),
  ('q'),('x'),('m'),('t'),('d'),('v'),('j'),('p'),('t'),('b'),('q');

set histogram_type=json_hb;
analyze table t1 persistent for all;
--echo # filtered must not be negative:
explain format=json select * from t1 where a > 'y';

drop table t1;