--echo # --echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON --echo # let $histogram_type_override='JSON_HB'; --source statistics.test --source include/have_innodb.inc --source include/have_stat_tables.inc --source include/have_sequence.inc --disable_warnings drop table if exists t1; --enable_warnings set @save_histogram_type=@@histogram_type; set @save_histogram_size=@@histogram_size; call mtr.add_suppression("Failed to parse histogram for table .*"); create table ten(a int primary key); insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); set histogram_size=100; set histogram_type='double_prec_hb'; create table t1_bin (a varchar(255)); insert into t1_bin select concat('a-', a) from ten; analyze table t1_bin persistent for all; select hex(histogram) from mysql.column_stats where table_name='t1_bin'; explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; set histogram_type=json_hb; create table t1_json (a varchar(255)); insert into t1_json select concat('a-', a) from ten; analyze table t1_json persistent for all; --source include/json_hb_histogram.inc select * from mysql.column_stats where table_name='t1_json'; explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; explain extended select * from t1_json where a < 'b-1a'; analyze select * from t1_json where a > 'zzzzzzzzz'; drop table ten; # # Test different valid JSON strings that are invalid histograms. # UPDATE mysql.column_stats SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":"not-histogram"}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":["not-a-bucket"]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":[{"no-expected-members":1}]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":[{"start":{}}]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":[{"start":"aaa", "size":"not-an-integer"}]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":[{"start":"aaa", "size":0.25}]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":[{"start":"aaa", "size":0.25, "ndv":1}]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; UPDATE mysql.column_stats SET histogram='{"histogram_hb":[]}' WHERE table_name='t1_json'; FLUSH TABLES; explain select * from t1_json limit 1; --source include/have_sequence.inc create table t2 ( city varchar(100) ); set histogram_size=50; insert into t2 select 'Moscow' from seq_1_to_99; insert into t2 select 'Helsinki' from seq_1_to_2; set histogram_type=json_hb; analyze table t2 persistent for all; explain extended select * from t2 where city = 'Moscow'; analyze select * from t2 where city = 'Moscow'; explain extended select * from t2 where city = 'Helsinki'; analyze select * from t2 where city = 'helsinki'; explain extended select * from t2 where city < 'Lagos'; drop table t1_bin; drop table t1_json; drop table t2; DELETE FROM mysql.column_stats; --disable_service_connection create schema world; use world; --disable_query_log --disable_result_log --disable_warnings --source include/world_schema_utf8.inc --source include/world.inc --enable_warnings --enable_result_log --enable_query_log set histogram_type='JSON_HB'; set histogram_size=50; --disable_result_log ANALYZE TABLE Country, City, CountryLanguage persistent for all; --enable_result_log --source include/histogram_replaces.inc SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats; analyze select * from Country use index () where Code between 'BBC' and 'GGG'; analyze select * from Country use index () where Code < 'BBC'; set histogram_type=@save_histogram_type; set histogram_size=@save_histogram_size; DROP SCHEMA world; --enable_service_connection use test; create table t10 ( a varchar(10) ); --echo # --echo # Histograms are not collected for empty tables: --echo # analyze table t10 persistent for all; select histogram from mysql.column_stats where table_name='t10' and db_name=database(); --echo # --echo # Try with n_buckets > n_rows --echo # insert into t10 values ('Berlin'),('Paris'),('Rome'); set histogram_size=10, histogram_type='json_hb'; analyze table t10 persistent for all; --source include/histogram_replaces.inc select histogram from mysql.column_stats where table_name='t10' and db_name=database(); drop table t10; --echo # --echo # MDEV-26590: Stack smashing/buffer overflow in Histogram_json_hb::parse upon UPDATE on table with long VARCHAR --echo # CREATE TABLE t1 (b INT, a VARCHAR(3176)); INSERT INTO t1 VALUES (1,'foo'),(2,'bar'); SET histogram_type= JSON_HB; ANALYZE TABLE t1 PERSISTENT FOR ALL; SELECT * FROM t1; drop table t1; --echo # --echo # MDEV-26589: Assertion failure upon DECODE_HISTOGRAM with NULLs in first column --echo # CREATE TABLE t1 (a INT, b INT); INSERT INTO t1 VALUES (NULL,1), (NULL,2); SET histogram_type = JSON_HB; ANALYZE TABLE t1 PERSISTENT FOR ALL; --source include/histogram_replaces.inc SELECT DECODE_HISTOGRAM(hist_type, histogram) from mysql.column_stats; drop table t1; --echo # --echo # MDEV-26711: Values in JSON histograms are not properly quoted --echo # create table t1 (a varchar(32)); insert into t1 values ('this is "quoted" text'); set histogram_type= JSON_HB; analyze table t1 persistent for all; select * from t1 where a = 'foo'; drop table t1; --echo # --echo # MDEV-26724 Endless loop in json_escape_to_string upon ... empty string --echo # CREATE TABLE t1 (f VARCHAR(8)); INSERT INTO t1 VALUES ('a'),(''),('b'); SET histogram_type=JSON_HB; ANALYZE TABLE t PERSISTENT FOR ALL; select * from t1; drop table t1; create table t1 (a char(1)) character set latin1; insert into t1 values (0xD1); select hex(a) from t1; set histogram_type='json_hb'; analyze table t1 persistent for all; --source include/histogram_replaces.inc select decode_histogram(hist_type, histogram) from mysql.column_stats where db_name=database() and table_name='t1'; select * from t1; drop table t1; --echo # --echo # Another testcase: use a character that cannot be represented in utf8: --echo # Also, now it's testcase for: --echo # MDEV-26764: JSON_HB Histograms: handle BINARY and unassigned characters --echo # create table t1 ( a varchar(100) character set cp1251); insert into t1 values ( _cp1251 x'88'),( _cp1251 x'88'), ( _cp1251 x'88'); insert into t1 values ( _cp1251 x'98'),( _cp1251 x'98'); analyze table t1 persistent for all; --source include/histogram_replaces.inc select hist_type, histogram from mysql.column_stats where db_name=database() and table_name='t1'; analyze select * from t1 where a=_cp1251 x'88'; drop table t1; --echo # --echo # ASAN use-after-poison my_strnxfrm_simple_internal / Histogram_json_hb::range_selectivity ... --echo # (Just the testcase) --echo # CREATE TABLE t1 (f CHAR(8)); INSERT INTO t1 VALUES ('foo'),('bar'); SET histogram_type = JSON_HB; ANALYZE TABLE t1 PERSISTENT FOR ALL; SELECT * FROM t1 WHERE f > 'qux'; DROP TABLE t1; --echo # --echo # MDEV-26737: Outdated VARIABLE_COMMENT for HISTOGRAM_TYPE in I_S.SYSTEM_VARIABLES --echo # select variable_comment from information_schema.system_variables where VARIABLE_NAME='HISTOGRAM_TYPE'; --echo # --echo # MDEV-26709: JSON histogram may contain bucketS than histogram_size allows --echo # create table t1 (a int); insert into t1 values (1),(3),(5),(7); insert into t1 select 2 from seq_1_to_25; insert into t1 select 4 from seq_1_to_25; insert into t1 select 6 from seq_1_to_25; set histogram_size=4, histogram_type=JSON_HB; analyze table t1 persistent for all; --source include/json_hb_histogram.inc select histogram from mysql.column_stats where table_name = 't1'; drop table t1; --echo # --echo # MDEV-26750: Estimation for filtered rows is far off with JSON_HB histogram --echo # create table t1 (c char(8)); insert into t1 values ('1x'); insert into t1 values ('1x'); insert into t1 values ('1xx'); insert into t1 values ('0xx'); insert into t1 select * from t1; insert into t1 select * from t1; set histogram_type= JSON_HB; analyze table t1 persistent for all; analyze select c from t1 where c > '1'; drop table t1; --echo # --echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values --echo # create table t0(a int); insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1(a int); insert into t1 select 100*A.a from t0 A, t0 B, t0 C; select a, count(*) from t1 group by a order by a; set histogram_type=json_hb, histogram_size=default; analyze table t1 persistent for all; --source include/json_hb_histogram.inc select * from mysql.column_stats where table_name='t1'; analyze select * from t1 where a=0; analyze select * from t1 where a=50; analyze select * from t1 where a=70; analyze select * from t1 where a=100; analyze select * from t1 where a=150; analyze select * from t1 where a=200; drop table t0,t1; --echo # --echo # MDEV-26892: JSON histograms become invalid with a specific (corrupt) value in t --echo # create table t1 (a varchar(32)) DEFAULT CHARSET=cp1257; set histogram_type= JSON_HB, histogram_size= 1; insert into t1 values ('foo'),(unhex('9C')); analyze table t1 persistent for all; select * from t1; drop table t1; --echo # --echo # MDEV-26911: Unexpected ER_DUP_KEY, ASAN errors, double free detected in tcache with JSON_HB histogram --echo # SET histogram_type= JSON_HB; CREATE TABLE t1 (pk INT AUTO_INCREMENT, f VARCHAR(8), PRIMARY KEY (pk)); INSERT INTO t1 (f) VALUES ('foo'); ANALYZE TABLE t1 PERSISTENT FOR ALL; ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk; INSERT INTO t1 (f) VALUES ('bar'); DROP TABLE t1; --echo # --echo # MDEV-26886: Estimation for filtered rows less precise with JSON histogram --echo # create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100; select count(*) from t1 where a <= 0; set histogram_type = JSON_HB, histogram_size=default; analyze table t1 persistent for all; analyze select * from t1 where a <= 0; analyze select * from t1 where a < 0; analyze select * from t1 where a > 0; analyze select * from t1 where a >= 0; drop table t1; --echo # --echo # More test coverage --echo # create table t0(a int); insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1(a int); insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; create table t2 (a int); insert into t2 select 1 from t1; insert into t2 select (a+1)*10 from t0; insert into t2 values (0); analyze table t2 persistent for all; analyze select * from t2 where a < 1; analyze select * from t2 where a =100; drop table t0,t1,t2; --echo # --echo # MDEV-27230: Estimation for filtered rows less precise ... --echo # create table t1 (a char(1)); insert into t1 select chr(seq%26+97) from seq_1_to_50; insert into t1 select ':' from t1; analyze table t1 persistent for all; analyze select COUNT(*) FROM t1 WHERE a <> 'a'; analyze select COUNT(*) FROM t1 WHERE a < 'a'; drop table t1; --echo # --echo # MDEV-27229: Estimation for filtered rows less precise ... #5 --echo # create table t1 (id int, a varchar(8)); insert into t1 select seq, 'bar' from seq_1_to_100; insert into t1 select id, 'qux' from t1; set histogram_type=JSON_HB; analyze table t1 persistent for all; analyze select COUNT(*) FROM t1 WHERE a > 'foo'; analyze select COUNT(*) FROM t1 WHERE a > 'aaa'; analyze select COUNT(*) FROM t1 WHERE a >='aaa'; analyze select COUNT(*) FROM t1 WHERE a > 'bar'; analyze select COUNT(*) FROM t1 WHERE a >='bar'; # Can enable these after get_avg_frequency issue is resolved: analyze select COUNT(*) FROM t1 WHERE a < 'aaa'; analyze select COUNT(*) FROM t1 WHERE a <='aaa'; analyze select COUNT(*) FROM t1 WHERE a < 'bar'; analyze select COUNT(*) FROM t1 WHERE a <='bar'; drop table t1; --echo # --echo # MDEV-27243: Estimation for filtered rows less precise ... #7 --echo # (Testcase only) CREATE TABLE t1 (f TIME); INSERT INTO t1 SELECT IF(seq%2,'00:00:00',SEC_TO_TIME(seq+7200)) FROM seq_1_to_1000; SET histogram_type= JSON_HB; ANALYZE TABLE t1 PERSISTENT FOR ALL; ANALYZE SELECT * FROM t1 WHERE f > '00:01:00'; drop table t1; --echo # --echo # MDEV-26901: Estimation for filtered rows less precise ... #4 --echo # create table t1 (f int); insert into t1 values (7),(5),(0),(5),(112),(9),(9),(7),(5),(9), (1),(7),(0),(6),(6),(2),(1),(6),(169),(7); select f from t1 where f in (77, 1, 144, 73, 14, 12); set histogram_type= JSON_HB; analyze table t1 persistent for all; analyze select f from t1 where f in (77, 1, 144, 73, 14, 12); drop table t1; --echo # --echo # Test that histograms over BIT fields use hex --echo # create table t1 (a BIT(64)); insert into t1 values (x'01'),(x'10'),(x'BE562B1A99001918'); set histogram_type= JSON_HB; analyze table t1 persistent for all; --source include/json_hb_histogram.inc select histogram from mysql.column_stats where table_name='t1' and db_name=database(); drop table t1; --echo # --echo # MDEV-28882: Assertion `tmp >= 0' failed in best_access_path --echo # CREATE TABLE t1 (a varchar(1)); INSERT INTO t1 VALUES ('o'),('s'),('j'),('s'),('y'),('s'),('l'), ('q'),('x'),('m'),('t'),('d'),('v'),('j'),('p'),('t'),('b'),('q'); set histogram_type=json_hb; analyze table t1 persistent for all; --echo # filtered must not be negative: --source include/explain-no-costs.inc explain format=json select * from t1 where a > 'y'; drop table t1;