summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/main/mysqld--help.result5
-rw-r--r--mysql-test/main/statistics.result104
-rw-r--r--mysql-test/main/statistics.test88
-rw-r--r--mysql-test/suite/sys_vars/r/sysvars_server_embedded.result14
-rw-r--r--mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result14
-rw-r--r--sql/sql_class.h1
-rw-r--r--sql/sql_statistics.cc40
-rw-r--r--sql/sys_vars.cc9
8 files changed, 265 insertions, 10 deletions
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index e130f324da0..65fda40720d 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -15,6 +15,10 @@ The following specify which files/extra groups are read (specified before remain
--alter-algorithm[=name]
Specify the alter table algorithm. One of: DEFAULT, COPY,
INPLACE, NOCOPY, INSTANT
+ --analyze-sample-percentage=#
+ Percentage of rows from the table ANALYZE TABLE will
+ sample to collect table statistics. Set to 0 to let
+ MariaDB decide what percentage of rows to sample.
-a, --ansi Use ANSI SQL syntax instead of MySQL syntax. This mode
will also set transaction isolation level 'serializable'.
--auto-increment-increment[=#]
@@ -1385,6 +1389,7 @@ The following specify which files/extra groups are read (specified before remain
Variables (--variable-name=value)
allow-suspicious-udfs FALSE
alter-algorithm DEFAULT
+analyze-sample-percentage 100
auto-increment-increment 1
auto-increment-offset 1
autocommit TRUE
diff --git a/mysql-test/main/statistics.result b/mysql-test/main/statistics.result
index 5f6a7cd7bb3..ee97ec05391 100644
--- a/mysql-test/main/statistics.result
+++ b/mysql-test/main/statistics.result
@@ -1760,3 +1760,107 @@ DROP TABLE t1;
# End of 10.2 tests
#
set histogram_size=@save_hist_size, histogram_type=@save_hist_type;
+#
+# Start of 10.4 tests
+#
+#
+# Test analyze_sample_percentage system variable.
+#
+set @save_use_stat_tables=@@use_stat_tables;
+set @save_analyze_sample_percentage=@@analyze_sample_percentage;
+set session rand_seed1=42;
+set session rand_seed2=62;
+set use_stat_tables=PREFERABLY;
+set histogram_size=10;
+CREATE TABLE t1 (id int);
+INSERT INTO t1 (id) VALUES (1), (1), (1), (1), (1), (1), (1);
+INSERT INTO t1 (id) SELECT id FROM t1;
+INSERT INTO t1 SELECT id+1 FROM t1;
+INSERT INTO t1 SELECT id+2 FROM t1;
+INSERT INTO t1 SELECT id+4 FROM t1;
+INSERT INTO t1 SELECT id+8 FROM t1;
+INSERT INTO t1 SELECT id+16 FROM t1;
+INSERT INTO t1 SELECT id+32 FROM t1;
+INSERT INTO t1 SELECT id+64 FROM t1;
+INSERT INTO t1 SELECT id+128 FROM t1;
+INSERT INTO t1 SELECT id+256 FROM t1;
+INSERT INTO t1 SELECT id+512 FROM t1;
+INSERT INTO t1 SELECT id+1024 FROM t1;
+INSERT INTO t1 SELECT id+2048 FROM t1;
+INSERT INTO t1 SELECT id+4096 FROM t1;
+INSERT INTO t1 SELECT id+9192 FROM t1;
+#
+# This query will should show a full table scan analysis.
+#
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
+t1 id 1 17384 0.0000 4.0000 14.0000 0.082,0.086,0.086,0.086,0.086,0.141,0.086,0.086,0.086,0.086,0.086
+set analyze_sample_percentage=0.1;
+#
+# This query will show an innacurate avg_frequency value.
+#
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
+t1 id 111 17026 0.0000 4.0000 1.0047 0.039,0.098,0.055,0.118,0.078,0.157,0.082,0.118,0.094,0.063,0.098
+#
+# This query will show a better avg_frequency value.
+#
+set analyze_sample_percentage=25;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
+t1 id 1 17384 0.0000 4.0000 3.5736 0.082,0.086,0.086,0.082,0.086,0.145,0.086,0.086,0.082,0.086,0.090
+set analyze_sample_percentage=0;
+#
+# Test self adjusting sampling level.
+#
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
+t1 id 1 17384 0.0000 4.0000 7.4523 0.082,0.090,0.086,0.082,0.086,0.145,0.086,0.082,0.086,0.086,0.086
+#
+# Test record estimation is working properly.
+#
+select count(*) from t1;
+count(*)
+229376
+explain select * from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 229060
+set analyze_sample_percentage=100;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
+t1 id 1 17384 0.0000 4.0000 14.0000 0.082,0.086,0.086,0.086,0.086,0.141,0.086,0.086,0.086,0.086,0.086
+explain select * from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
+set use_stat_tables=@save_use_stat_tables;
+drop table t1;
diff --git a/mysql-test/main/statistics.test b/mysql-test/main/statistics.test
index 78439e3e68a..84bb7c10059 100644
--- a/mysql-test/main/statistics.test
+++ b/mysql-test/main/statistics.test
@@ -901,3 +901,91 @@ DROP TABLE t1;
--echo #
set histogram_size=@save_hist_size, histogram_type=@save_hist_type;
+
+
+--echo #
+--echo # Start of 10.4 tests
+--echo #
+
+--echo #
+--echo # Test analyze_sample_percentage system variable.
+--echo #
+set @save_use_stat_tables=@@use_stat_tables;
+set @save_analyze_sample_percentage=@@analyze_sample_percentage;
+
+set session rand_seed1=42;
+set session rand_seed2=62;
+
+set use_stat_tables=PREFERABLY;
+set histogram_size=10;
+
+CREATE TABLE t1 (id int);
+INSERT INTO t1 (id) VALUES (1), (1), (1), (1), (1), (1), (1);
+INSERT INTO t1 (id) SELECT id FROM t1;
+INSERT INTO t1 SELECT id+1 FROM t1;
+INSERT INTO t1 SELECT id+2 FROM t1;
+INSERT INTO t1 SELECT id+4 FROM t1;
+INSERT INTO t1 SELECT id+8 FROM t1;
+INSERT INTO t1 SELECT id+16 FROM t1;
+INSERT INTO t1 SELECT id+32 FROM t1;
+INSERT INTO t1 SELECT id+64 FROM t1;
+INSERT INTO t1 SELECT id+128 FROM t1;
+INSERT INTO t1 SELECT id+256 FROM t1;
+INSERT INTO t1 SELECT id+512 FROM t1;
+INSERT INTO t1 SELECT id+1024 FROM t1;
+INSERT INTO t1 SELECT id+2048 FROM t1;
+INSERT INTO t1 SELECT id+4096 FROM t1;
+INSERT INTO t1 SELECT id+9192 FROM t1;
+
+--echo #
+--echo # This query will should show a full table scan analysis.
+--echo #
+ANALYZE TABLE t1;
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+ DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+
+set analyze_sample_percentage=0.1;
+
+--echo #
+--echo # This query will show an innacurate avg_frequency value.
+--echo #
+ANALYZE TABLE t1;
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+ DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+
+--echo #
+--echo # This query will show a better avg_frequency value.
+--echo #
+set analyze_sample_percentage=25;
+ANALYZE TABLE t1;
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+ DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+
+
+set analyze_sample_percentage=0;
+--echo #
+--echo # Test self adjusting sampling level.
+--echo #
+ANALYZE TABLE t1;
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+ DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+-- echo #
+-- echo # Test record estimation is working properly.
+-- echo #
+select count(*) from t1;
+explain select * from t1;
+
+set analyze_sample_percentage=100;
+ANALYZE TABLE t1;
+select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
+ DECODE_HISTOGRAM(hist_type, histogram)
+from mysql.column_stats;
+explain select * from t1;
+
+set use_stat_tables=@save_use_stat_tables;
+
+drop table t1;
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
index c517403f842..67cab18a6bb 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
@@ -40,6 +40,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
+VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE
+SESSION_VALUE 100.000000
+GLOBAL_VALUE 100.000000
+GLOBAL_VALUE_ORIGIN COMPILE-TIME
+DEFAULT_VALUE 100.000000
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE DOUBLE
+VARIABLE_COMMENT Percentage of rows from the table ANALYZE TABLE will sample to collect table statistics. Set to 0 to let MariaDB decide what percentage of rows to sample.
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 100
+NUMERIC_BLOCK_SIZE NULL
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME AUTOCOMMIT
SESSION_VALUE ON
GLOBAL_VALUE ON
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index 6d0eb0f8ef2..35ce738960f 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -40,6 +40,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
+VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE
+SESSION_VALUE 100.000000
+GLOBAL_VALUE 100.000000
+GLOBAL_VALUE_ORIGIN COMPILE-TIME
+DEFAULT_VALUE 100.000000
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE DOUBLE
+VARIABLE_COMMENT Percentage of rows from the table ANALYZE TABLE will sample to collect table statistics. Set to 0 to let MariaDB decide what percentage of rows to sample.
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 100
+NUMERIC_BLOCK_SIZE NULL
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME AUTOCOMMIT
SESSION_VALUE ON
GLOBAL_VALUE ON
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 56b8aca19ab..3b0099ccae8 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -622,6 +622,7 @@ typedef struct system_variables
ulong optimizer_selectivity_sampling_limit;
ulong optimizer_use_condition_selectivity;
ulong use_stat_tables;
+ double sample_percentage;
ulong histogram_size;
ulong histogram_type;
ulong preload_buff_size;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index f903ce143a4..27fab974441 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -2729,12 +2729,28 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
Field *table_field;
ha_rows rows= 0;
handler *file=table->file;
+ double sample_fraction= thd->variables.sample_percentage / 100;
+ const ha_rows MIN_THRESHOLD_FOR_SAMPLING= 50000;
DBUG_ENTER("collect_statistics_for_table");
table->collected_stats->cardinality_is_null= TRUE;
table->collected_stats->cardinality= 0;
+ if (thd->variables.sample_percentage == 0)
+ {
+ if (file->records() < MIN_THRESHOLD_FOR_SAMPLING)
+ {
+ sample_fraction= 1;
+ }
+ else
+ {
+ sample_fraction= std::fmin(
+ (MIN_THRESHOLD_FOR_SAMPLING + 4096 *
+ log(200 * file->records())) / file->records(), 1);
+ }
+ }
+
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
table_field= *field_ptr;
@@ -2747,7 +2763,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
/* Perform a full table scan to collect statistics on 'table's columns */
if (!(rc= file->ha_rnd_init(TRUE)))
- {
+ {
DEBUG_SYNC(table->in_use, "statistics_collection_start");
while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
@@ -2758,17 +2774,20 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
if (rc)
break;
- for (field_ptr= table->field; *field_ptr; field_ptr++)
+ if (thd_rnd(thd) <= sample_fraction)
{
- table_field= *field_ptr;
- if (!bitmap_is_set(table->read_set, table_field->field_index))
- continue;
- if ((rc= table_field->collected_stats->add()))
+ for (field_ptr= table->field; *field_ptr; field_ptr++)
+ {
+ table_field= *field_ptr;
+ if (!bitmap_is_set(table->read_set, table_field->field_index))
+ continue;
+ if ((rc= table_field->collected_stats->add()))
+ break;
+ }
+ if (rc)
break;
+ rows++;
}
- if (rc)
- break;
- rows++;
}
file->ha_rnd_end();
}
@@ -2782,7 +2801,8 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
if (!rc)
{
table->collected_stats->cardinality_is_null= FALSE;
- table->collected_stats->cardinality= rows;
+ table->collected_stats->cardinality=
+ static_cast<ha_rows>(rows / sample_fraction);
}
bitmap_clear_all(table->write_set);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index df9e8525689..dbfbb452ace 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -350,6 +350,15 @@ static Sys_var_long Sys_pfs_connect_attrs_size(
#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
+static Sys_var_double Sys_analyze_sample_percentage(
+ "analyze_sample_percentage",
+ "Percentage of rows from the table ANALYZE TABLE will sample "
+ "to collect table statistics. Set to 0 to let MariaDB decide "
+ "what percentage of rows to sample.",
+ SESSION_VAR(sample_percentage),
+ CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100),
+ DEFAULT(100));
+
static Sys_var_ulong Sys_auto_increment_increment(
"auto_increment_increment",
"Auto-increment columns are incremented by this",