diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/handler.h | 44 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 34 |
2 files changed, 56 insertions, 22 deletions
diff --git a/sql/handler.h b/sql/handler.h index 8f9ddc01174..d40e986fd9d 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1906,6 +1906,11 @@ enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0, HA_STATS_AUTO_RECALC_ON, HA_STATS_AUTO_RECALC_OFF }; +enum sample_mode { + HA_SAMPLE_BERNOULLI= 0, + HA_SAMPLE_SYSTEM, +}; + /** A helper struct for schema DDL statements: CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ] @@ -2940,9 +2945,11 @@ public: /** Length of ref (1-8 or the clustered key length) */ uint ref_length; FT_INFO *ft_handler; - enum init_stat { NONE=0, INDEX, RND, RANDOM }; + enum init_stat { NONE=0, INDEX, RND, SAMPLE }; init_stat inited, pre_inited; + double sample_fraction= 0; + enum sample_mode sample_mode; const COND *pushed_cond; /** next_insert_id is the next value which should be inserted into the @@ -3105,21 +3112,25 @@ public: virtual int prepare_range_scan(const key_range *start_key, const key_range *end_key) { return 0; } - virtual int ha_random_sample_init(THD *thd, ha_rows estimate_rows_read) + int ha_random_sample_init(THD *thd, enum sample_mode mode, double fraction) __attribute__((warn_unused_result)) { DBUG_ENTER("ha_random_sample_init"); - inited= RANDOM; - DBUG_RETURN(random_sample_init(thd, estimate_rows_read)); + DBUG_ASSERT(inited==NONE); + int result; + sample_mode= mode; + sample_fraction= fraction; + inited= (result= random_sample_init(mode, fraction)) ? 
NONE : SAMPLE; + DBUG_RETURN(result); } - virtual int ha_random_sample(uchar *buf) + int ha_random_sample(uchar *buf) __attribute__((warn_unused_result)) { DBUG_ENTER("ha_random_sample"); - DBUG_ASSERT(inited == RANDOM); + DBUG_ASSERT(inited == SAMPLE); DBUG_RETURN(random_sample(buf)); } - virtual int ha_random_sample_end() __attribute__((warn_unused_result)) + int ha_random_sample_end() { DBUG_ENTER("ha_random_sample_end"); inited= NONE; @@ -4439,12 +4450,25 @@ private: /* Note: ha_index_read_idx_map() may bypass index_init() */ virtual int index_init(uint idx, bool sorted) { return 0; } virtual int index_end() { return 0; } - virtual int random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read) { return 0; } ; + virtual int random_sample_init(enum sample_mode mode, double fraction) + { + return rnd_init(TRUE); + } virtual int random_sample(uchar *buf) { - return HA_ERR_WRONG_COMMAND; + int rc; + THD *thd= ha_thd(); + do + { + rc= rnd_next(buf); + + if (rc == HA_ERR_RECORD_DELETED) + continue; + + } while (rc == HA_ERR_RECORD_DELETED || thd_rnd(thd) > sample_fraction); + return rc; } - virtual int random_sample_end() { return 0; }; + virtual int random_sample_end() { return rnd_end(); } /** rnd_init() can be called two times without rnd_end() in between (it only makes sense if scan=1). 
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index daebc5d0b38..b8b99015745 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -2727,12 +2727,15 @@ int collect_statistics_for_table(THD *thd, TABLE *table) Field *table_field; ha_rows rows= 0; handler *file=table->file; + double sample_fraction; DBUG_ENTER("collect_statistics_for_table"); table->collected_stats->cardinality_is_null= TRUE; table->collected_stats->cardinality= 0; + table->file->info(HA_STATUS_VARIABLE); + for (field_ptr= table->field; *field_ptr; field_ptr++) { table_field= *field_ptr; @@ -2743,19 +2746,27 @@ int collect_statistics_for_table(THD *thd, TABLE *table) restore_record(table, s->default_values); - rc= file->ha_random_sample_init(thd, 100); - rc= file->ha_random_sample(table->record[0]); - table_field->collected_stats->add(0); - rc= file->ha_random_sample_end(); + if (file->records() < 30000) + { + sample_fraction= 1; + } + else + { + sample_fraction= std::fmin( + (30000 + 4096 * log(200 * file->records())) / + (file->records() + 1), 1); + } + + + /* Fetch samples from the table to collect statistics on table's columns */ - /* Perform a full table scan to collect statistics on 'table's columns */ - /* - if (!(rc= file->ha_rnd_init(TRUE))) - { + if (!(rc= file->ha_random_sample_init(thd, HA_SAMPLE_BERNOULLI, + sample_fraction))) + { DEBUG_SYNC(table->in_use, "statistics_collection_start"); - while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) + while ((rc= file->ha_random_sample(table->record[0])) != HA_ERR_END_OF_FILE) { if (thd->killed) break; @@ -2775,10 +2786,9 @@ int collect_statistics_for_table(THD *thd, TABLE *table) break; rows++; } - file->ha_rnd_end(); + file->ha_random_sample_end(); } rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 
0 : 1; - */ /* Calculate values for all statistical characteristics on columns and for each field f of 'table' save them in the write_stat structure @@ -2787,7 +2797,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table) if (!rc) { table->collected_stats->cardinality_is_null= FALSE; - table->collected_stats->cardinality= rows; + table->collected_stats->cardinality= rows / sample_fraction; } bitmap_clear_all(table->write_set); |