summary refs log tree commit diff
path: root/sql
diff options
context:
space:
mode:
authorVicențiu Ciorbaru <vicentiu@mariadb.org>2019-02-10 01:43:15 +0200
committerVicențiu Ciorbaru <vicentiu@mariadb.org>2019-02-10 01:43:15 +0200
commit25947c60d54b969359521b9ca55da3054c600745 (patch)
treed28924673e6f4ddab059251395d60b9f189ed7c2 /sql
parent30a18eed822c207880a52b31c566525dfef8fb55 (diff)
downloadmariadb-git-bb-10.4-vicentiu-histograms.tar.gz
Default Bernoulli Sampling implementation (branches: bb-10.4-vicentiu-histograms, 10.4-vicentiu-histograms)
Diffstat (limited to 'sql')
-rw-r--r--sql/handler.h44
-rw-r--r--sql/sql_statistics.cc34
2 files changed, 56 insertions, 22 deletions
diff --git a/sql/handler.h b/sql/handler.h
index 8f9ddc01174..d40e986fd9d 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1906,6 +1906,11 @@ enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0,
HA_STATS_AUTO_RECALC_ON,
HA_STATS_AUTO_RECALC_OFF };
+enum sample_mode {
+ HA_SAMPLE_BERNOULLI= 0,
+ HA_SAMPLE_SYSTEM,
+};
+
/**
A helper struct for schema DDL statements:
CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ]
@@ -2940,9 +2945,11 @@ public:
/** Length of ref (1-8 or the clustered key length) */
uint ref_length;
FT_INFO *ft_handler;
- enum init_stat { NONE=0, INDEX, RND, RANDOM };
+ enum init_stat { NONE=0, INDEX, RND, SAMPLE };
init_stat inited, pre_inited;
+ double sample_fraction= 0;
+ enum sample_mode sample_mode;
const COND *pushed_cond;
/**
next_insert_id is the next value which should be inserted into the
@@ -3105,21 +3112,25 @@ public:
virtual int prepare_range_scan(const key_range *start_key, const key_range *end_key)
{ return 0; }
- virtual int ha_random_sample_init(THD *thd, ha_rows estimate_rows_read)
+ int ha_random_sample_init(THD *thd, enum sample_mode mode, double fraction)
__attribute__((warn_unused_result))
{
DBUG_ENTER("ha_random_sample_init");
- inited= RANDOM;
- DBUG_RETURN(random_sample_init(thd, estimate_rows_read));
+ DBUG_ASSERT(inited==NONE);
+ int result;
+ sample_mode= mode;
+ sample_fraction= fraction;
+ inited= (result= random_sample_init(mode, fraction)) ? NONE : SAMPLE;
+ DBUG_RETURN(result);
}
- virtual int ha_random_sample(uchar *buf)
+ int ha_random_sample(uchar *buf)
__attribute__((warn_unused_result))
{
DBUG_ENTER("ha_random_sample");
- DBUG_ASSERT(inited == RANDOM);
+ DBUG_ASSERT(inited == SAMPLE);
DBUG_RETURN(random_sample(buf));
}
- virtual int ha_random_sample_end() __attribute__((warn_unused_result))
+ int ha_random_sample_end()
{
DBUG_ENTER("ha_random_sample_end");
inited= NONE;
@@ -4439,12 +4450,25 @@ private:
/* Note: ha_index_read_idx_map() may bypass index_init() */
virtual int index_init(uint idx, bool sorted) { return 0; }
virtual int index_end() { return 0; }
- virtual int random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read) { return 0; } ;
+ virtual int random_sample_init(enum sample_mode mode, double fraction)
+ {
+ return rnd_init(TRUE);
+ }
virtual int random_sample(uchar *buf)
{
- return HA_ERR_WRONG_COMMAND;
+ int rc;
+ THD *thd= ha_thd();
+ do
+ {
+ rc= rnd_next(buf);
+
+ if (rc == HA_ERR_RECORD_DELETED)
+ continue;
+
+ } while (rc == HA_ERR_RECORD_DELETED || thd_rnd(thd) > sample_fraction);
+ return rc;
}
- virtual int random_sample_end() { return 0; };
+ virtual int random_sample_end() { return rnd_end(); }
/**
rnd_init() can be called two times without rnd_end() in between
(it only makes sense if scan=1).
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index daebc5d0b38..b8b99015745 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -2727,12 +2727,15 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
Field *table_field;
ha_rows rows= 0;
handler *file=table->file;
+ double sample_fraction;
DBUG_ENTER("collect_statistics_for_table");
table->collected_stats->cardinality_is_null= TRUE;
table->collected_stats->cardinality= 0;
+ table->file->info(HA_STATUS_VARIABLE);
+
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
table_field= *field_ptr;
@@ -2743,19 +2746,27 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
restore_record(table, s->default_values);
- rc= file->ha_random_sample_init(thd, 100);
- rc= file->ha_random_sample(table->record[0]);
- table_field->collected_stats->add(0);
- rc= file->ha_random_sample_end();
+ if (file->records() < 30000)
+ {
+ sample_fraction= 1;
+ }
+ else
+ {
+ sample_fraction= std::fmin(
+ (30000 + 4096 * log(200 * file->records())) /
+ (file->records() + 1), 1);
+ }
+
+
+ /* Fetch samples from the table to collect statistics on table's columns */
- /* Perform a full table scan to collect statistics on 'table's columns */
- /*
- if (!(rc= file->ha_rnd_init(TRUE)))
- {
+ if (!(rc= file->ha_random_sample_init(thd, HA_SAMPLE_BERNOULLI,
+ sample_fraction)))
+ {
DEBUG_SYNC(table->in_use, "statistics_collection_start");
- while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
+ while ((rc= file->ha_random_sample(table->record[0])) != HA_ERR_END_OF_FILE)
{
if (thd->killed)
break;
@@ -2775,10 +2786,9 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
break;
rows++;
}
- file->ha_rnd_end();
+ file->ha_random_sample_end();
}
rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1;
- */
/*
Calculate values for all statistical characteristics on columns and
and for each field f of 'table' save them in the write_stat structure
@@ -2787,7 +2797,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
if (!rc)
{
table->collected_stats->cardinality_is_null= FALSE;
- table->collected_stats->cardinality= rows;
+ table->collected_stats->cardinality= rows / sample_fraction;
}
bitmap_clear_all(table->write_set);