diff options
author | Igor Babaev <igor@askmonty.org> | 2013-04-06 15:36:28 -0700 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2013-04-06 15:36:28 -0700 |
commit | 10f0530b22d1b06c79be9fef26febebf3b9ec4a5 (patch) | |
tree | ddc4d541ec04f3d575bca47c512d6201342defef | |
parent | 1c30fb2a156b9c510e1829108bc8db71dcea3d43 (diff) | |
download | mariadb-git-10f0530b22d1b06c79be9fef26febebf3b9ec4a5.tar.gz |
Fixed bug mdev-4369.
The function merge_walk() was adjusted to be able to aggregate
the counters of the merged elements.
Before this change it was not possible to guarantee the correctness
of the counters passed to the call-back parameter walk_action.
As a result, when some elements of a Unique object were flushed to
disk, the function passed to merge_walk() as the call-back parameter
could receive wrong counters of elements. This could lead to building
wrong histograms.
-rw-r--r-- | mysql-test/r/statistics.result | 39 | ||||
-rw-r--r-- | mysql-test/t/statistics.test | 43 | ||||
-rw-r--r-- | sql/sql_class.h | 1 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 4 | ||||
-rw-r--r-- | sql/uniques.cc | 40 |
5 files changed, 121 insertions, 6 deletions
diff --git a/mysql-test/r/statistics.result b/mysql-test/r/statistics.result index 9291b945193..20469c01a2e 100644 --- a/mysql-test/r/statistics.result +++ b/mysql-test/r/statistics.result @@ -1511,4 +1511,43 @@ test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF set histogram_size=default; set histogram_type=default; drop table t1; +# +# Bug mdev-4369: histogram for a column with many distinct values +# +CREATE TABLE t1 (id int); +CREATE TABLE t2 (id int); +INSERT INTO t1 (id) VALUES (1), (1), (1),(1); +INSERT INTO t1 (id) SELECT id FROM t1; +INSERT INTO t1 SELECT id+1 FROM t1; +INSERT INTO t1 SELECT id+2 FROM t1; +INSERT INTO t1 SELECT id+4 FROM t1; +INSERT INTO t1 SELECT id+8 FROM t1; +INSERT INTO t1 SELECT id+16 FROM t1; +INSERT INTO t1 SELECT id+32 FROM t1; +INSERT INTO t1 SELECT id+64 FROM t1; +INSERT INTO t1 SELECT id+128 FROM t1; +INSERT INTO t1 SELECT id+256 FROM t1; +INSERT INTO t1 SELECT id+512 FROM t1; +INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); +SELECT COUNT(*) FROM t2; +COUNT(*) +8192 +SELECT COUNT(DISTINCT id) FROM t2; +COUNT(DISTINCT id) +1024 +set @@tmp_table_size=1024*16; +set @@max_heap_table_size=1024*16; +set histogram_size=63; +analyze table t2 persistent for all; +Table Op Msg_type Msg_text +test.t2 analyze status OK +select db_name, table_name, column_name, +min_value, max_value, +nulls_ratio, avg_frequency, +hist_size, hist_type, HEX(histogram) +FROM mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) +test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 03070B0F13171B1F23272B2F33373B3F43474B4F53575B5F63676B6F73777B7F83878B8F93979B9FA3A7ABAFB3B7BBBFC3C7CBCFD3D7DBDFE3E7EBEFF3F7FB +set histogram_size=default; +drop table t1, t2; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/t/statistics.test b/mysql-test/t/statistics.test index 4d11e11f033..b2a052fd3e8 100644 --- a/mysql-test/t/statistics.test +++ 
b/mysql-test/t/statistics.test @@ -638,5 +638,48 @@ set histogram_type=default; drop table t1; +--echo # +--echo # Bug mdev-4369: histogram for a column with many distinct values +--echo # + + +CREATE TABLE t1 (id int); +CREATE TABLE t2 (id int); + +INSERT INTO t1 (id) VALUES (1), (1), (1),(1); +INSERT INTO t1 (id) SELECT id FROM t1; +INSERT INTO t1 SELECT id+1 FROM t1; +INSERT INTO t1 SELECT id+2 FROM t1; +INSERT INTO t1 SELECT id+4 FROM t1; +INSERT INTO t1 SELECT id+8 FROM t1; +INSERT INTO t1 SELECT id+16 FROM t1; +INSERT INTO t1 SELECT id+32 FROM t1; +INSERT INTO t1 SELECT id+64 FROM t1; +INSERT INTO t1 SELECT id+128 FROM t1; +INSERT INTO t1 SELECT id+256 FROM t1; +INSERT INTO t1 SELECT id+512 FROM t1; + +INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); + +SELECT COUNT(*) FROM t2; +SELECT COUNT(DISTINCT id) FROM t2; + +set @@tmp_table_size=1024*16; +set @@max_heap_table_size=1024*16; + +set histogram_size=63; + +analyze table t2 persistent for all; + +select db_name, table_name, column_name, + min_value, max_value, + nulls_ratio, avg_frequency, + hist_size, hist_type, HEX(histogram) + FROM mysql.column_stats; + +set histogram_size=default; + +drop table t1, t2; + set use_stat_tables=@save_use_stat_tables; diff --git a/sql/sql_class.h b/sql/sql_class.h index 599f59c7c27..286fbc4f81b 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3997,6 +3997,7 @@ class Unique :public Sql_alloc uint size; uint full_size; uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ + bool with_counters; bool merge(TABLE *table, uchar *buff, bool without_last_merge); diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 6e476b34e0d..37c3a93ee08 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1348,7 +1348,7 @@ public: tree_key_length= field->pack_length(); tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, - tree_key_length, max_heap_table_size); + tree_key_length, max_heap_table_size, 1); } virtual 
~Count_distinct_field() @@ -1435,7 +1435,7 @@ public: tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, (void*) &tree_key_length, - tree_key_length, max_heap_table_size); + tree_key_length, max_heap_table_size, 1); } bool add() diff --git a/sql/uniques.cc b/sql/uniques.cc index 9fa06311ece..0c1c34d495b 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, full_size= size; if (min_dupl_count_arg) full_size+= sizeof(element_count); + with_counters= test(min_dupl_count_arg); my_b_clear(&file); init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); @@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2) C_MODE_END +inline +element_count get_counter_from_merged_element(void *ptr, uint ofs) +{ + element_count cnt; + memcpy((uchar *) &cnt, (uchar *) ptr + ofs, sizeof(element_count)); + return cnt; +} + + +inline +void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt) +{ + memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count)); +} + + /* DESCRIPTION @@ -457,6 +474,8 @@ C_MODE_END file file with all trees dumped. Trees in the file must contain sorted unique values. Cache must be initialized in read mode. 
+ with counters take into account counters for equal merged + elements RETURN VALUE 0 ok <> 0 error @@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, uint key_length, BUFFPEK *begin, BUFFPEK *end, tree_walk_action walk_action, void *walk_action_arg, qsort_cmp2 compare, void *compare_arg, - IO_CACHE *file) + IO_CACHE *file, bool with_counters) { BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; QUEUE queue; @@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, uint bytes_read; /* to hold return value of read_to_buffer */ BUFFPEK *top; int res= 1; + uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); + element_count cnt; /* Invariant: queue must contain top element from each tree, until a tree is not completely walked through. @@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, /* new top has been obtained; if old top is unique, apply the action */ if (compare(compare_arg, old_key, top->key)) { - if (walk_action(old_key, 1, walk_action_arg)) + cnt= with_counters ? + get_counter_from_merged_element(old_key, cnt_ofs) : 1; + if (walk_action(old_key, cnt, walk_action_arg)) goto end; } + else if (with_counters) + { + cnt= get_counter_from_merged_element(top->key, cnt_ofs); + cnt+= get_counter_from_merged_element(old_key, cnt_ofs); + put_counter_into_merged_element(top->key, cnt_ofs, cnt); + } } /* Applying walk_action to the tail of the last tree: this is safe because @@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, { do { - if (walk_action(top->key, 1, walk_action_arg)) + + cnt= with_counters ? 
+ get_counter_from_merged_element(top->key, cnt_ofs) : 1; + if (walk_action(top->key, cnt, walk_action_arg)) goto end; top->key+= key_length; } @@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) (BUFFPEK *) file_ptrs.buffer, (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements, action, walk_action_arg, - tree.compare, tree.custom_arg, &file); + tree.compare, tree.custom_arg, &file, with_counters); } my_free(merge_buffer); return res; |