diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2020-03-28 12:31:22 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2020-12-04 23:38:09 +0530 |
commit | 202393086e4da6c6864778dcb87339f159ea48f6 (patch) | |
tree | 203cb33f02b64b59de11f86c56ba423f352ba0d2 /sql/sql_statistics.cc | |
parent | e9f33b7760539e2ba60fb236fab8eaf0ce53ca4a (diff) | |
download | mariadb-git-bb-10.6-mdev21829.tar.gz |
MDEV-21829: Use packed sort keys in Unique objectsbb-10.6-mdev21829
The task deals with packing the values stored in the Unique tree for each record.
The changes brought by this feature is:
1) Unique tree can have dynamic length keys
2) Format of keys looks like
<key_length> <packed_value1> <packed_value2> ....... <packed_valueN>
Unique class is currently used in
1) agg_func(DISTINCT col)
Here most aggregate functions like SUM, AVG accept only fixed size arguments
so it is not beneficial to use packing for these. Packing is done for
COUNT and GROUP_CONCAT (or JSON_ARRAYAGG) aggregate function as these are meaningful
2) index-merge stores row-ids
index merge stores row-ids which are of fixed size, so packing is not required
3) Engine Independent Table statistics
Packing is done here for variable length data types
This task is an extension to MDEV-21580.
Diffstat (limited to 'sql/sql_statistics.cc')
-rw-r--r-- | sql/sql_statistics.cc | 202 |
1 files changed, 177 insertions, 25 deletions
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 7b600bd45c4..85eb5630418 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1517,6 +1517,41 @@ public: } }; + +/* + @brief + Get the offset to the value for the field in the buffer + + @param + field Field structure + + @retval + offset of the value from the start of the buffer +*/ + +uint get_offset_to_value(Field *field) +{ + return Variable_size_keys_descriptor::size_of_length_field + + MY_TEST(field->maybe_null()); +} + + +/* + @brief + Get the end of the buffer storing the value for the field + @param + buffer buffer storing the value + + @retval + return end of the buffer +*/ + +uchar* get_buffer_end(uchar *buffer) +{ + return buffer + Variable_size_keys_descriptor::read_packed_length(buffer); +} + + /* Histogram_builder is a helper class that is used to build histograms for columns @@ -1570,7 +1605,16 @@ public: return 0; if (count > bucket_capacity * (curr_bucket + 1)) { - column->store_field_value((uchar *) elem, col_length); + uchar *to= (uchar* )elem; + if (column->is_packable()) + { + column->unpack(column->ptr, + to + get_offset_to_value(column), + get_buffer_end(to), 0); + } + else + column->store_field_value(to, col_length); + histogram->set_value(curr_bucket, column->pos_in_interval(min_value, max_value)); curr_bucket++; @@ -1621,7 +1665,7 @@ protected: /* Field for which the number of distinct values is to be find out */ Field *table_field; - Unique *tree; /* The helper object to contain distinct values */ + Unique_impl *tree; /* The helper object to contain distinct values */ uint tree_key_length; /* The length of the keys for the elements of 'tree */ ulonglong distincts; @@ -1646,13 +1690,11 @@ public: of the parameters to be passed to the constructor of the Unique object. */ - Count_distinct_field(Field *field, size_t max_heap_table_size) + Count_distinct_field(Field *field) { table_field= field; - tree_key_length= field->pack_length(); - - tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, - tree_key_length, max_heap_table_size, 1); + tree_key_length= 0; + tree= NULL; } virtual ~Count_distinct_field() @@ -1670,6 +1712,63 @@ public: return (tree != NULL); } + + /* + @brief + Calculate the max length to store the length of a packable field + + @param + field Field structure + + @retval + Return the max length for a packable field + */ + + uint compute_packable_length(Field *field) + { + return table_field->max_packed_col_length(table_field->pack_length()) + + Variable_size_keys_descriptor::size_of_length_field + + MY_TEST(table_field->maybe_null()); + } + + + /* + @brief + Create and setup the Unique object for the column + + @param + thd Thread structure + max_heap_table_size max allowed size of the unique tree + + @retval + TRUE ERROR + FALSE SUCCESS + */ + + virtual bool setup(THD *thd, size_t max_heap_table_size) + { + Descriptor *desc; + if (table_field->is_packable()) + { + tree_key_length= compute_packable_length(table_field); + desc= new Variable_size_keys_simple(tree_key_length); + } + else + { + tree_key_length= table_field->pack_length(); + desc= new Fixed_size_keys_descriptor(tree_key_length); + } + if (!desc) + return true; // OOM + tree= new Unique_impl((qsort_cmp2) key_cmp, + (void*) this, tree_key_length, + max_heap_table_size, 1, desc); + if (!tree) + return true; // OOM + return tree->get_descriptor()->setup_for_field(thd, table_field); + } + + /* @brief Add the value of 'field' to the container of the Unique object 'tree' @@ -1677,6 +1776,14 @@ public: virtual bool add() { table_field->mark_unused_memory_as_defined(); + DBUG_ASSERT(tree); + if (tree->is_variable_sized()) + { + Descriptor *descriptor= tree->get_descriptor(); + uchar *rec_ptr= descriptor->make_record(true); + DBUG_ASSERT(descriptor->get_length_of_key(rec_ptr) <= tree->get_size()); + return tree->unique_add(rec_ptr); + } return tree->unique_add(table_field->ptr); } @@ -1733,17 +1840,31 @@ public: return table_field->collected_stats->histogram.get_values(); } + static int key_cmp(void* arg, uchar* key1, uchar* key2); }; +/* + @brief + Compare function for packed keys + @param arg Pointer to the relevant class instance + @param key1 left key image + @param key2 right key image -static -int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2) + + @return comparison result + @retval < 0 if key1 < key2 + @retval = 0 if key1 = key2 + @retval > 0 if key1 > key2 +*/ +int Count_distinct_field::key_cmp(void* arg, + uchar* key1, + uchar* key2) { - ulonglong *val1= (ulonglong *) key1; - ulonglong *val2= (ulonglong *) key2; - return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1; + Count_distinct_field *compare_arg= (Count_distinct_field*)arg; + DBUG_ASSERT(compare_arg->tree->get_descriptor()); + return compare_arg->tree->get_descriptor()->compare_keys(key1, key2); } - + /* The class Count_distinct_field_bit is derived from the class @@ -1755,24 +1876,52 @@ class Count_distinct_field_bit: public Count_distinct_field { public: - Count_distinct_field_bit(Field *field, size_t max_heap_table_size) - { - table_field= field; - tree_key_length= sizeof(ulonglong); - - tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, - (void*) &tree_key_length, - tree_key_length, max_heap_table_size, 1); - } + Count_distinct_field_bit(Field *field): Count_distinct_field(field){} bool add() { - longlong val= table_field->val_int(); + longlong val= table_field->val_int(); return tree->unique_add(&val); } + bool setup(THD *thd, size_t max_heap_table_size) override + { + tree_key_length= sizeof(ulonglong); + Descriptor *desc= new Fixed_size_keys_mem_comparable(tree_key_length); + if (!desc) + return true; + tree= new Unique_impl((qsort_cmp2) simple_ulonglong_key_cmp, + (void*) this, + tree_key_length, max_heap_table_size, 1, desc); + return tree == NULL; + } + static int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2); }; +/* + @brief + Compare function for Bit fields + + @param arg Pointer to the relevant class instance + @param key1 left key image + @param key2 right key image + + + @return comparison result + @retval < 0 if key1 < key2 + @retval = 0 if key1 = key2 + @retval > 0 if key1 > key2 +*/ +int Count_distinct_field_bit::simple_ulonglong_key_cmp(void* arg, + uchar* key1, + uchar* key2) +{ + Count_distinct_field_bit *compare_arg= (Count_distinct_field_bit*)arg; + DBUG_ASSERT(compare_arg->tree->get_descriptor()); + return compare_arg->tree->get_descriptor()->compare_keys(key1, key2); +} + + /* The class Index_prefix_calc is a helper class used to calculate the values for the column 'avg_frequency' of the statistical table index_stats. @@ -2344,8 +2493,11 @@ void Column_statistics_collected::init(THD *thd, Field *table_field) { count_distinct= table_field->type() == MYSQL_TYPE_BIT ? - new Count_distinct_field_bit(table_field, max_heap_table_size) : - new Count_distinct_field(table_field, max_heap_table_size); + new Count_distinct_field_bit(table_field) : + new Count_distinct_field(table_field); + + if (count_distinct && count_distinct->setup(thd, max_heap_table_size)) + count_distinct= NULL; } if (count_distinct && !count_distinct->exists()) count_distinct= NULL; |