diff options
author | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2022-07-02 22:15:22 +0300 |
---|---|---|
committer | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2022-07-03 01:36:29 +0300 |
commit | baf8be9f67a1a3d795e8c7841615ad6b4dc09328 (patch) | |
tree | 34c89a3ce12019f4af6d34c1b19343333569a333 | |
parent | bb30310e1a35ef5989bcfe4ab18063739d91cb17 (diff) | |
download | mariadb-git-10.7-vicentiu-selectivity.tar.gz |
Implement cost_for_order_by function10.7-vicentiu-selectivity
The sort length is extracted similarly to how sortlength() function does
it. The function makes use of filesort_use_addons function to compute
the length of addon fields. Finally, by calling compute_sort_costs we
get the fastest_sort possible.
Other changes:
* Sort_param::using_addon_fields() assumes addon fields are already
allocated. This makes the use of Sort_param unusable for
compute_sort_costs *if* we don't want to allocate addon fields.
As a preliminary fix, pass "with_addon_fields" as bool value to
compute_sort_costs() and make the internal functions use that value
instead of Sort_param::using_addon_fields() method.
The ideal fix would be to define a "leaner" struct with only the
necessary members, but this can be done as a separate commit.
* Remove unnecessary fields from Sort_costs::costs array.
As we do not yet differentiate between merge sort with fixed length vs
dynamic length fields, eliminate that differentiation from enum sort_type.
It can be added at a later date when we indeed have a cost
differentiation.
* Fixup comments
-rw-r--r-- | sql/filesort.cc | 3 | ||||
-rw-r--r-- | sql/filesort_utils.cc | 75 | ||||
-rw-r--r-- | sql/filesort_utils.h | 45 |
3 files changed, 92 insertions, 31 deletions
diff --git a/sql/filesort.cc b/sql/filesort.cc index 54154297a8d..6ff194fd6cf 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -280,7 +280,8 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, num_rows= table->file->estimate_rows_upper_bound(); Sort_costs costs; - costs.compute_sort_costs(¶m, num_rows, memory_available); + costs.compute_sort_costs(¶m, num_rows, memory_available, + param.using_addon_fields()); if (costs.fastest_sort == NO_SORT_POSSIBLE_OUT_OF_MEM) { diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc index 2644e85aa6c..79323cea3ce 100644 --- a/sql/filesort_utils.cc +++ b/sql/filesort_utils.cc @@ -198,7 +198,8 @@ void Sort_costs::compute_fastest_sort() */ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows, - size_t memory_available) + size_t memory_available, + bool with_addon_fields) { /* Implementation detail of PQ. To be able to keep a PQ of size N we need @@ -233,7 +234,7 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows, } /* Calculate cost with addon fields */ - if (param->using_addon_fields()) + if (with_addon_fields) { row_length= param->rec_length + sizeof(char *); num_available_keys= memory_available / row_length; @@ -250,31 +251,30 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows, void Sort_costs::compute_merge_sort_costs(Sort_param *param, ha_rows num_rows, - size_t memory_available) + size_t memory_available, + bool with_addon_fields) { size_t row_length= param->sort_length + param->ref_length + sizeof(char *); size_t num_available_keys= memory_available / row_length; - costs[MERGE_SORT_ALL_FIELDS_FIXED_LENGTH]= DBL_MAX; - costs[MERGE_SORT_ALL_FIELDS_DYNAMIC_LENGTH]= DBL_MAX; - costs[MERGE_SORT_ORDER_BY_FIELDS_FIXED_LENGTH]= DBL_MAX; - costs[MERGE_SORT_ORDER_BY_FIELDS_DYNAMIC_LENGTH]= DBL_MAX; + costs[MERGE_SORT_ALL_FIELDS]= DBL_MAX; + costs[MERGE_SORT_ORDER_BY_FIELDS]= DBL_MAX; if (num_available_keys) - costs[MERGE_SORT_ORDER_BY_FIELDS_FIXED_LENGTH]= + costs[MERGE_SORT_ORDER_BY_FIELDS]= get_merge_many_buffs_cost_fast(num_rows, num_available_keys, row_length, DEFAULT_KEY_COMPARE_COST, false); - if (param->using_addon_fields()) + if (with_addon_fields) { /* Compute cost of merge sort *if* we strip addon fields. */ num_available_keys= memory_available / row_length; row_length= param->rec_length + sizeof(char *); if (num_available_keys) - costs[MERGE_SORT_ALL_FIELDS_FIXED_LENGTH]= + costs[MERGE_SORT_ALL_FIELDS]= get_merge_many_buffs_cost_fast(num_rows, num_available_keys, row_length, DEFAULT_KEY_COMPARE_COST, true); @@ -288,10 +288,13 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param, } void Sort_costs::compute_sort_costs(Sort_param *param, ha_rows num_rows, - size_t memory_available) + size_t memory_available, + bool with_addon_fields) { - compute_pq_sort_costs(param, num_rows, memory_available); - compute_merge_sort_costs(param, num_rows, memory_available); + compute_pq_sort_costs(param, num_rows, memory_available, + with_addon_fields); + compute_merge_sort_costs(param, num_rows, memory_available, + with_addon_fields); compute_fastest_sort(); } @@ -393,3 +396,49 @@ void Filesort_buffer::sort_buffer(const Sort_param *param, uint count) param->get_compare_function(), param->get_compare_argument(&size)); } + + +static +size_t get_sort_length(THD *thd, Item *item) +{ + SORT_FIELD_ATTR sort_attr; + item->type_handler()->sort_length(thd, item, &sort_attr); + + return sort_attr.length + (item->maybe_null() ? 1 : 0); +} + + +double cost_for_order_by(TABLE *table, ORDER *order_by, ha_rows limit_rows) +{ + size_t memory_available= static_cast<size_t>( + table->in_use->variables.sortbuff_size); + size_t sort_len= 0; + ha_rows num_rows= table->file->estimate_rows_upper_bound(); + bool with_addon_fields; + Sort_costs costs; + Sort_param param; + uint addon_field_length, num_addon_fields, num_nullable_fields, + packable_length; + + + /* Fill in the Sort_param structure so we can compute the sort costs. */ + for (ORDER *ptr= order_by; ptr != nullptr; ptr= ptr->next) + { + //TODO(cvicentiu) do we need to take thd->variables.max_sort_length into + //account? + sort_len+= get_sort_length(table->in_use, ptr->item_ptr); + } + + with_addon_fields= + filesort_use_addons(table, sort_len, &addon_field_length, + &num_addon_fields, &num_nullable_fields, + &packable_length); + + param.setup_lengths_and_limit(table, sort_len, with_addon_fields, + addon_field_length, limit_rows); + + costs.compute_sort_costs(¶m, num_rows, memory_available, + with_addon_fields); + + return costs.fastest_sort; +} diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h index 382a98e873f..d7a9cb24552 100644 --- a/sql/filesort_utils.h +++ b/sql/filesort_utils.h @@ -19,10 +19,8 @@ #include "my_global.h" #include "my_base.h" #include "sql_array.h" +#include "handler.h" -#include <utility> - -class TABLE; class Sort_param; /** @@ -52,24 +50,32 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, -/* -Merge Sort -> Without addon Fields, with fixed length -Merge Sort -> Without addon Fields, with dynamic length -Merge Sort -> With addon Fields, with fixed length -Merge Sort -> With addon Fields, with dynamic length +/** + These are the current sorting algorithms we compute cost for: + + PQ_SORT_ALL_FIELDS Sort via priority queue, with addon fields. + PQ_SORT_ORDER_BY_FIELDS Sort via priority queue, without addon fields. + + MERGE_SORT_ALL_FIELDS Sort via merge sort, with addon fields. + MERGE_SORT_ORDER_BY_FIELDS Sort via merge sort, without addon fields. + + Note: + There is the possibility to do merge-sorting with dynamic length fields. + This is more expensive than if there are only fixed length fields, + however we do not (yet) account for that extra cost. We can extend the + cost computation in the future to cover that case as well. -Priority queue -> Without addon fields -Priority queue -> With addon fields + Effectively there are 4 possible combinations for merge sort: + With/without addon fields + With/without dynamic length fields. */ enum sort_type { PQ_SORT_ALL_FIELDS= 0, PQ_SORT_ORDER_BY_FIELDS, - MERGE_SORT_ALL_FIELDS_FIXED_LENGTH, - MERGE_SORT_ALL_FIELDS_DYNAMIC_LENGTH, - MERGE_SORT_ORDER_BY_FIELDS_FIXED_LENGTH, - MERGE_SORT_ORDER_BY_FIELDS_DYNAMIC_LENGTH, + MERGE_SORT_ALL_FIELDS, + MERGE_SORT_ORDER_BY_FIELDS, FINAL_SORT_TYPE, @@ -82,7 +88,8 @@ struct Sort_costs fastest_sort(NO_SORT_POSSIBLE_OUT_OF_MEM), lowest_cost(DBL_MAX) {} void compute_sort_costs(Sort_param *param, ha_rows num_rows, - size_t memory_available); + size_t memory_available, + bool with_addon_fields); /* Cache value for fastest_sort. */ enum sort_type fastest_sort; @@ -97,9 +104,11 @@ private: double costs[FINAL_SORT_TYPE]; void compute_pq_sort_costs(Sort_param *param, ha_rows num_rows, - size_t memory_available); + size_t memory_available, + bool with_addon_fields); void compute_merge_sort_costs(Sort_param *param, ha_rows num_rows, - size_t memory_available); + size_t memory_available, + bool with_addon_fields); void compute_fastest_sort(); }; @@ -338,6 +347,8 @@ private: longlong m_idx; }; +double cost_for_order_by(TABLE *table, ORDER *order_by); + int compare_packed_sort_keys(void *sort_keys, unsigned char **a, unsigned char **b); qsort2_cmp get_packed_keys_compare_ptr(); |