summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVicențiu Ciorbaru <vicentiu@mariadb.org>2022-07-02 22:15:22 +0300
committerVicențiu Ciorbaru <vicentiu@mariadb.org>2022-07-03 01:36:29 +0300
commitbaf8be9f67a1a3d795e8c7841615ad6b4dc09328 (patch)
tree34c89a3ce12019f4af6d34c1b19343333569a333
parentbb30310e1a35ef5989bcfe4ab18063739d91cb17 (diff)
downloadmariadb-git-10.7-vicentiu-selectivity.tar.gz
Implement cost_for_order_by function10.7-vicentiu-selectivity
The sort length is extracted similarly to how sortlength() function does it. The function makes use of filesort_use_addons function to compute the length of addon fields. Finally, by calling compute_sort_costs we get the fastest_sort possible. Other changes: * Sort_param::using_addon_fields() assumes addon fields are already allocated. This makes the use of Sort_param unusable for compute_sort_costs *if* we don't want to allocate addon fields. As a preliminary fix, pass "with_addon_fields" as bool value to compute_sort_costs() and make the internal functions use that value instead of Sort_param::using_addon_fields() method. The ideal fix would be to define a "leaner" struct with only the necessary members, but this can be done as a separate commit. * Remove unnecessary fields from Sort_costs::costs array. As we do not yet differentiate between merge sort with fixed length vs dynamic length fields, eliminate that differentiation from enum sort_type. It can be added at a later date when we indeed have a cost differentiation. * Fixup comments
-rw-r--r--sql/filesort.cc3
-rw-r--r--sql/filesort_utils.cc75
-rw-r--r--sql/filesort_utils.h45
3 files changed, 92 insertions, 31 deletions
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 54154297a8d..6ff194fd6cf 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -280,7 +280,8 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
num_rows= table->file->estimate_rows_upper_bound();
Sort_costs costs;
- costs.compute_sort_costs(&param, num_rows, memory_available);
+ costs.compute_sort_costs(&param, num_rows, memory_available,
+ param.using_addon_fields());
if (costs.fastest_sort == NO_SORT_POSSIBLE_OUT_OF_MEM)
{
diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc
index 2644e85aa6c..79323cea3ce 100644
--- a/sql/filesort_utils.cc
+++ b/sql/filesort_utils.cc
@@ -198,7 +198,8 @@ void Sort_costs::compute_fastest_sort()
*/
void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
- size_t memory_available)
+ size_t memory_available,
+ bool with_addon_fields)
{
/*
Implementation detail of PQ. To be able to keep a PQ of size N we need
@@ -233,7 +234,7 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
}
/* Calculate cost with addon fields */
- if (param->using_addon_fields())
+ if (with_addon_fields)
{
row_length= param->rec_length + sizeof(char *);
num_available_keys= memory_available / row_length;
@@ -250,31 +251,30 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
void Sort_costs::compute_merge_sort_costs(Sort_param *param,
ha_rows num_rows,
- size_t memory_available)
+ size_t memory_available,
+ bool with_addon_fields)
{
size_t row_length= param->sort_length + param->ref_length + sizeof(char *);
size_t num_available_keys= memory_available / row_length;
- costs[MERGE_SORT_ALL_FIELDS_FIXED_LENGTH]= DBL_MAX;
- costs[MERGE_SORT_ALL_FIELDS_DYNAMIC_LENGTH]= DBL_MAX;
- costs[MERGE_SORT_ORDER_BY_FIELDS_FIXED_LENGTH]= DBL_MAX;
- costs[MERGE_SORT_ORDER_BY_FIELDS_DYNAMIC_LENGTH]= DBL_MAX;
+ costs[MERGE_SORT_ALL_FIELDS]= DBL_MAX;
+ costs[MERGE_SORT_ORDER_BY_FIELDS]= DBL_MAX;
if (num_available_keys)
- costs[MERGE_SORT_ORDER_BY_FIELDS_FIXED_LENGTH]=
+ costs[MERGE_SORT_ORDER_BY_FIELDS]=
get_merge_many_buffs_cost_fast(num_rows, num_available_keys,
row_length,
DEFAULT_KEY_COMPARE_COST,
false);
- if (param->using_addon_fields())
+ if (with_addon_fields)
{
/* Compute cost of merge sort *if* we strip addon fields. */
num_available_keys= memory_available / row_length;
row_length= param->rec_length + sizeof(char *);
if (num_available_keys)
- costs[MERGE_SORT_ALL_FIELDS_FIXED_LENGTH]=
+ costs[MERGE_SORT_ALL_FIELDS]=
get_merge_many_buffs_cost_fast(num_rows, num_available_keys, row_length,
DEFAULT_KEY_COMPARE_COST,
true);
@@ -288,10 +288,13 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param,
}
void Sort_costs::compute_sort_costs(Sort_param *param, ha_rows num_rows,
- size_t memory_available)
+ size_t memory_available,
+ bool with_addon_fields)
{
- compute_pq_sort_costs(param, num_rows, memory_available);
- compute_merge_sort_costs(param, num_rows, memory_available);
+ compute_pq_sort_costs(param, num_rows, memory_available,
+ with_addon_fields);
+ compute_merge_sort_costs(param, num_rows, memory_available,
+ with_addon_fields);
compute_fastest_sort();
}
@@ -393,3 +396,49 @@ void Filesort_buffer::sort_buffer(const Sort_param *param, uint count)
param->get_compare_function(),
param->get_compare_argument(&size));
}
+
+
+static
+size_t get_sort_length(THD *thd, Item *item)
+{
+ SORT_FIELD_ATTR sort_attr;
+ item->type_handler()->sort_length(thd, item, &sort_attr);
+
+ return sort_attr.length + (item->maybe_null() ? 1 : 0);
+}
+
+
+double cost_for_order_by(TABLE *table, ORDER *order_by, ha_rows limit_rows)
+{
+ size_t memory_available= static_cast<size_t>(
+ table->in_use->variables.sortbuff_size);
+ size_t sort_len= 0;
+ ha_rows num_rows= table->file->estimate_rows_upper_bound();
+ bool with_addon_fields;
+ Sort_costs costs;
+ Sort_param param;
+ uint addon_field_length, num_addon_fields, num_nullable_fields,
+ packable_length;
+
+
+ /* Fill in the Sort_param structure so we can compute the sort costs. */
+ for (ORDER *ptr= order_by; ptr != nullptr; ptr= ptr->next)
+ {
+ //TODO(cvicentiu) do we need to take thd->variables.max_sort_length into
+ //account?
+ sort_len+= get_sort_length(table->in_use, ptr->item_ptr);
+ }
+
+ with_addon_fields=
+ filesort_use_addons(table, sort_len, &addon_field_length,
+ &num_addon_fields, &num_nullable_fields,
+ &packable_length);
+
+ param.setup_lengths_and_limit(table, sort_len, with_addon_fields,
+ addon_field_length, limit_rows);
+
+ costs.compute_sort_costs(&param, num_rows, memory_available,
+ with_addon_fields);
+
+ return costs.fastest_sort;
+}
diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h
index 382a98e873f..d7a9cb24552 100644
--- a/sql/filesort_utils.h
+++ b/sql/filesort_utils.h
@@ -19,10 +19,8 @@
#include "my_global.h"
#include "my_base.h"
#include "sql_array.h"
+#include "handler.h"
-#include <utility>
-
-class TABLE;
class Sort_param;
/**
@@ -52,24 +50,32 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
-/*
-Merge Sort -> Without addon Fields, with fixed length
-Merge Sort -> Without addon Fields, with dynamic length
-Merge Sort -> With addon Fields, with fixed length
-Merge Sort -> With addon Fields, with dynamic length
+/**
+ These are the current sorting algorithms we compute cost for:
+
+ PQ_SORT_ALL_FIELDS Sort via priority queue, with addon fields.
+ PQ_SORT_ORDER_BY_FIELDS Sort via priority queue, without addon fields.
+
+ MERGE_SORT_ALL_FIELDS Sort via merge sort, with addon fields.
+ MERGE_SORT_ORDER_BY_FIELDS Sort via merge sort, without addon fields.
+
+ Note:
+ There is the possibility to do merge-sorting with dynamic length fields.
+ This is more expensive than if there are only fixed length fields,
+ however we do not (yet) account for that extra cost. We can extend the
+ cost computation in the future to cover that case as well.
-Priority queue -> Without addon fields
-Priority queue -> With addon fields
+ Effectively there are 4 possible combinations for merge sort:
+ With/without addon fields
+ With/without dynamic length fields.
*/
enum sort_type
{
PQ_SORT_ALL_FIELDS= 0,
PQ_SORT_ORDER_BY_FIELDS,
- MERGE_SORT_ALL_FIELDS_FIXED_LENGTH,
- MERGE_SORT_ALL_FIELDS_DYNAMIC_LENGTH,
- MERGE_SORT_ORDER_BY_FIELDS_FIXED_LENGTH,
- MERGE_SORT_ORDER_BY_FIELDS_DYNAMIC_LENGTH,
+ MERGE_SORT_ALL_FIELDS,
+ MERGE_SORT_ORDER_BY_FIELDS,
FINAL_SORT_TYPE,
@@ -82,7 +88,8 @@ struct Sort_costs
fastest_sort(NO_SORT_POSSIBLE_OUT_OF_MEM), lowest_cost(DBL_MAX) {}
void compute_sort_costs(Sort_param *param, ha_rows num_rows,
- size_t memory_available);
+ size_t memory_available,
+ bool with_addon_fields);
/* Cache value for fastest_sort. */
enum sort_type fastest_sort;
@@ -97,9 +104,11 @@ private:
double costs[FINAL_SORT_TYPE];
void compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
- size_t memory_available);
+ size_t memory_available,
+ bool with_addon_fields);
void compute_merge_sort_costs(Sort_param *param, ha_rows num_rows,
- size_t memory_available);
+ size_t memory_available,
+ bool with_addon_fields);
void compute_fastest_sort();
};
@@ -338,6 +347,8 @@ private:
longlong m_idx;
};
+double cost_for_order_by(TABLE *table, ORDER *order_by);
+
int compare_packed_sort_keys(void *sort_keys, unsigned char **a,
unsigned char **b);
qsort2_cmp get_packed_keys_compare_ptr();