diff options
author | Sergei Petrunia <psergey@askmonty.org> | 2021-11-26 20:03:08 +0300 |
---|---|---|
committer | Sergei Petrunia <psergey@askmonty.org> | 2022-01-19 18:10:11 +0300 |
commit | eb6a9ad705009fc0e4b0654c862c23a28872dad2 (patch) | |
tree | 94e82af5d02321544a968dc14d2d501285624a54 /sql/opt_histogram_json.cc | |
parent | 106c785e2d3357a6f6711907f9e20b04c6ec7f96 (diff) | |
download | mariadb-git-eb6a9ad705009fc0e4b0654c862c23a28872dad2.tar.gz |
MDEV-26886: Estimation for filtered rows less precise with JSON histogram
- Make Histogram_json_hb::range_selectivity handle singleton buckets
specially when computing selectivity of the max. endpoint bound.
(for min. endpoint, we already do that).
- Also, fixed comments for Histogram_json_hb::find_bucket
Diffstat (limited to 'sql/opt_histogram_json.cc')
-rw-r--r-- | sql/opt_histogram_json.cc | 72 |
1 files changed, 44 insertions, 28 deletions
diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc index 44b058761a4..82828a35f44 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -743,9 +743,22 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, idx--; } double left_fract= get_left_fract(idx); - double sel= position_in_interval(field, max_key, max_key_len, - buckets[idx].start_value, - get_end_value(idx)); + + double sel; + /* Special handling for singleton buckets */ + if (buckets[idx].ndv == 1 && equal) + { + if (inclusive_endp) + sel= 1.0; + else + sel= 0.0; + } + else + { + sel= position_in_interval(field, max_key, max_key_len, + buckets[idx].start_value, + get_end_value(idx)); + } max= left_fract + sel * (buckets[idx].cum_fract - left_fract); } else @@ -763,26 +776,18 @@ void Histogram_json_hb::serialize(Field *field) /* - Find the rightmost histogram bucket such that "lookup_val $GT start_value". - - $GT is either '>' or '>=' depending on equal_is_less parameter. - - @param equal_is_less Controls what to do if a histogram bound is equal to the - lookup_val. - - @detail - Possible cases: - 1. The regular case: the value falls into some bucket. + @brief + Find the leftmost histogram bucket such that "lookup_val >= start_value". - 2. The value is less than the minimum of the first bucket - 3. The value is greater than the maximum of the last bucket - In these cases we "clip" to the first/last bucket. + @param field Field object (used to do value comparisons) + @param lookup_val The lookup value in KeyTupleFormat. + @param equal OUT TRUE<=> the found bucket has left_bound=lookup_val - 4. The value hits the bucket boundary. Then, we need to know whether the - point of interest is to the left the constant, or to the right of it. + @return + The bucket index */ -int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, +int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val, bool *equal) { int res; @@ -797,7 +802,8 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, if (!res) { *equal= true; - return middle; + low= middle; + goto end; } else if (res < 0) low= middle; @@ -806,25 +812,25 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, } /* - If low and high were assigned a value in the above loop, then they are not - equal to the lookup value: + If low and high were assigned a value in the above loop and we got here, + then the following holds: - bucket[low] < lookup_val < bucket[high] + bucket[low].start_value < lookup_val < bucket[high].start_value - But there are two special cases: low=0 and high=last_bucket. Handle them - below. + Besides that, there are two special cases: low=0 and high=last_bucket. + Handle them below. */ if (low == 0) { res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val); if (!res) *equal= true; - else if (res < 0) + else if (res < 0) // buckets[0] < lookup_val { res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val); if (!res) *equal= true; - if (res >= 0) + if (res <= 0) // buckets[high] <= lookup_val low= high; } } @@ -833,9 +839,19 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val); if (!res) *equal= true; - if (res >= 0) + if (res <= 0) low= high; } +end: + // Verification: *equal==TRUE <=> lookup value is equal to the found bucket. + DBUG_ASSERT(*equal == !(field->key_cmp((uchar*)buckets[low].start_value.data(), + lookup_val))); + // buckets[low] <= lookup_val, with one exception of the first bucket. + DBUG_ASSERT(low == 0 || + field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0); + // buckets[low+1] > lookup_val, with one exception of the last bucket + DBUG_ASSERT(low == (int)buckets.size()-1 || + field->key_cmp((uchar*)buckets[low+1].start_value.data(), lookup_val)> 0); return low; } |