summaryrefslogtreecommitdiff
path: root/sql/opt_histogram_json.cc
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2021-11-26 20:03:08 +0300
committerSergei Petrunia <psergey@askmonty.org>2022-01-19 18:10:11 +0300
commiteb6a9ad705009fc0e4b0654c862c23a28872dad2 (patch)
tree94e82af5d02321544a968dc14d2d501285624a54 /sql/opt_histogram_json.cc
parent106c785e2d3357a6f6711907f9e20b04c6ec7f96 (diff)
downloadmariadb-git-eb6a9ad705009fc0e4b0654c862c23a28872dad2.tar.gz
MDEV-26886: Estimation for filtered rows less precise with JSON histogram
- Make Histogram_json_hb::range_selectivity handle singleton buckets specially when computing selectivity of the max. endpoint bound. (for min. endpoint, we already do that). - Also, fixed comments for Histogram_json_hb::find_bucket
Diffstat (limited to 'sql/opt_histogram_json.cc')
-rw-r--r--sql/opt_histogram_json.cc72
1 files changed, 44 insertions, 28 deletions
diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc
index 44b058761a4..82828a35f44 100644
--- a/sql/opt_histogram_json.cc
+++ b/sql/opt_histogram_json.cc
@@ -743,9 +743,22 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
idx--;
}
double left_fract= get_left_fract(idx);
- double sel= position_in_interval(field, max_key, max_key_len,
- buckets[idx].start_value,
- get_end_value(idx));
+
+ double sel;
+ /* Special handling for singleton buckets */
+ if (buckets[idx].ndv == 1 && equal)
+ {
+ if (inclusive_endp)
+ sel= 1.0;
+ else
+ sel= 0.0;
+ }
+ else
+ {
+ sel= position_in_interval(field, max_key, max_key_len,
+ buckets[idx].start_value,
+ get_end_value(idx));
+ }
max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
}
else
@@ -763,26 +776,18 @@ void Histogram_json_hb::serialize(Field *field)
/*
- Find the rightmost histogram bucket such that "lookup_val $GT start_value".
-
- $GT is either '>' or '>=' depending on equal_is_less parameter.
-
- @param equal_is_less Controls what to do if a histogram bound is equal to the
- lookup_val.
-
- @detail
- Possible cases:
- 1. The regular case: the value falls into some bucket.
+ @brief
+ Find the leftmost histogram bucket such that "lookup_val >= start_value".
- 2. The value is less than the minimum of the first bucket
- 3. The value is greater than the maximum of the last bucket
- In these cases we "clip" to the first/last bucket.
+ @param field Field object (used to do value comparisons)
+ @param lookup_val The lookup value in KeyTupleFormat.
+ @param equal OUT TRUE<=> the found bucket has left_bound=lookup_val
- 4. The value hits the bucket boundary. Then, we need to know whether the
- point of interest is to the left the constant, or to the right of it.
+ @return
+ The bucket index
*/
-int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
+int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
bool *equal)
{
int res;
@@ -797,7 +802,8 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
if (!res)
{
*equal= true;
- return middle;
+ low= middle;
+ goto end;
}
else if (res < 0)
low= middle;
@@ -806,25 +812,25 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
}
/*
- If low and high were assigned a value in the above loop, then they are not
- equal to the lookup value:
+ If low and high were assigned a value in the above loop and we got here,
+ then the following holds:
- bucket[low] < lookup_val < bucket[high]
+ bucket[low].start_value < lookup_val < bucket[high].start_value
- But there are two special cases: low=0 and high=last_bucket. Handle them
- below.
+ Besides that, there are two special cases: low=0 and high=last_bucket.
+ Handle them below.
*/
if (low == 0)
{
res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
if (!res)
*equal= true;
- else if (res < 0)
+ else if (res < 0) // buckets[0] < lookup_val
{
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
if (!res)
*equal= true;
- if (res >= 0)
+ if (res <= 0) // buckets[high] <= lookup_val
low= high;
}
}
@@ -833,9 +839,19 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
if (!res)
*equal= true;
- if (res >= 0)
+ if (res <= 0)
low= high;
}
+end:
+ // Verification: *equal==TRUE <=> lookup value is equal to the found bucket.
+ DBUG_ASSERT(*equal == !(field->key_cmp((uchar*)buckets[low].start_value.data(),
+ lookup_val)));
+ // buckets[low] <= lookup_val, with one exception of the first bucket.
+ DBUG_ASSERT(low == 0 ||
+ field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0);
+ // buckets[low+1] > lookup_val, with one exception of the last bucket
+ DBUG_ASSERT(low == (int)buckets.size()-1 ||
+ field->key_cmp((uchar*)buckets[low+1].start_value.data(), lookup_val)> 0);
return low;
}