diff options
-rw-r--r-- | storage/innobase/btr/btr0btr.cc | 74 | ||||
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 344 | ||||
-rw-r--r-- | storage/innobase/dict/dict0stats.cc | 463 | ||||
-rw-r--r-- | storage/innobase/include/btr0btr.h | 24 | ||||
-rw-r--r-- | storage/innobase/include/btr0cur.h | 31 |
5 files changed, 424 insertions, 512 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 56e84e47fda..02aa89361c5 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -274,38 +274,6 @@ btr_root_get( } /**************************************************************//** -Gets the height of the B-tree (the level of the root, when the leaf -level is assumed to be 0). The caller must hold an S or X latch on -the index. -@return tree height (level of the root) */ -ulint -btr_height_get( -/*===========*/ - const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint height=0; - buf_block_t* root_block; - - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK - | MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - - /* S latches the page */ - root_block = btr_root_block_get(index, RW_S_LATCH, mtr); - - if (root_block) { - height = btr_page_get_level(buf_block_get_frame(root_block)); - - /* Release the S latch on the root page. */ - mtr->memo_release(root_block, MTR_MEMO_PAGE_S_FIX); - } - - return(height); -} - -/**************************************************************//** Checks a file segment header within a B-tree root page and updates the segment header space id. @return TRUE if valid */ @@ -563,48 +531,6 @@ btr_page_alloc( } /**************************************************************//** -Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -ulint -btr_get_size( -/*=========*/ - const dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ -{ - ulint n=0; - - ut_ad(srv_read_only_mode - || mtr->memo_contains(index->lock, MTR_MEMO_S_LOCK)); - ut_ad(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE); - - if (dict_index_is_online_ddl(index) - || !index->is_committed()) { - return(ULINT_UNDEFINED); - } - - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - if (!root) { - return ULINT_UNDEFINED; - } - mtr->x_lock_space(index->table->space); - if (flag == BTR_N_LEAF_PAGES) { - fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, &n, mtr); - } else { - ulint dummy; - n = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_TOP - + root->frame, &dummy, mtr); - n += fseg_n_reserved_pages(*root, - PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, &dummy, mtr); - } - - return(n); -} - -/**************************************************************//** Frees a page used in an ibuf tree. Puts the page to the free list of the ibuf tree. */ static diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index adba676808c..32bdf9a8a51 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -137,17 +137,6 @@ can be released by page reorganize, then it is reorganized */ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ -/** Estimated table level stats from sampled value. -@param value sampled stats -@param index index being sampled -@param sample number of sampled rows -@param ext_size external stored data size -@param not_empty table not empty -@return estimated table wide stats from sampled value */ -#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \ - (((value) * static_cast<ib_uint64_t>(index->stat_n_leaf_pages) \ - + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) - /* @} */ /*******************************************************************//** @@ -6552,339 +6541,6 @@ btr_estimate_n_rows_in_range( index, tuple1, tuple2, 1); } -/*******************************************************************//** -Record the number of non_null key values in a given index for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are eventually stored in the array: -index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */ -static -void -btr_record_not_null_field_in_rec( -/*=============================*/ - ulint n_unique, /*!< in: dict_index_get_n_unique(index), - number of columns uniquely determine - an index entry */ - const rec_offs* offsets, /*!< in: rec_get_offsets(rec, index), - its size could be for all fields or - that of "n_unique" */ - ib_uint64_t* n_not_null) /*!< in/out: array to record number of - not null rows for n-column prefix */ -{ - ulint i; - - ut_ad(rec_offs_n_fields(offsets) >= n_unique); - - if (n_not_null == NULL) { - return; - } - - for (i = 0; i < n_unique; i++) { - if (rec_offs_nth_sql_null(offsets, i)) { - break; - } - - n_not_null[i]++; - } -} - -/** Estimates the number of different key values in a given index, for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed -0..n_uniq-1) and the number of pages that were sampled is saved in -result.n_sample_sizes[]. -If innodb_stats_method is nulls_ignored, we also record the number of -non-null values for each prefix and stored the estimates in -array result.n_non_null_key_vals. -@param[in] index index -@return vector with statistics information -empty vector if the index is unavailable. */ -std::vector<index_field_stats_t> -btr_estimate_number_of_different_key_vals(dict_index_t* index) -{ - btr_cur_t cursor; - page_t* page; - rec_t* rec; - ulint n_cols; - ib_uint64_t* n_diff; - ib_uint64_t* n_not_null; - ibool stats_null_not_equal; - uintmax_t n_sample_pages=1; /* number of pages to sample */ - ulint not_empty_flag = 0; - ulint total_external_size = 0; - ulint i; - ulint j; - uintmax_t add_on; - mtr_t mtr; - mem_heap_t* heap = NULL; - rec_offs* offsets_rec = NULL; - rec_offs* offsets_next_rec = NULL; - - std::vector<index_field_stats_t> result; - - /* For spatial index, there is no such stats can be - fetched. */ - ut_ad(!dict_index_is_spatial(index)); - - n_cols = dict_index_get_n_unique(index); - - heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) - * n_cols - + dict_index_get_n_fields(index) - * (sizeof *offsets_rec - + sizeof *offsets_next_rec)); - - n_diff = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof(n_diff[0])); - - n_not_null = NULL; - - /* Check srv_innodb_stats_method setting, and decide whether we - need to record non-null value and also decide if NULL is - considered equal (by setting stats_null_not_equal value) */ - switch (srv_innodb_stats_method) { - case SRV_STATS_NULLS_IGNORED: - n_not_null = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof *n_not_null); - /* fall through */ - - case SRV_STATS_NULLS_UNEQUAL: - /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL - case, we will treat NULLs as unequal value */ - stats_null_not_equal = TRUE; - break; - - case SRV_STATS_NULLS_EQUAL: - stats_null_not_equal = FALSE; - break; - - default: - ut_error; - } - - if (srv_stats_sample_traditional) { - /* It makes no sense to test more pages than are contained - in the index, thus we lower the number if it is too high */ - if (srv_stats_transient_sample_pages > index->stat_index_size) { - if (index->stat_index_size > 0) { - n_sample_pages = index->stat_index_size; - } - } else { - n_sample_pages = srv_stats_transient_sample_pages; - } - } else { - /* New logaritmic number of pages that are estimated. - Number of pages estimated should be between 1 and - index->stat_index_size. - - If we have only 0 or 1 index pages then we can only take 1 - sample. We have already initialized n_sample_pages to 1. - - So taking index size as I and sample as S and log(I)*S as L - - requirement 1) we want the out limit of the expression to not exceed I; - requirement 2) we want the ideal pages to be at least S; - so the current expression is min(I, max( min(S,I), L) - - looking for simplifications: - - case 1: assume S < I - min(I, max( min(S,I), L) -> min(I , max( S, L)) - - but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L. - - so we have: min(I , L) - - case 2: assume I < S - min(I, max( min(S,I), L) -> min(I, max( I, L)) - - case 2a: L > I - min(I, max( I, L)) -> min(I, L) -> I - - case 2b: when L < I - min(I, max( I, L)) -> min(I, I ) -> I - - so taking all case2 paths is I, our expression is: - n_pages = S < I? min(I,L) : I - */ - if (index->stat_index_size > 1) { - n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) - ? ut_min(index->stat_index_size, - static_cast<ulint>( - log2(double(index->stat_index_size)) - * double(srv_stats_transient_sample_pages))) - : index->stat_index_size; - } - } - - /* Sanity check */ - ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size)); - - /* We sample some pages in the index to get an estimate */ - - for (i = 0; i < n_sample_pages; i++) { - mtr_start(&mtr); - - bool available; - - available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, - &cursor, &mtr); - - if (!available) { - mtr_commit(&mtr); - mem_heap_free(heap); - - return result; - } - - /* Count the number of different key values for each prefix of - the key on this index page. If the prefix does not determine - the index record uniquely in the B-tree, then we subtract one - because otherwise our algorithm would give a wrong estimate - for an index where there is just one key value. */ - - if (!index->is_readable()) { - mtr_commit(&mtr); - goto exit_loop; - } - - page = btr_cur_get_page(&cursor); - - rec = page_rec_get_next(page_get_infimum_rec(page)); - const ulint n_core = page_is_leaf(page) - ? index->n_core_fields : 0; - - if (!page_rec_is_supremum(rec)) { - not_empty_flag = 1; - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - n_core, - ULINT_UNDEFINED, &heap); - - if (n_not_null != NULL) { - btr_record_not_null_field_in_rec( - n_cols, offsets_rec, n_not_null); - } - } - - while (!page_rec_is_supremum(rec)) { - ulint matched_fields; - rec_t* next_rec = page_rec_get_next(rec); - if (page_rec_is_supremum(next_rec)) { - total_external_size += - btr_rec_get_externally_stored_len( - rec, offsets_rec); - break; - } - - offsets_next_rec = rec_get_offsets(next_rec, index, - offsets_next_rec, - n_core, - ULINT_UNDEFINED, - &heap); - - cmp_rec_rec(rec, next_rec, - offsets_rec, offsets_next_rec, - index, stats_null_not_equal, - &matched_fields); - - for (j = matched_fields; j < n_cols; j++) { - /* We add one if this index record has - a different prefix from the previous */ - - n_diff[j]++; - } - - if (n_not_null != NULL) { - btr_record_not_null_field_in_rec( - n_cols, offsets_next_rec, n_not_null); - } - - total_external_size - += btr_rec_get_externally_stored_len( - rec, offsets_rec); - - rec = next_rec; - /* Initialize offsets_rec for the next round - and assign the old offsets_rec buffer to - offsets_next_rec. */ - { - rec_offs* offsets_tmp = offsets_rec; - offsets_rec = offsets_next_rec; - offsets_next_rec = offsets_tmp; - } - } - - if (n_cols == dict_index_get_n_unique_in_tree(index) - && page_has_siblings(page)) { - - /* If there is more than one leaf page in the tree, - we add one because we know that the first record - on the page certainly had a different prefix than the - last record on the previous index page in the - alphabetical order. Before this fix, if there was - just one big record on each clustered index page, the - algorithm grossly underestimated the number of rows - in the table. */ - - n_diff[n_cols - 1]++; - } - - mtr_commit(&mtr); - } - -exit_loop: - /* If we saw k borders between different key values on - n_sample_pages leaf pages, we can estimate how many - there will be in index->stat_n_leaf_pages */ - - /* We must take into account that our sample actually represents - also the pages used for external storage of fields (those pages are - included in index->stat_n_leaf_pages) */ - - result.reserve(n_cols); - - for (j = 0; j < n_cols; j++) { - index_field_stats_t stat; - - stat.n_diff_key_vals - = BTR_TABLE_STATS_FROM_SAMPLE( - n_diff[j], index, n_sample_pages, - total_external_size, not_empty_flag); - - /* If the tree is small, smaller than - 10 * n_sample_pages + total_external_size, then - the above estimate is ok. For bigger trees it is common that we - do not see any borders between key values in the few pages - we pick. But still there may be n_sample_pages - different key values, or even more. Let us try to approximate - that: */ - - add_on = index->stat_n_leaf_pages - / (10 * (n_sample_pages - + total_external_size)); - - if (add_on > n_sample_pages) { - add_on = n_sample_pages; - } - - stat.n_diff_key_vals += add_on; - - stat.n_sample_sizes = n_sample_pages; - - if (n_not_null != NULL) { - stat.n_non_null_key_vals = - BTR_TABLE_STATS_FROM_SAMPLE( - n_not_null[j], index, n_sample_pages, - total_external_size, not_empty_flag); - } - - result.push_back(stat); - } - - mem_heap_free(heap); - - return result; -} - /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ /***********************************************************//** diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index d7466ae5f8a..f92beac0f67 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1024,6 +1024,367 @@ dict_stats_snapshot_free( dict_stats_table_clone_free(t); } +/** Statistics for one field of an index. */ +struct index_field_stats_t +{ + ib_uint64_t n_diff_key_vals; + ib_uint64_t n_sample_sizes; + ib_uint64_t n_non_null_key_vals; + + index_field_stats_t(ib_uint64_t n_diff_key_vals= 0, + ib_uint64_t n_sample_sizes= 0, + ib_uint64_t n_non_null_key_vals= 0) + : n_diff_key_vals(n_diff_key_vals), n_sample_sizes(n_sample_sizes), + n_non_null_key_vals(n_non_null_key_vals) + { + } +}; + +/*******************************************************************//** +Record the number of non_null key values in a given index for +each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + ulint n_unique, /*!< in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const rec_offs* offsets, /*!< in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_uint64_t* n_not_null) /*!< in/out: array to record number of + not null rows for n-column prefix */ +{ + ulint i; + + ut_ad(rec_offs_n_fields(offsets) >= n_unique); + + if (n_not_null == NULL) { + return; + } + + for (i = 0; i < n_unique; i++) { + if (rec_offs_nth_sql_null(offsets, i)) { + break; + } + + n_not_null[i]++; + } +} + +/** Estimated table level stats from sampled value. +@param value sampled stats +@param index index being sampled +@param sample number of sampled rows +@param ext_size external stored data size +@param not_empty table not empty +@return estimated table wide stats from sampled value */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \ + (((value) * static_cast<ib_uint64_t>(index->stat_n_leaf_pages) \ + + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) + +/** Estimates the number of different key values in a given index, for +each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed +0..n_uniq-1) and the number of pages that were sampled is saved in +result.n_sample_sizes[]. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array result.n_non_null_key_vals. +@param index B-tree index +@param bulk_trx_id the value of index->table->bulk_trx_id at the start +@return vector with statistics information +empty vector if the index is unavailable. */ +static +std::vector<index_field_stats_t> +btr_estimate_number_of_different_key_vals(dict_index_t* index, + trx_id_t bulk_trx_id) +{ + btr_cur_t cursor; + page_t* page; + rec_t* rec; + ulint n_cols; + ib_uint64_t* n_diff; + ib_uint64_t* n_not_null; + ibool stats_null_not_equal; + uintmax_t n_sample_pages=1; /* number of pages to sample */ + ulint not_empty_flag = 0; + ulint total_external_size = 0; + ulint i; + ulint j; + uintmax_t add_on; + mtr_t mtr; + mem_heap_t* heap = NULL; + rec_offs* offsets_rec = NULL; + rec_offs* offsets_next_rec = NULL; + + std::vector<index_field_stats_t> result; + + ut_ad(!index->is_spatial()); + + n_cols = dict_index_get_n_unique(index); + + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * n_cols + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); + + n_diff = (ib_uint64_t*) mem_heap_zalloc( + heap, n_cols * sizeof(n_diff[0])); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = (ib_uint64_t*) mem_heap_zalloc( + heap, n_cols * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; + + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; + + default: + ut_error; + } + + if (srv_stats_sample_traditional) { + /* It makes no sense to test more pages than are contained + in the index, thus we lower the number if it is too high */ + if (srv_stats_transient_sample_pages > index->stat_index_size) { + if (index->stat_index_size > 0) { + n_sample_pages = index->stat_index_size; + } + } else { + n_sample_pages = srv_stats_transient_sample_pages; + } + } else { + /* New logaritmic number of pages that are estimated. + Number of pages estimated should be between 1 and + index->stat_index_size. + + If we have only 0 or 1 index pages then we can only take 1 + sample. We have already initialized n_sample_pages to 1. + + So taking index size as I and sample as S and log(I)*S as L + + requirement 1) we want the out limit of the expression to not exceed I; + requirement 2) we want the ideal pages to be at least S; + so the current expression is min(I, max( min(S,I), L) + + looking for simplifications: + + case 1: assume S < I + min(I, max( min(S,I), L) -> min(I , max( S, L)) + + but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L. + + so we have: min(I , L) + + case 2: assume I < S + min(I, max( min(S,I), L) -> min(I, max( I, L)) + + case 2a: L > I + min(I, max( I, L)) -> min(I, L) -> I + + case 2b: when L < I + min(I, max( I, L)) -> min(I, I ) -> I + + so taking all case2 paths is I, our expression is: + n_pages = S < I? min(I,L) : I + */ + if (index->stat_index_size > 1) { + n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) + ? ut_min(index->stat_index_size, + static_cast<ulint>( + log2(double(index->stat_index_size)) + * double(srv_stats_transient_sample_pages))) + : index->stat_index_size; + } + } + + /* Sanity check */ + ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size)); + + /* We sample some pages in the index to get an estimate */ + + for (i = 0; i < n_sample_pages; i++) { + mtr.start(); + + bool available; + + available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, + &cursor, &mtr); + + if (!available || index->table->bulk_trx_id != bulk_trx_id) { + mtr.commit(); + mem_heap_free(heap); + + return result; + } + + /* Count the number of different key values for each prefix of + the key on this index page. If the prefix does not determine + the index record uniquely in the B-tree, then we subtract one + because otherwise our algorithm would give a wrong estimate + for an index where there is just one key value. */ + + if (!index->is_readable()) { + mtr.commit(); + goto exit_loop; + } + + page = btr_cur_get_page(&cursor); + + rec = page_rec_get_next(page_get_infimum_rec(page)); + const ulint n_core = page_is_leaf(page) + ? index->n_core_fields : 0; + + if (!page_rec_is_supremum(rec)) { + not_empty_flag = 1; + offsets_rec = rec_get_offsets(rec, index, offsets_rec, + n_core, + ULINT_UNDEFINED, &heap); + + if (n_not_null != NULL) { + btr_record_not_null_field_in_rec( + n_cols, offsets_rec, n_not_null); + } + } + + while (!page_rec_is_supremum(rec)) { + ulint matched_fields; + rec_t* next_rec = page_rec_get_next(rec); + if (page_rec_is_supremum(next_rec)) { + total_external_size += + btr_rec_get_externally_stored_len( + rec, offsets_rec); + break; + } + + offsets_next_rec = rec_get_offsets(next_rec, index, + offsets_next_rec, + n_core, + ULINT_UNDEFINED, + &heap); + + cmp_rec_rec(rec, next_rec, + offsets_rec, offsets_next_rec, + index, stats_null_not_equal, + &matched_fields); + + for (j = matched_fields; j < n_cols; j++) { + /* We add one if this index record has + a different prefix from the previous */ + + n_diff[j]++; + } + + if (n_not_null != NULL) { + btr_record_not_null_field_in_rec( + n_cols, offsets_next_rec, n_not_null); + } + + total_external_size + += btr_rec_get_externally_stored_len( + rec, offsets_rec); + + rec = next_rec; + /* Initialize offsets_rec for the next round + and assign the old offsets_rec buffer to + offsets_next_rec. */ + { + rec_offs* offsets_tmp = offsets_rec; + offsets_rec = offsets_next_rec; + offsets_next_rec = offsets_tmp; + } + } + + if (n_cols == dict_index_get_n_unique_in_tree(index) + && page_has_siblings(page)) { + + /* If there is more than one leaf page in the tree, + we add one because we know that the first record + on the page certainly had a different prefix than the + last record on the previous index page in the + alphabetical order. Before this fix, if there was + just one big record on each clustered index page, the + algorithm grossly underestimated the number of rows + in the table. */ + + n_diff[n_cols - 1]++; + } + + mtr.commit(); + } + +exit_loop: + /* If we saw k borders between different key values on + n_sample_pages leaf pages, we can estimate how many + there will be in index->stat_n_leaf_pages */ + + /* We must take into account that our sample actually represents + also the pages used for external storage of fields (those pages are + included in index->stat_n_leaf_pages) */ + + result.reserve(n_cols); + + for (j = 0; j < n_cols; j++) { + index_field_stats_t stat; + + stat.n_diff_key_vals + = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, n_sample_pages, + total_external_size, not_empty_flag); + + /* If the tree is small, smaller than + 10 * n_sample_pages + total_external_size, then + the above estimate is ok. For bigger trees it is common that we + do not see any borders between key values in the few pages + we pick. But still there may be n_sample_pages + different key values, or even more. Let us try to approximate + that: */ + + add_on = index->stat_n_leaf_pages + / (10 * (n_sample_pages + + total_external_size)); + + if (add_on > n_sample_pages) { + add_on = n_sample_pages; + } + + stat.n_diff_key_vals += add_on; + + stat.n_sample_sizes = n_sample_pages; + + if (n_not_null != NULL) { + stat.n_non_null_key_vals = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, n_sample_pages, + total_external_size, not_empty_flag); + } + + result.push_back(stat); + } + + mem_heap_free(heap); + + return result; +} + /*********************************************************************//** Calculates new estimates for index statistics. This function is relatively quick and is used to calculate transient statistics that @@ -1054,39 +1415,48 @@ dummy_empty: } else if (ibuf_debug && !dict_index_is_clust(index)) { goto dummy_empty; #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + } else if (dict_index_is_online_ddl(index) || !index->is_committed() + || !index->table->space) { + goto dummy_empty; } else { mtr_t mtr; - ulint size; mtr.start(); mtr_s_lock_index(index, &mtr); - size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr); - - if (size != ULINT_UNDEFINED) { - index->stat_index_size = size; + buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, + &mtr); + if (!root) { +invalid: + mtr.commit(); + goto dummy_empty; + } - size = btr_get_size( - index, BTR_N_LEAF_PAGES, &mtr); + const auto bulk_trx_id = index->table->bulk_trx_id; + if (bulk_trx_id && trx_sys.find(nullptr, bulk_trx_id, false)) { + goto invalid; } - mtr.commit(); + mtr.x_lock_space(index->table->space); - switch (size) { - case ULINT_UNDEFINED: - goto dummy_empty; - case 0: - /* The root node of the tree is a leaf */ - size = 1; - } + ulint dummy, size; + index->stat_index_size + = fseg_n_reserved_pages(*root, PAGE_HEADER + + PAGE_BTR_SEG_LEAF + + root->frame, &size, &mtr) + + fseg_n_reserved_pages(*root, PAGE_HEADER + + PAGE_BTR_SEG_TOP + + root->frame, &dummy, &mtr); - index->stat_n_leaf_pages = size; + mtr.commit(); + + index->stat_n_leaf_pages = size ? size : 1; /* Do not continue if table decryption has failed or table is already marked as corrupted. */ if (index->is_readable()) { std::vector<index_field_stats_t> stats = btr_estimate_number_of_different_key_vals( - index); + index, bulk_trx_id); if (!stats.empty()) { index->table->stats_mutex_lock(); @@ -2037,7 +2407,7 @@ struct index_stats_t { stats.reserve(n_uniq); for (ulint i= 0; i < n_uniq; ++i) - stats.push_back(index_field_stats_t(0, 1, 0)); + stats.push_back(index_field_stats_t{0, 1, 0}); } }; @@ -2123,15 +2493,12 @@ stat_n_leaf_pages. This function can be slow. @return index stats */ static index_stats_t dict_stats_analyze_index(dict_index_t* index) { - ulint root_level; - ulint level; bool level_is_analyzed; ulint n_uniq; ulint n_prefix; ib_uint64_t total_recs; ib_uint64_t total_pages; mtr_t mtr; - ulint size; index_stats_t result(index->n_uniq); DBUG_ENTER("dict_stats_analyze_index"); @@ -2150,30 +2517,43 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index) mtr.start(); mtr_s_lock_index(index, &mtr); - size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr); + uint16_t root_level; + + { + buf_block_t* root; + root = btr_root_block_get(index, RW_SX_LATCH, &mtr); + if (!root) { +empty_index: + mtr.commit(); + dict_stats_assert_initialized_index(index); + DBUG_RETURN(result); + } - if (size != ULINT_UNDEFINED) { - result.index_size = size; - size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr); + root_level = btr_page_get_level(root->frame); + + mtr.x_lock_space(index->table->space); + ulint dummy, size; + result.index_size + = fseg_n_reserved_pages(*root, PAGE_HEADER + + PAGE_BTR_SEG_LEAF + + root->frame, &size, &mtr) + + fseg_n_reserved_pages(*root, PAGE_HEADER + + PAGE_BTR_SEG_TOP + + root->frame, &dummy, &mtr); + result.n_leaf_pages = size ? size : 1; } - /* Release the X locks on the root page taken by btr_get_size() */ - mtr.commit(); - - switch (size) { - case ULINT_UNDEFINED: - dict_stats_assert_initialized_index(index); - DBUG_RETURN(result); - case 0: - /* The root node of the tree is a leaf */ - size = 1; + const auto bulk_trx_id = index->table->bulk_trx_id; + if (bulk_trx_id && trx_sys.find(nullptr, bulk_trx_id, false)) { + result.index_size = 1; + result.n_leaf_pages = 1; + goto empty_index; } - result.n_leaf_pages = size; + mtr.commit(); mtr.start(); mtr_sx_lock_index(index, &mtr); - root_level = btr_height_get(index, &mtr); n_uniq = dict_index_get_n_unique(index); @@ -2251,7 +2631,7 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index) So if we find that the first level containing D distinct keys (on n_prefix columns) is L, we continue from L when searching for D distinct keys on n_prefix-1 columns. */ - level = root_level; + auto level = root_level; level_is_analyzed = false; for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) { @@ -2265,7 +2645,10 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index) mtr.commit(); mtr.start(); mtr_sx_lock_index(index, &mtr); - if (root_level != btr_height_get(index, &mtr)) { + buf_block_t *root = btr_root_block_get(index, RW_S_LATCH, + &mtr); + if (!root || root_level != btr_page_get_level(root->frame) + || index->table->bulk_trx_id != bulk_trx_id) { /* Just quit if the tree has changed beyond recognition here. The old stats from previous runs will remain in the values that we have @@ -2277,6 +2660,8 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index) break; } + mtr.memo_release(root, MTR_MEMO_PAGE_S_FIX); + /* check whether we should pick the current level; we pick level 1 even if it does not have enough distinct records because we do not want to scan the diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 4543fc21e8a..40293df21e5 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -196,18 +196,6 @@ btr_root_adjust_on_import( const dict_index_t* index) /*!< in: index tree */ MY_ATTRIBUTE((warn_unused_result)); -/**************************************************************//** -Gets the height of the B-tree (the level of the root, when the leaf -level is assumed to be 0). The caller must hold an S or X latch on -the index. -@return tree height (level of the root) */ -ulint -btr_height_get( -/*===========*/ - const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((warn_unused_result)); - /** Get an index page and declare its latching order level. @param[in] index index tree @param[in] page page number @@ -537,17 +525,6 @@ btr_discard_page( btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on the root page */ mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -ulint -btr_get_size( -/*=========*/ - const dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ - MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** Allocates a new file page to be used in an index tree. NOTE: we assume @@ -614,7 +591,6 @@ btr_root_block_get( rw_lock_type_t mode, /*!< in: either RW_S_LATCH or RW_X_LATCH */ mtr_t* mtr); /*!< in: mtr */ - /*************************************************************//** Reorganizes an index page. diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index c7f25aff4b7..911c79c29ba 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -575,37 +575,6 @@ btr_estimate_n_rows_in_range( btr_pos_t* range_start, btr_pos_t* range_end); - -/** Statistics for one field of an index. */ -struct index_field_stats_t -{ - ib_uint64_t n_diff_key_vals; - ib_uint64_t n_sample_sizes; - ib_uint64_t n_non_null_key_vals; - - index_field_stats_t(ib_uint64_t n_diff_key_vals= 0, - ib_uint64_t n_sample_sizes= 0, - ib_uint64_t n_non_null_key_vals= 0) - : n_diff_key_vals(n_diff_key_vals), n_sample_sizes(n_sample_sizes), - n_non_null_key_vals(n_non_null_key_vals) - { - } -}; - -/** Estimates the number of different key values in a given index, for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed -0..n_uniq-1) and the number of pages that were sampled is saved in -index->stat_n_sample_sizes[]. -If innodb_stats_method is nulls_ignored, we also record the number of -non-null values for each prefix and stored the estimates in -array index->stat_n_non_null_key_vals. -@param[in] index index -@return stat vector if the index is available and we get the estimated numbers, -empty vector if the index is unavailable. */ -std::vector<index_field_stats_t> -btr_estimate_number_of_different_key_vals(dict_index_t* index); - /** Gets the externally stored size of a record, in units of a database page. @param[in] rec record @param[in] offsets array returned by rec_get_offsets() |