From 0a7d85c97f6dd68f78ecb3e7074043ff2d4c24fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 5 Dec 2022 18:00:22 +0200 Subject: MDEV-30148 Race condition between non-persistent statistics and purge btr_cur_t::open_random_leaf(): Replaces btr_cur_open_at_rnd_pos(). Acquire a shared latch on each page, and finally release all latches except the one on the leaf page. This fixes a race condition between the purge of history and btr_estimate_number_of_different_key_vals(), which turned out to only hold a buffer-fix on the randomly chosen leaf page. Typically, an assertion would fail in page_rec_is_supremum(). ibuf_contract(): Start from the beginning of the change buffer, to simplify the logic. Starting with commit b42294bc6409794bdbd2051b32fa079d81cea61d it does not matter much where the change buffer merge is being initiated. The race condition may have been introduced as early as mysql/mysql-server@ac74632293bea967b352d1b472abedeeaa921b98 from where it was copied to commit 2e814d4702d71a04388386a9f591d14a35980bfe. Reviewed by: Vladislav Lesin Tested by: Matthias Leich --- storage/innobase/btr/btr0cur.cc | 282 ---------------------------------------- 1 file changed, 282 deletions(-) (limited to 'storage/innobase/btr') diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c6f11aa518f..ac06d9b1568 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -2629,288 +2629,6 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, return err; } -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. -@return true if the index is available and we have put the cursor, false -if the index is unavailable */ -bool -btr_cur_open_at_rnd_pos( - dict_index_t* index, /*!< in: index */ - btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_cur_t* cursor, /*!< in/out: B-tree cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t* page_cursor; - ulint node_ptr_max_size = srv_page_size / 2; - ulint height; - rec_t* node_ptr; - btr_intention_t lock_intention; - buf_block_t* tree_blocks[BTR_MAX_LEVELS]; - ulint tree_savepoints[BTR_MAX_LEVELS]; - ulint n_blocks = 0; - ulint n_releases = 0; - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(!index->is_spatial()); - - lock_intention = btr_cur_get_and_clear_intention(&latch_mode); - - ulint savepoint = mtr_set_savepoint(mtr); - - rw_lock_type_t upper_rw_latch; - - switch (latch_mode) { - case BTR_MODIFY_TREE: - /* Most of delete-intended operations are purging. - Free blocks and read IO bandwidth should be prior - for them, when the history list is glowing huge. */ - if (lock_intention == BTR_INTENTION_DELETE - && buf_pool.n_pend_reads - && trx_sys.history_size_approx() - > BTR_CUR_FINE_HISTORY_LENGTH) { - mtr_x_lock_index(index, mtr); - } else { - mtr_sx_lock_index(index, mtr); - } - upper_rw_latch = RW_X_LATCH; - break; - case BTR_SEARCH_PREV: - case BTR_MODIFY_PREV: - /* This function doesn't support left uncle - page lock for left leaf page lock, when - needed. */ - case BTR_SEARCH_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - ut_ad(0); - /* fall through */ - default: - if (!srv_read_only_mode) { - mtr_s_lock_index(index, mtr); - upper_rw_latch = RW_S_LATCH; - } else { - upper_rw_latch = RW_NO_LATCH; - } - } - - DBUG_EXECUTE_IF("test_index_is_unavailable", - return(false);); - - if (index->page == FIL_NULL) { - /* Since we don't hold index lock until just now, the index - could be modified by others, for example, if this is a - statistics updater for referenced table, it could be marked - as unavailable by 'DROP TABLE' in the mean time, since - we don't hold lock for statistics updater */ - return(false); - } - - const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf( - latch_mode); - - page_cursor = btr_cur_get_page_cur(cursor); - page_cursor->index = index; - - page_id_t page_id(index->table->space_id, index->page); - const ulint zip_size = index->table->space->zip_size(); - dberr_t err; - - if (root_leaf_rw_latch == RW_X_LATCH) { - node_ptr_max_size = btr_node_ptr_max_size(index); - } - - height = ULINT_UNDEFINED; - - for (;;) { - page_t* page; - - ut_ad(n_blocks < BTR_MAX_LEVELS); - tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); - - const rw_lock_type_t rw_latch = height - && latch_mode != BTR_MODIFY_TREE - ? upper_rw_latch : RW_NO_LATCH; - buf_block_t* block = buf_page_get_gen(page_id, zip_size, - rw_latch, NULL, BUF_GET, - mtr, &err, - height == 0 - && !index->is_clust()); - tree_blocks[n_blocks] = block; - - ut_ad((block != NULL) == (err == DB_SUCCESS)); - - if (!block) { - if (err == DB_DECRYPTION_FAILED) { - btr_decryption_failed(*index); - } - - break; - } - - page = buf_block_get_frame(block); - - if (height == ULINT_UNDEFINED - && page_is_leaf(page) - && rw_latch != RW_NO_LATCH - && rw_latch != root_leaf_rw_latch) { - /* We should retry to get the page, because the root page - is latched with different level as a leaf page. */ - ut_ad(root_leaf_rw_latch != RW_NO_LATCH); - ut_ad(rw_latch == RW_S_LATCH); - - ut_ad(n_blocks == 0); - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_blocks], - tree_blocks[n_blocks]); - - upper_rw_latch = root_leaf_rw_latch; - continue; - } - - ut_ad(fil_page_index_page_check(page)); - ut_ad(index->id == btr_page_get_index_id(page)); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page); - } - - if (height == 0) { - if (rw_latch == RW_NO_LATCH - || srv_read_only_mode) { - btr_cur_latch_leaves(block, latch_mode, cursor, - mtr); - } - - /* btr_cur_t::open_leaf() and - btr_cur_search_to_nth_level() release - tree s-latch here.*/ - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - break; - default: - /* Release the tree s-latch */ - if (!srv_read_only_mode) { - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - &index->lock); - } - - /* release upper blocks */ - for (; n_releases < n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, - tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - } - - page_cursor->block = block; - page_cur_open_on_rnd_user_rec(page_cursor); - - if (height == 0) { - - break; - } - - ut_ad(height > 0); - - height--; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, page_cursor->index, - offsets, 0, ULINT_UNDEFINED, &heap); - - /* If the rec is the first or last in the page for - pessimistic delete intention, it might cause node_ptr insert - for the upper level. We should change the intention and retry. - */ - if (latch_mode == BTR_MODIFY_TREE - && btr_cur_need_opposite_intention( - page, lock_intention, node_ptr)) { - - ut_ad(upper_rw_latch == RW_X_LATCH); - /* release all blocks */ - for (; n_releases <= n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - - lock_intention = BTR_INTENTION_BOTH; - - page_id.set_page_no(dict_index_get_page(index)); - - height = ULINT_UNDEFINED; - - n_blocks = 0; - n_releases = 0; - - continue; - } - - if (latch_mode == BTR_MODIFY_TREE - && !btr_cur_will_modify_tree( - page_cursor->index, page, lock_intention, - node_ptr, node_ptr_max_size, zip_size, mtr)) { - ut_ad(upper_rw_latch == RW_X_LATCH); - ut_ad(n_releases <= n_blocks); - - /* we can release upper blocks */ - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0) { - /* we should not release root page - to pin to same block. */ - continue; - } - - /* release unused blocks to unpin */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - - if (height == 0 - && latch_mode == BTR_MODIFY_TREE) { - ut_ad(upper_rw_latch == RW_X_LATCH); - /* we should sx-latch root page, if released already. - It contains seg_header. */ - if (n_releases > 0) { - mtr->sx_latch_at_savepoint( - tree_savepoints[0], - tree_blocks[0]); - } - - /* x-latch the branch blocks not released yet. */ - for (ulint i = n_releases; i <= n_blocks; i++) { - mtr->x_latch_at_savepoint( - tree_savepoints[i], - tree_blocks[i]); - } - } - - /* Go to the child node */ - page_id.set_page_no( - btr_node_ptr_get_child_page_no(node_ptr, offsets)); - - n_blocks++; - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return err == DB_SUCCESS; -} - /*==================== B-TREE INSERT =========================*/ /*************************************************************//** -- cgit v1.2.1