summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2023-03-16 15:52:42 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2023-03-16 15:52:42 +0200
commitf2096478d5750b983f9a9cc4691d20e152dafd4a (patch)
tree0550b7949b594b7b045f67bf8a754ffdb5874562
parent85cbfaefee694cdd490b357444f24ff16b8042e8 (diff)
downloadmariadb-git-f2096478d5750b983f9a9cc4691d20e152dafd4a.tar.gz
MDEV-29835 InnoDB hang on B-tree split or merge
This is a follow-up to commit de4030e4d49805a7ded5c0bfee01cc3fd7623522 (MDEV-30400), which fixed some hangs related to B-tree split or merge. btr_root_block_get(): Use and update the root page guess. This is just a minor performance optimization, not affecting correctness. btr_validate_level(): Remove the parameter "lockout", and always acquire an exclusive dict_index_t::lock in CHECK TABLE without QUICK. This is needed in order to avoid latching order violation in btr_page_get_father_node_ptr_for_validate(). btr_cur_need_opposite_intention(): Return true in case btr_cur_compress_recommendation() would hold later during the mini-transaction, or if a page underflow or overflow is possible. If we return true, our caller will escalate to acquiring an exclusive dict_index_t::lock, to prevent a latching order violation and deadlock during btr_compress() or btr_page_split_and_insert(). btr_cur_t::search_leaf(), btr_cur_t::open_leaf(): Also invoke btr_cur_need_opposite_intention() on the leaf page. btr_cur_t::open_leaf(): When escalating to exclusive index locking, acquire exclusive latches on all pages as well. innobase_instant_try(): Return an error code if the root page cannot be retrieved. In addition to the normal stress testing with Random Query Generator (RQG) this has been tested with ./mtr --mysqld=--loose-innodb-limit-optimistic-insert-debug=2 but with the injection in btr_cur_optimistic_insert() for non-leaf pages adjusted so that it would use the value 3. (Otherwise, infinite page splits could occur in some mtr tests.) Tested by: Matthias Leich
-rw-r--r--storage/innobase/btr/btr0btr.cc141
-rw-r--r--storage/innobase/btr/btr0cur.cc160
-rw-r--r--storage/innobase/handler/handler0alter.cc1
-rw-r--r--storage/innobase/include/btr0btr.h2
-rw-r--r--storage/innobase/include/btr0types.h3
-rw-r--r--storage/innobase/include/mtr0mtr.h3
-rw-r--r--storage/innobase/row/row0log.cc6
7 files changed, 156 insertions, 160 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 7fd851f7b0e..1b69f4c7170 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -254,7 +254,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t*
btr_root_block_get(
/*===============*/
- const dict_index_t* index, /*!< in: index tree */
+ dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */
@@ -266,11 +266,31 @@ btr_root_block_get(
return nullptr;
}
- buf_block_t *block = btr_block_get(*index, index->page, mode, false, mtr,
- err);
- if (block)
+ buf_block_t *block;
+#ifndef BTR_CUR_ADAPT
+ static constexpr buf_block_t *guess= nullptr;
+#else
+ buf_block_t *&guess= btr_search_get_info(index)->root_guess;
+ guess=
+#endif
+ block=
+ buf_page_get_gen(page_id_t{index->table->space->id, index->page},
+ index->table->space->zip_size(), mode, guess, BUF_GET,
+ mtr, err, false);
+ ut_ad(!block == (*err != DB_SUCCESS));
+
+ if (UNIV_LIKELY(block != nullptr))
{
- if (index->is_ibuf());
+ if (!!page_is_comp(block->page.frame) != index->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index->id ||
+ !fil_page_index_page_check(block->page.frame) ||
+ index->is_spatial() !=
+ (fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE))
+ {
+ *err= DB_PAGE_CORRUPTED;
+ block= nullptr;
+ }
+ else if (index->is_ibuf());
else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*block, *index->table->space) ||
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
@@ -280,6 +300,9 @@ btr_root_block_get(
block= nullptr;
}
}
+ else if (*err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index);
+
return block;
}
@@ -290,7 +313,7 @@ static
page_t*
btr_root_get(
/*=========*/
- const dict_index_t* index, /*!< in: index tree */
+ dict_index_t* index, /*!< in: index tree */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
{
@@ -502,9 +525,7 @@ btr_block_reget(mtr_t *mtr, const dict_index_t &index,
return block;
}
-#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK));
-#endif
return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err);
}
@@ -773,9 +794,7 @@ btr_page_get_father_node_ptr_for_validate(
const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
dict_index_t* index = btr_cur_get_index(cursor);
ut_ad(!dict_index_is_spatial(index));
-
- ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
const auto level = btr_page_get_level(btr_cur_get_page(cursor));
@@ -793,10 +812,6 @@ btr_page_get_father_node_ptr_for_validate(
}
const rec_t* node_ptr = btr_cur_get_rec(cursor);
-#if 0 /* MDEV-29835 FIXME */
- ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
- || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
-#endif
offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap);
@@ -2456,11 +2471,10 @@ btr_insert_on_non_leaf_level(
}
ut_ad(cursor.flag == BTR_CUR_BINARY);
-#if 0 /* MDEV-29835 FIXME */
- ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive()
+ ut_ad(btr_cur_get_block(&cursor)
+ != mtr->at_savepoint(mtr->get_savepoint() - 1)
|| index->is_spatial()
|| mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
-#endif
if (UNIV_LIKELY(err == DB_SUCCESS)) {
err = btr_cur_optimistic_insert(flags,
@@ -2568,10 +2582,8 @@ btr_attach_half_pages(
prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!prev_block) {
-# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK));
-# endif
prev_block = btr_block_get(*index, prev_page_no,
RW_X_LATCH, !level, mtr);
}
@@ -2582,10 +2594,8 @@ btr_attach_half_pages(
next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!next_block) {
-# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK));
-# endif
next_block = btr_block_get(*index, next_page_no,
RW_X_LATCH, !level, mtr);
}
@@ -3397,9 +3407,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!prev)
{
-# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
-# endif
prev= btr_block_get(index, id.page_no(), RW_X_LATCH,
page_is_leaf(block.page.frame), mtr, &err);
if (UNIV_UNLIKELY(!prev))
@@ -3415,9 +3423,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!next)
{
-# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
-# endif
next= btr_block_get(index, id.page_no(), RW_X_LATCH,
page_is_leaf(block.page.frame), mtr, &err);
if (UNIV_UNLIKELY(!next))
@@ -4291,7 +4297,7 @@ btr_discard_page(
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
-#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
+#if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_NEXT,
block->page.frame + FIL_PAGE_OFFSET,
@@ -4317,7 +4323,7 @@ btr_discard_page(
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
-#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
+#if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_PREV,
block->page.frame + FIL_PAGE_OFFSET,
@@ -4898,8 +4904,7 @@ btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
const trx_t* trx, /*!< in: transaction or NULL */
- ulint level, /*!< in: level number */
- bool lockout)/*!< in: true if X-latch index is intended */
+ ulint level) /*!< in: level number */
{
buf_block_t* block;
page_t* page;
@@ -4918,18 +4923,10 @@ btr_validate_level(
#ifdef UNIV_ZIP_DEBUG
page_zip_des_t* page_zip;
#endif /* UNIV_ZIP_DEBUG */
- ulint savepoint = 0;
- uint32_t parent_page_no = FIL_NULL;
- uint32_t parent_right_page_no = FIL_NULL;
- bool rightmost_child = false;
mtr.start();
- if (lockout) {
- mtr_x_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
+ mtr_x_lock_index(index, &mtr);
dberr_t err;
block = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err);
@@ -5025,11 +5022,7 @@ func_exit:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
- if (lockout) {
- mtr_x_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
+ mtr_x_lock_index(index, &mtr);
page = block->page.frame;
@@ -5073,7 +5066,6 @@ func_exit:
if (right_page_no != FIL_NULL) {
const rec_t* right_rec;
- savepoint = mtr.get_savepoint();
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
@@ -5177,11 +5169,6 @@ broken_links:
father_page = btr_cur_get_page(&node_cur);
node_ptr = btr_cur_get_rec(&node_cur);
- parent_page_no = page_get_page_no(father_page);
- parent_right_page_no = btr_page_get_next(father_page);
- rightmost_child = page_rec_is_supremum(
- page_rec_get_next(node_ptr));
-
rec = page_rec_get_prev(page_get_supremum_rec(page));
if (rec) {
btr_cur_position(index, rec, block, &node_cur);
@@ -5263,37 +5250,6 @@ broken_links:
}
} else if (const rec_t* right_node_ptr
= page_rec_get_next(node_ptr)) {
- if (!lockout && rightmost_child) {
-
- /* To obey latch order of tree blocks,
- we should release the right_block once to
- obtain lock of the uncle block. */
- ut_ad(right_block
- == mtr.at_savepoint(savepoint));
- mtr.rollback_to_savepoint(savepoint,
- savepoint + 1);
-
- if (parent_right_page_no != FIL_NULL) {
- btr_block_get(*index,
- parent_right_page_no,
- RW_SX_LATCH, false,
- &mtr);
- }
-
- right_block = btr_block_get(*index,
- right_page_no,
- RW_SX_LATCH,
- !level, &mtr,
- &err);
- if (!right_block) {
- btr_validate_report1(index, level,
- block);
- fputs("InnoDB: broken FIL_PAGE_NEXT"
- " link\n", stderr);
- goto invalid_page;
- }
- }
-
btr_cur_position(
index,
page_get_infimum_rec(right_block->page.frame),
@@ -5365,20 +5321,6 @@ node_ptr_fails:
mtr.start();
- if (!lockout) {
- if (rightmost_child) {
- if (parent_right_page_no != FIL_NULL) {
- btr_block_get(*index,
- parent_right_page_no,
- RW_SX_LATCH, false,
- &mtr);
- }
- } else if (parent_page_no != FIL_NULL) {
- btr_block_get(*index, parent_page_no,
- RW_SX_LATCH, false, &mtr);
- }
- }
-
block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
goto loop;
@@ -5396,21 +5338,16 @@ btr_validate_index(
dict_index_t* index, /*!< in: index */
const trx_t* trx) /*!< in: transaction or NULL */
{
- const bool lockout= index->is_spatial();
-
mtr_t mtr;
mtr.start();
- if (lockout)
- mtr_x_lock_index(index, &mtr);
- else
- mtr_sx_lock_index(index, &mtr);
+ mtr_x_lock_index(index, &mtr);
dberr_t err;
if (page_t *root= btr_root_get(index, &mtr, &err))
for (auto level= btr_page_get_level(root);; level--)
{
- if (dberr_t err_level= btr_validate_level(index, trx, level, lockout))
+ if (dberr_t err_level= btr_validate_level(index, trx, level))
err= err_level;
if (!level)
break;
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 62c7d44d286..27ed631099d 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -748,29 +748,34 @@ btr_cur_will_modify_tree(
/** Detects whether the modifying record might need a opposite modification
to the intention.
-@param[in] page page
-@param[in] lock_intention lock intention for the tree operation
-@param[in] rec record (current node_ptr)
+@param page page
+@param lock_intention lock intention for the tree operation
+@param node_ptr_max_size the maximum size of a node pointer
+@param compress_limit BTR_CUR_PAGE_COMPRESS_LIMIT(index)
+@param rec record (current node_ptr)
@return true if tree modification is needed */
-static
-bool
-btr_cur_need_opposite_intention(
- const page_t* page,
- btr_intention_t lock_intention,
- const rec_t* rec)
+static bool btr_cur_need_opposite_intention(const page_t *page,
+ btr_intention_t lock_intention,
+ ulint node_ptr_max_size,
+ ulint compress_limit,
+ const rec_t *rec)
{
- switch (lock_intention) {
- case BTR_INTENTION_DELETE:
- return (page_has_prev(page) && page_rec_is_first(rec, page)) ||
- (page_has_next(page) && page_rec_is_last(rec, page));
- case BTR_INTENTION_INSERT:
- return page_has_next(page) && page_rec_is_last(rec, page);
- case BTR_INTENTION_BOTH:
- return(false);
- }
-
- MY_ASSERT_UNREACHABLE();
- return(false);
+ if (lock_intention != BTR_INTENTION_INSERT)
+ {
+ /* We compensate also for btr_cur_compress_recommendation() */
+ if (!page_has_siblings(page) ||
+ page_rec_is_first(rec, page) || page_rec_is_last(rec, page) ||
+ page_get_data_size(page) < node_ptr_max_size + compress_limit)
+ return true;
+ if (lock_intention == BTR_INTENTION_DELETE)
+ return false;
+ }
+ else if (page_has_next(page) && page_rec_is_last(rec, page))
+ return true;
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), return true);
+ const ulint max_size= page_get_max_insert_size_after_reorganize(page, 2);
+ return max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT + node_ptr_max_size ||
+ max_size < node_ptr_max_size * 2;
}
/**
@@ -1038,7 +1043,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
const ulint savepoint= mtr->get_savepoint();
- ulint node_ptr_max_size= 0;
+ ulint node_ptr_max_size= 0, compress_limit= 0;
rw_lock_type_t rw_latch= RW_S_LATCH;
switch (latch_mode) {
@@ -1050,13 +1055,19 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK));
break;
}
- if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads &&
- trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
- /* Most delete-intended operations are due to the purge of history.
- Prioritize them when the history list is growing huge. */
- mtr_x_lock_index(index(), mtr);
- else
- mtr_sx_lock_index(index(), mtr);
+ if (lock_intention == BTR_INTENTION_DELETE)
+ {
+ compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index());
+ if (buf_pool.n_pend_reads &&
+ trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
+ {
+ /* Most delete-intended operations are due to the purge of history.
+ Prioritize them when the history list is growing huge. */
+ mtr_x_lock_index(index(), mtr);
+ break;
+ }
+ }
+ mtr_sx_lock_index(index(), mtr);
break;
#ifdef UNIV_DEBUG
case BTR_CONT_MODIFY_TREE:
@@ -1331,6 +1342,10 @@ release_tree:
!btr_block_get(*index(), btr_page_get_next(block->page.frame),
RW_X_LATCH, false, mtr, &err))
goto func_exit;
+ if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
+ node_ptr_max_size, compress_limit,
+ page_cur.rec))
+ goto need_opposite_intention;
}
reached_latched_leaf:
@@ -1384,6 +1399,7 @@ release_tree:
break;
case BTR_MODIFY_TREE:
if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
+ node_ptr_max_size, compress_limit,
page_cur.rec))
/* If the rec is the first or last in the page for pessimistic
delete intention, it might cause node_ptr insert for the upper
@@ -1536,6 +1552,17 @@ release_tree:
goto search_loop;
}
+ATTRIBUTE_COLD void mtr_t::index_lock_upgrade()
+{
+ auto &slot= m_memo[get_savepoint() - 1];
+ if (slot.type == MTR_MEMO_X_LOCK)
+ return;
+ ut_ad(slot.type == MTR_MEMO_SX_LOCK);
+ index_lock *lock= static_cast<index_lock*>(slot.object);
+ lock->u_x_upgrade(SRW_LOCK_CALL);
+ slot.type= MTR_MEMO_X_LOCK;
+}
+
ATTRIBUTE_COLD
dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
page_cur_mode_t mode, mtr_t *mtr)
@@ -1554,8 +1581,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
ut_ad(block->page.id().page_no() == index()->page);
block->page.fix();
mtr->rollback_to_savepoint(1);
- ut_ad(mtr->memo_contains_flagged(&index()->lock,
- MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK));
+ mtr->index_lock_upgrade();
const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)};
@@ -1785,7 +1811,6 @@ search_loop:
dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
btr_latch_mode latch_mode, mtr_t *mtr)
{
- btr_intention_t lock_intention;
ulint n_blocks= 0;
mem_heap_t *heap= nullptr;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
@@ -1797,7 +1822,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED);
- lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
+ btr_intention_t lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
@@ -1805,7 +1830,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
auto savepoint= mtr->get_savepoint();
rw_lock_type_t upper_rw_latch= RW_X_LATCH;
- ulint node_ptr_max_size= 0;
+ ulint node_ptr_max_size= 0, compress_limit= 0;
if (latch_mode == BTR_MODIFY_TREE)
{
@@ -1814,12 +1839,18 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
and read IO bandwidth should be prioritized for them, when the
history list is growing huge. */
savepoint++;
- if (lock_intention == BTR_INTENTION_DELETE
- && buf_pool.n_pend_reads
- && trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
- mtr_x_lock_index(index, mtr);
- else
- mtr_sx_lock_index(index, mtr);
+ if (lock_intention == BTR_INTENTION_DELETE)
+ {
+ compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index);
+
+ if (buf_pool.n_pend_reads &&
+ trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
+ {
+ mtr_x_lock_index(index, mtr);
+ goto index_locked;
+ }
+ }
+ mtr_sx_lock_index(index, mtr);
}
else
{
@@ -1840,9 +1871,11 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
}
}
+index_locked:
ut_ad(savepoint == mtr->get_savepoint());
- const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12);
+ const rw_lock_type_t root_leaf_rw_latch=
+ rw_lock_type_t(latch_mode & (RW_S_LATCH | RW_X_LATCH));
page_cur.index = index;
@@ -1913,15 +1946,28 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
!btr_block_get(*index, btr_page_get_next(block->page.frame),
RW_X_LATCH, false, mtr, &err))
break;
+
+ if (!index->lock.have_x() &&
+ btr_cur_need_opposite_intention(block->page.frame,
+ lock_intention,
+ node_ptr_max_size,
+ compress_limit, page_cur.rec))
+ goto need_opposite_intention;
}
else
{
if (rw_latch == RW_NO_LATCH)
mtr->upgrade_buffer_fix(leaf_savepoint - 1,
- rw_lock_type_t(latch_mode));
- /* Release index->lock if needed, and the non-leaf pages. */
- mtr->rollback_to_savepoint(savepoint - !latch_by_caller,
- leaf_savepoint - 1);
+ rw_lock_type_t(latch_mode &
+ (RW_X_LATCH | RW_S_LATCH)));
+ if (latch_mode != BTR_CONT_MODIFY_TREE)
+ {
+ ut_ad(latch_mode == BTR_MODIFY_LEAF ||
+ latch_mode == BTR_SEARCH_LEAF);
+ /* Release index->lock if needed, and the non-leaf pages. */
+ mtr->rollback_to_savepoint(savepoint - !latch_by_caller,
+ leaf_savepoint - 1);
+ }
}
break;
}
@@ -1943,22 +1989,25 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
: !page_cur_move_to_prev(&page_cur))
goto corrupted;
- const rec_t *node_ptr= page_cur.rec;
- offsets= rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED,
+ offsets= rec_get_offsets(page_cur.rec, index, offsets, 0, ULINT_UNDEFINED,
&heap);
ut_ad(latch_mode != BTR_MODIFY_TREE || upper_rw_latch == RW_X_LATCH);
if (latch_mode != BTR_MODIFY_TREE);
- else if (btr_cur_need_opposite_intention(block->page.frame,
- lock_intention, node_ptr))
+ else if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
+ node_ptr_max_size, compress_limit,
+ page_cur.rec))
{
+ need_opposite_intention:
/* If the rec is the first or last in the page for pessimistic
delete intention, it might cause node_ptr insert for the upper
level. We should change the intention and retry. */
mtr->rollback_to_savepoint(savepoint);
- lock_intention= BTR_INTENTION_BOTH;
+ mtr->index_lock_upgrade();
+ /* X-latch all pages from now on */
+ latch_mode= BTR_CONT_MODIFY_TREE;
page= index->page;
height= ULINT_UNDEFINED;
n_blocks= 0;
@@ -1967,7 +2016,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
else
{
if (!btr_cur_will_modify_tree(index, block->page.frame,
- lock_intention, node_ptr,
+ lock_intention, page_cur.rec,
node_ptr_max_size, zip_size, mtr))
{
ut_ad(n_blocks);
@@ -1997,7 +2046,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
}
/* Go to the child node */
- page= btr_node_ptr_get_child_page_no(node_ptr, offsets);
+ page= btr_node_ptr_get_child_page_no(page_cur.rec, offsets);
n_blocks++;
}
@@ -2307,8 +2356,7 @@ convert_big_rec:
return(DB_TOO_BIG_RECORD);
}
- LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
- goto fail);
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), goto fail);
if (block->page.zip.data && leaf
&& (page_get_data_size(page) + rec_size
@@ -2322,7 +2370,7 @@ fail:
/* prefetch siblings of the leaf for the pessimistic
operation, if the page is leaf. */
- if (page_is_leaf(page)) {
+ if (leaf) {
btr_cur_prefetch_siblings(block, index);
}
fail_err:
@@ -2391,7 +2439,7 @@ fail_err:
#ifdef UNIV_DEBUG
if (!(flags & BTR_CREATE_FLAG)
- && index->is_primary() && page_is_leaf(page)) {
+ && leaf && index->is_primary()) {
const dfield_t* trx_id = dtuple_get_nth_field(
entry, dict_col_get_clust_pos(
dict_table_get_sys_col(index->table,
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 9e9c0a17a39..6a8986d76d2 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -6104,6 +6104,7 @@ func_exit:
id, MTR_MEMO_PAGE_SX_FIX);
if (UNIV_UNLIKELY(!root)) {
+ err = DB_CORRUPTION;
goto func_exit;
}
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a1cc10b05db..a56598d3620 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t*
btr_root_block_get(
/*===============*/
- const dict_index_t* index, /*!< in: index tree */
+ dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 912c022c64f..fc829e7857a 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -103,6 +103,9 @@ enum btr_latch_mode {
dict_index_t::lock is being held in non-exclusive mode. */
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED,
+ /** Attempt to modify records in an x-latched tree. */
+ BTR_MODIFY_TREE_ALREADY_LATCHED = BTR_MODIFY_TREE
+ | BTR_ALREADY_S_LATCHED,
/** U-latch root and X-latch a leaf page, assuming that
dict_index_t::lock is being held in U mode. */
BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index f3fe1841b2e..60e01abe18d 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -344,6 +344,9 @@ public:
/** Upgrade U locks on a block to X */
void page_lock_upgrade(const buf_block_t &block);
+ /** Upgrade index U lock to X */
+ ATTRIBUTE_COLD void index_lock_upgrade();
+
/** Check if we are holding tablespace latch
@param space tablespace to search for
@return whether space.latch is being held */
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 0743dc2bb50..b21ff2b9f86 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -3078,6 +3078,9 @@ row_log_apply_op_low(
mtr_start(&mtr);
index->set_modified(mtr);
cursor.page_cur.index = index;
+ if (has_index_lock) {
+ mtr_x_lock_index(index, &mtr);
+ }
/* We perform the pessimistic variant of the operations if we
already hold index->lock exclusively. First, search the
@@ -3085,7 +3088,8 @@ row_log_apply_op_low(
depending on when the row in the clustered index was
scanned. */
*error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock
- ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr);
+ ? BTR_MODIFY_TREE_ALREADY_LATCHED
+ : BTR_MODIFY_LEAF, &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit;
}