diff options
author | Vlad Lesin <vlad_lesin@mail.ru> | 2021-12-02 16:14:27 +0300 |
---|---|---|
committer | Vlad Lesin <vlad_lesin@mail.ru> | 2022-01-18 15:18:42 +0300 |
commit | bd03c0e51629e1c3969a171137712a6bb854c232 (patch) | |
tree | 4a2d5de90cde77d0e4afcc9e05a36443d0e08cf9 | |
parent | 1abc476f0b4fe60cb268fc68cefd9b5a479983ac (diff) | |
download | mariadb-git-bb-10.6-MDEV-27025-deadlock.tar.gz |
MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lockbb-10.6-MDEV-27025-deadlock
When lock is checked for conflict, ignore other locks on the record if
they wait for the requesting transaction.
lock_rec_has_to_wait_in_queue() iterates not all locks for
the page, but only the locks located before the waiting lock in the
queue. So there is some invariant - any lock in the queue can wait only
lock which is located before the waiting lock in the queue.
In the case when conflicting lock waits for the transaction of
requesting lock, we need to place the requesting lock before the waiting
lock in the queue to preserve the invariant. That is why we are looking
for the first waiting for requesting transation lock and place the new
lock just after the last granted requesting transaction lock before the
first waiting for requesting transaction lock.
Example:
trx1 waiting lock, trx1 granted lock, ..., trx2 lock - waiting for trx1
place new lock here -----------------^
There are also implicit locks which are lazily converted to explicit
ones, and we need to place the newly created explicit lock to the correct
place in a queue. All explicit locks converted from implicit ones are
placed just after the last non-waiting lock of the same transaction before
the first waiting for the transaction lock.
Code review and cleanup was made by Marko Mäkelä.
-rw-r--r-- | mysql-test/suite/innodb/r/lock_wait_conflict.result | 27 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/lock_wait_conflict.test | 60 | ||||
-rw-r--r-- | mysql-test/suite/versioning/r/update.result | 1 | ||||
-rw-r--r-- | mysql-test/suite/versioning/t/update.test | 4 | ||||
-rw-r--r-- | storage/innobase/include/hash0hash.h | 41 | ||||
-rw-r--r-- | storage/innobase/include/lock0lock.h | 9 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 145 |
7 files changed, 241 insertions, 46 deletions
diff --git a/mysql-test/suite/innodb/r/lock_wait_conflict.result b/mysql-test/suite/innodb/r/lock_wait_conflict.result new file mode 100644 index 00000000000..25d18c03ea1 --- /dev/null +++ b/mysql-test/suite/innodb/r/lock_wait_conflict.result @@ -0,0 +1,27 @@ +# +# MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock +# +CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB; +connect prevent_purge,localhost,root,,; +start transaction with consistent snapshot; +connection default; +INSERT INTO t VALUES (20,20); +DELETE FROM t WHERE b = 20; +connect con_ins,localhost,root,,; +SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont'; +INSERT INTO t VALUES(10, 20); +connect con_del,localhost,root,,; +SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked'; +DELETE FROM t WHERE b = 20; +connection default; +SET DEBUG_SYNC = 'now WAIT_FOR del_locked'; +SET DEBUG_SYNC = 'now SIGNAL ins_cont'; +connection con_ins; +disconnect con_ins; +connection con_del; +disconnect con_del; +disconnect prevent_purge; +connection default; +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/lock_wait_conflict.test b/mysql-test/suite/innodb/t/lock_wait_conflict.test new file mode 100644 index 00000000000..46a29e14b43 --- /dev/null +++ b/mysql-test/suite/innodb/t/lock_wait_conflict.test @@ -0,0 +1,60 @@ +--source include/have_innodb.inc +--source include/count_sessions.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +--echo # +--echo # MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock +--echo # + +# The test checks the ability to acquire exclusive record lock if the acquiring +# transaction already holds a shared lock on the record and another transaction +# is waiting for a lock. + +CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB; + +--connect(prevent_purge,localhost,root,,) +start transaction with consistent snapshot; + +--connection default +INSERT INTO t VALUES (20,20); +DELETE FROM t WHERE b = 20; + +--connect(con_ins,localhost,root,,) +SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont'; +send +INSERT INTO t VALUES(10, 20); + +--connect(con_del,localhost,root,,) +SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks'; +SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked'; +############################################################################### +# This DELETE creates waiting ORDINARY X-lock for heap_no 2 as the record is +# delete-marked, this lock conflicts with ORDINARY S-lock set by the the last +# INSERT. After the last INSERT creates insert-intention lock on +# heap_no 2, this lock will conflict with waiting ORDINARY X-lock of this +# DELETE, what causes DEADLOCK error for this DELETE. +############################################################################### +send +DELETE FROM t WHERE b = 20; + +--connection default +SET DEBUG_SYNC = 'now WAIT_FOR del_locked'; +SET DEBUG_SYNC = 'now SIGNAL ins_cont'; + +--connection con_ins +--reap +--disconnect con_ins + +--connection con_del +# Without the fix, ER_LOCK_DEADLOCK would be reported here. +--reap +--disconnect con_del + +--disconnect prevent_purge + +--connection default + +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; +--source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/versioning/r/update.result b/mysql-test/suite/versioning/r/update.result index 9e5e28b09a1..41058969801 100644 --- a/mysql-test/suite/versioning/r/update.result +++ b/mysql-test/suite/versioning/r/update.result @@ -283,7 +283,6 @@ connection default; update t1 set b = 'foo'; connection con1; update t1 set a = 'bar'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction disconnect con1; connection default; drop table t1; diff --git a/mysql-test/suite/versioning/t/update.test b/mysql-test/suite/versioning/t/update.test index a3252c9f8e3..df07cec33cb 100644 --- a/mysql-test/suite/versioning/t/update.test +++ b/mysql-test/suite/versioning/t/update.test @@ -186,7 +186,9 @@ send update t1 set b = 'foo'; connection con1; let $wait_condition= select count(*) from information_schema.innodb_lock_waits; source include/wait_condition.inc; -error ER_LOCK_DEADLOCK; +# There must no be DEADLOCK here as con1 transaction already holds locks, and +# default's transaction lock is waiting, so the locks of the following "UPDATE" +# must not conflict with waiting lock. update t1 set a = 'bar'; disconnect con1; connection default; diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h index 8e7b8dfd1e6..a732c17f767 100644 --- a/storage/innobase/include/hash0hash.h +++ b/storage/innobase/include/hash0hash.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,8 +29,43 @@ Created 5/20/1997 Heikki Tuuri #include "ut0new.h" struct hash_table_t; -struct hash_cell_t{ - void* node; /*!< hash chain node, NULL if none */ +struct hash_cell_t +{ + /** singly-linked, nullptr terminated list of hash buckets */ + void *node; + + /** Append an element. + @tparam T type of the element + @param insert the being-inserted element + @param next the next-element pointer in T */ + template<typename T> + void append(T &insert, T *T::*next) + { + void **after; + for (after= &node; *after; + after= reinterpret_cast<void**>(&(static_cast<T*>(*after)->*next))); + insert.*next= nullptr; + *after= &insert; + } + + /** Insert an element after another. + @tparam T type of the element + @param after the element after which to insert + @param insert the being-inserted element + @param next the next-element pointer in T */ + template<typename T> + void insert_after(T &after, T &insert, T *T::*next) + { +#ifdef UNIV_DEBUG + for (const T *c= static_cast<const T*>(node); c; c= c->*next) + if (c == &after) + goto found; + ut_error; + found: +#endif + insert.*next= after.*next; + after.*next= &insert; + } }; /*******************************************************************//** diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 50b9792cf2b..f2b1dc488d6 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1175,6 +1175,10 @@ without checking for deadlocks or conflicts. @param[in] index the index tree @param[in,out] trx transaction @param[in] holds_trx_mutex whether the caller holds trx->mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ lock_t* lock_rec_create_low( @@ -1185,7 +1189,8 @@ lock_rec_create_low( ulint heap_no, dict_index_t* index, trx_t* trx, - bool holds_trx_mutex); + bool holds_trx_mutex, + bool insert_before_waiting = false); /** Enqueue a waiting request for a lock which cannot be granted immediately. Check for deadlocks. diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index bebd2acfa0d..5551c85db0b 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1004,24 +1004,43 @@ func_exit: /*********************************************************************//** Checks if some other transaction has a conflicting explicit lock request in the queue, so that we have to wait. -@return lock or NULL */ -static -lock_t* -lock_rec_other_has_conflicting( -/*===========================*/ - unsigned mode, /*!< in: LOCK_S or LOCK_X, - possibly ORed to LOCK_GAP or - LOC_REC_NOT_GAP, - LOCK_INSERT_INTENTION */ - const hash_cell_t& cell, /*!< in: lock hash table cell */ - const page_id_t id, /*!< in: page identifier */ - ulint heap_no,/*!< in: heap number of the record */ - const trx_t* trx) /*!< in: our transaction */ +@param[in] mode LOCK_S or LOCK_X, possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP, +LOCK_INSERT_INTENTION +@param[in] cell lock hash table cell +@param[in] id page identifier +@param[in] heap_no heap number of the record +@param[in] trx our transaction +@param[out] was_ignored true if conflicting locks waiting for the current +transaction were ignored +@return conflicting lock and the flag which indicated if conflicting locks +which wait for the current transaction were ignored */ +static lock_t *lock_rec_other_has_conflicting(unsigned mode, + const hash_cell_t &cell, + const page_id_t id, + ulint heap_no, const trx_t *trx, + bool *was_ignored= nullptr) { bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM); for (lock_t* lock = lock_sys_t::get_first(cell, id, heap_no); lock; lock = lock_rec_get_next(heap_no, lock)) { + /* There is no need to lock lock_sys.wait_mutex to check + trx->lock.wait_trx here because the current function is + executed under the cell latch, and trx->lock.wait_trx + transaction can change wait_trx field only under the cell + latch, wait_trx trx_t object can not be deinitialized before + releasing all its locks, and during releasing the locks the + cell latch will also be requested. So while the cell latch is + held, lock->trx->lock.wait_trx can't be changed. There also + can't be lock loops for one record, because all waiting locks + of the record will always wait for the same lock of the record + in a cell array, and check for conflicting lock will always + start with the first lock for the heap_no, and go ahead with + the same order(the order of the locks in the cell array) */ + if (lock->is_waiting() && lock->trx->lock.wait_trx == trx) { + if (was_ignored) *was_ignored= true; + continue; + } if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) { return(lock); } @@ -1147,6 +1166,10 @@ without checking for deadlocks or conflicts. @param[in] index the index tree @param[in,out] trx transaction @param[in] holds_trx_mutex whether the caller holds trx->mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ lock_t* lock_rec_create_low( @@ -1157,7 +1180,8 @@ lock_rec_create_low( ulint heap_no, dict_index_t* index, trx_t* trx, - bool holds_trx_mutex) + bool holds_trx_mutex, + bool insert_before_waiting) { lock_t* lock; ulint n_bytes; @@ -1232,7 +1256,36 @@ lock_rec_create_low( ut_ad(index->table->get_ref_count() || !index->table->can_be_evicted); const auto lock_hash = &lock_sys.hash_get(type_mode); - HASH_INSERT(lock_t, hash, lock_hash, page_id.fold(), lock); + hash_cell_t& cell = *lock_hash->cell_get(page_id.fold()); + + if (insert_before_waiting + && !(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE))) { + /* Try to insert the lock just after the last non-waiting + lock of the current transaction which immediately + precedes the first waiting lock request. */ + + lock_t* last_non_waiting = nullptr; + + for (lock_t* l = lock_sys_t::get_first(cell, page_id, heap_no); + l; l = lock_rec_get_next(heap_no, l)) { + if (l->is_waiting() && l->trx->lock.wait_trx == trx) { + break; + } + if (l->trx == trx) { + last_non_waiting = l; + } + } + + if (!last_non_waiting) { + goto append_last; + } + + cell.insert_after(*last_non_waiting, *lock, &lock_t::hash); + } + else { +append_last: + cell.append(*lock, &lock_t::hash); + } if (type_mode & LOCK_WAIT) { if (trx->lock.wait_trx) { @@ -1357,24 +1410,22 @@ on the record, and the request to be added is not a waiting request, we can reuse a suitable record lock object already existing on the same page, just setting the appropriate bit in its bitmap. This is a low-level function which does NOT check for deadlocks or lock compatibility! -@return lock where the bit was set */ +@param[in] type_mode lock mode, wait, gap etc. flags +@param[in,out] cell first hash table cell +@param[in] id page identifier +@param[in] page buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex TRUE if caller owns the transaction mutex +@param[in] insert_before_waiting true=insert B-tree record lock right +before a waiting lock request; false=insert the lock at the end of the queue */ TRANSACTIONAL_TARGET -static -void -lock_rec_add_to_queue( -/*==================*/ - unsigned type_mode,/*!< in: lock mode, wait, gap - etc. flags */ - hash_cell_t& cell, /*!< in,out: first hash table cell */ - const page_id_t id, /*!< in: page identifier */ - const page_t* page, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in/out: transaction */ - bool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns the - transaction mutex */ +static void lock_rec_add_to_queue(unsigned type_mode, hash_cell_t &cell, + const page_id_t id, const page_t *page, + ulint heap_no, dict_index_t *index, + trx_t *trx, bool caller_owns_trx_mutex, + bool insert_before_waiting= false) { ut_d(lock_sys.hash_get(type_mode).assert_locked(id)); ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner()); @@ -1469,7 +1520,7 @@ create: lock_rec_create_low(nullptr, type_mode, id, page, heap_no, index, trx, - caller_owns_trx_mutex); + caller_owns_trx_mutex, insert_before_waiting); } /*********************************************************************//** @@ -1536,8 +1587,10 @@ lock_rec_lock( /* Do nothing if the trx already has a strong enough lock on rec */ if (!lock_rec_has_expl(mode, g.cell(), id, heap_no, trx)) { + bool was_ignored = false; if (lock_t *c_lock= lock_rec_other_has_conflicting(mode, g.cell(), id, - heap_no, trx)) + heap_no, trx, + &was_ignored)) /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not @@ -1550,7 +1603,7 @@ lock_rec_lock( { /* Set the requested lock on the record. */ lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, heap_no, - index, trx, true); + index, trx, true, was_ignored); err= DB_SUCCESS_LOCKED_REC; } } @@ -4654,7 +4707,7 @@ func_exit: wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id); wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); - if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + if (!lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, cell, id, heap_no, impl_trx)) { ib::info() << "WSREP impl BF lock conflict"; @@ -4663,8 +4716,22 @@ func_exit: #endif /* WITH_WSREP */ { ut_ad(other_lock->is_waiting()); - ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, - cell, id, heap_no, + /* After MDEV-27025 fix the following case is + possible: + 1. trx 1 acquires S-lock; + 2. trx 2 creates X-lock waiting for trx 1; + 3. trx 1 creates implicit lock, as + lock_rec_other_has_conflicting() returns no + conflicting trx 2 X-lock, the explicit lock + will not be created; + 4. trx 3 creates waiting X-lock, + it will wait for S-lock of trx 1. + That is why we relaxing the condition here and + check only for S-lock. + */ + ut_ad(lock_rec_has_expl(LOCK_S + | LOCK_REC_NOT_GAP, + cell, id, heap_no, impl_trx)); } } @@ -5060,7 +5127,7 @@ lock_rec_convert_impl_to_expl_for_trx( !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no, trx)) lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, - page_align(rec), heap_no, index, trx, true); + page_align(rec), heap_no, index, trx, true, true); } trx->mutex_unlock(); |