diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-08-26 14:11:17 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-09-10 08:25:21 +0300 |
commit | bc781fe445850ced7740140092fa0506cd328f21 (patch) | |
tree | dc45e4497c37f7f6b0eb8279baab5bd4df3ae4b0 | |
parent | 34212233634b53a4fea7f662c43c19028aec2c2a (diff) | |
download | mariadb-git-bb-10.5-MDEV-23101.tar.gz |
MDEV-23101 : SIGSEGV in lock_rec_unlock() when Galera is enabledbb-10.5-MDEV-23101
Remove incorrect BF (brute force) handling from lock_rec_has_to_wait_in_queue
and move condition to correct callers. Add a function to report
BF lock waits and assert if incorrect BF-BF lock wait happens.
wsrep_report_bf_lock_wait
Add a new function to report BF lock wait.
wsrep_assert_no_bf_bf_wait
Add a new function to check do we have a
BF-BF wait and if we have report this case
and assert as it is a bug.
lock_rec_has_to_wait
Use new wsrep_assert_bf_wait to check BF-BF wait.
lock_rec_create_low
lock_table_create
Use new function to report BF lock waits.
lock_rec_insert_by_trx_age
lock_grant_and_move_on_page
lock_grant_and_move_on_rec
Assert that trx is not Galera as VATS is not compatible
with Galera.
lock_rec_add_to_queue
If there is conflicting lock in a queue make sure that
transaction is BF.
lock_rec_has_to_wait_in_queue
Remove incorrect BF handling. If there is conflicting
locks in a queue all transactions must wait.
lock_rec_dequeue_from_page
lock_rec_unlock
If there is conflicting lock make sure it is not
BF-BF case.
lock_rec_queue_validate
Add Galera record locking rules comment and use
new function to report BF lock waits.
All attempts to reproduce the original assertion have been
failed. Therefore, there is no test case on this commit.
-rw-r--r-- | include/mysql/service_wsrep.h | 5 | ||||
-rw-r--r-- | sql/service_wsrep.cc | 23 | ||||
-rw-r--r-- | sql/sql_plugin_services.ic | 3 | ||||
-rw-r--r-- | sql/wsrep_dummy.cc | 6 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 296 |
5 files changed, 168 insertions, 165 deletions
diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 1b1d54c036b..f16331f1214 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -88,6 +88,8 @@ extern struct wsrep_service_st { my_bool (*wsrep_thd_has_ignored_error_func)(const MYSQL_THD thd); void (*wsrep_thd_set_ignored_error_func)(MYSQL_THD thd, my_bool val); bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd); + void (*wsrep_report_bf_lock_wait_func)(const MYSQL_THD thd, + unsigned long long trx_id); } *wsrep_service; #define MYSQL_SERVICE_WSREP_INCLUDED @@ -132,6 +134,7 @@ extern struct wsrep_service_st { #define wsrep_thd_has_ignored_error(T) wsrep_service->wsrep_thd_has_ignored_error_func(T) #define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V) #define wsrep_thd_set_wsrep_aborter(T) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2) +#define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I) #else #define MYSQL_SERVICE_WSREP_STATIC_INCLUDED @@ -229,5 +232,7 @@ extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd); extern "C" my_bool wsrep_thd_has_ignored_error(const MYSQL_THD thd); extern "C" void wsrep_thd_set_ignored_error(MYSQL_THD thd, my_bool val); extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd); +extern "C" void wsrep_report_bf_lock_wait(const THD *thd, + unsigned long long trx_id); #endif #endif /* MYSQL_SERVICE_WSREP_INCLUDED */ diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index f61db1e80e8..35e455402e1 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -372,4 +372,27 @@ extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd) } victim_thd->wsrep_aborter = bf_thd->thread_id; return false; +} + +extern "C" void wsrep_report_bf_lock_wait(const THD *thd, + unsigned long long trx_id) +{ + if (thd) + { + WSREP_ERROR("Thread %s trx_id: %llu thread: %ld " + "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " + "applier: %d toi: %d local: %d " + "query: %s", + wsrep_thd_is_BF(thd, false) ? "BF" : "normal", + trx_id, + thd_get_thread_id(thd), + wsrep_thd_trx_seqno(thd), + wsrep_thd_client_state_str(thd), + wsrep_thd_client_mode_str(thd), + wsrep_thd_transaction_state_str(thd), + wsrep_thd_is_applying(thd), + wsrep_thd_is_toi(thd), + wsrep_thd_is_local(thd), + wsrep_thd_query(thd)); + } }
\ No newline at end of file diff --git a/sql/sql_plugin_services.ic b/sql/sql_plugin_services.ic index e4f9f3c0d13..ca1cbeeeb3a 100644 --- a/sql/sql_plugin_services.ic +++ b/sql/sql_plugin_services.ic @@ -178,7 +178,8 @@ static struct wsrep_service_st wsrep_handler = { wsrep_OSU_method_get, wsrep_thd_has_ignored_error, wsrep_thd_set_ignored_error, - wsrep_thd_set_wsrep_aborter + wsrep_thd_set_wsrep_aborter, + wsrep_report_bf_lock_wait }; static struct thd_specifics_service_st thd_specifics_handler= diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index 139cd5cd7ae..497f4ddecc7 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -151,4 +151,8 @@ void wsrep_thd_set_ignored_error(THD*, my_bool) ulong wsrep_OSU_method_get(const THD*) { return 0;} bool wsrep_thd_set_wsrep_aborter(THD*, THD*) -{ return 0;}
\ No newline at end of file +{ return 0;} + +void wsrep_report_bf_lock_wait(const THD*, + unsigned long long) +{}
\ No newline at end of file diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 2c888f55b69..5c527bcf997 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -621,6 +621,57 @@ lock_rec_get_insert_intention( return(lock->type_mode & LOCK_INSERT_INTENTION); } +#ifdef WITH_WSREP +/** Check if both conflicting lock and other record lock are brute force +(BF). This case is a bug so report lock information and wsrep state. +@param[in] lock_rec1 conflicting waiting record lock or NULL +@param[in] lock_rec2 other waiting record lock +@param[in] trx1 lock_rec1 can be NULL, trx +*/ +static void wsrep_assert_no_bf_bf_wait( + const lock_t* lock_rec1, + const lock_t* lock_rec2, + const trx_t* trx1) +{ + ut_ad(!lock_rec1 || lock_get_type_low(lock_rec1) == LOCK_REC); + ut_ad(lock_get_type_low(lock_rec2) == LOCK_REC); + + if (!trx1->is_wsrep() || !lock_rec2->trx->is_wsrep()) + return; + if (UNIV_LIKELY(!wsrep_thd_is_BF(trx1->mysql_thd, FALSE))) + return; + if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE))) + return; + + mtr_t mtr; + + if (lock_rec1) { + ib::error() << "Waiting lock on table: " + << lock_rec1->index->table->name + << " index: " + << lock_rec1->index->name() + << " that has conflicting lock "; + lock_rec_print(stderr, lock_rec1, mtr); + } + + ib::error() << "Conflicting lock on table: " + << lock_rec2->index->table->name + << " index: " + << lock_rec2->index->name() + << " that has lock "; + lock_rec_print(stderr, lock_rec2, mtr); + + ib::error() << "WSREP state: "; + + wsrep_report_bf_lock_wait(trx1->mysql_thd, + trx1->id); + wsrep_report_bf_lock_wait(lock_rec2->trx->mysql_thd, + lock_rec2->trx->id); + /* BF-BF wait is a bug */ + ut_error; +} +#endif /* WITH_WSREP */ + /*********************************************************************//** Checks if a lock request for a new lock has to wait for request lock2. @return TRUE if new lock has to wait for lock2 to be removed */ @@ -727,72 +778,9 @@ lock_rec_has_to_wait( } #ifdef WITH_WSREP - /* if BF thread is locking and has conflict with another BF - thread, we need to look at trx ordering and lock types */ - if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) - && wsrep_thd_is_BF(lock2->trx->mysql_thd, FALSE)) { - mtr_t mtr; - - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "BF-BF lock conflict, locking: " - << for_locking; - lock_rec_print(stderr, lock2, mtr); - ib::info() - << " SQL1: " << wsrep_thd_query(trx->mysql_thd) - << " SQL2: " - << wsrep_thd_query(lock2->trx->mysql_thd); - } - - if ((type_mode & LOCK_MODE_MASK) == LOCK_X - && (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) { - if (for_locking || UNIV_UNLIKELY(wsrep_debug)) { - /* exclusive lock conflicts are not - accepted */ - ib::info() - << "BF-BF X lock conflict,mode: " - << type_mode - << " supremum: " << lock_is_on_supremum - << "conflicts states: my " - << wsrep_thd_transaction_state_str( - trx->mysql_thd) - << " locked " - << wsrep_thd_transaction_state_str( - lock2->trx->mysql_thd); - lock_rec_print(stderr, lock2, mtr); - ib::info() << " SQL1: " - << wsrep_thd_query(trx->mysql_thd) - << " SQL2: " - << wsrep_thd_query( - lock2->trx->mysql_thd); - - if (for_locking) { - return false; - } - } - } else { - /* if lock2->index->n_uniq <= - lock2->index->n_user_defined_cols - operation is on uniq index - */ - if (wsrep_debug) { - ib::info() - << "BF conflict, modes: " << type_mode - << ":" << lock2->type_mode - << " idx: " << lock2->index->name() - << " table: " - << lock2->index->table->name - << " n_uniq: " << lock2->index->n_uniq - << " n_user: " - << lock2->index->n_user_defined_cols - << " SQL1: " - << wsrep_thd_query(trx->mysql_thd) - << " SQL2: " - << wsrep_thd_query( - lock2->trx->mysql_thd); - } - return false; - } - } + /* There should not be two conflicting locks that are + brute force. If there is it is a bug. */ + wsrep_assert_no_bf_bf_wait(nullptr, lock2, trx); #endif /* WITH_WSREP */ return true; @@ -1451,11 +1439,8 @@ lock_rec_create_low( trx_mutex_exit(c_lock->trx); if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "WSREP: c_lock canceled " - << ib::hex(c_lock->trx->id) - << " SQL: " - << wsrep_thd_query( - c_lock->trx->mysql_thd); + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); } /* have to bail out here to avoid lock_set_lock... */ @@ -1534,6 +1519,7 @@ lock_rec_insert_by_trx_age( hash_table_t* hash; hash_cell_t* cell; + ut_ad(!in_lock->trx->is_wsrep()); space = in_lock->un_member.rec_lock.space; page_no = in_lock->un_member.rec_lock.page_no; rec_fold = lock_rec_fold(space, page_no); @@ -1798,27 +1784,19 @@ lock_rec_add_to_queue( = lock_rec_other_has_expl_req( mode, block, false, heap_no, trx); #ifdef WITH_WSREP - if (other_lock && trx->is_wsrep() && - !wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { - - ib::info() << "WSREP BF lock conflict for my lock:\n BF:" << - ((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_client_state_str(trx->mysql_thd) << " conflict: " << - wsrep_thd_transaction_state_str(trx->mysql_thd) << " seqno: " << - wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " << - wsrep_thd_query(trx->mysql_thd); - trx_t* otrx = other_lock->trx; - ib::info() << "WSREP other lock:\n BF:" << - ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_client_state_str(otrx->mysql_thd) << " conflict: " << - wsrep_thd_transaction_state_str(otrx->mysql_thd) << " seqno: " << - wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " << - wsrep_thd_query(otrx->mysql_thd); - } -#else - ut_a(!other_lock); + if (UNIV_UNLIKELY(other_lock && trx->is_wsrep())) { + /* Only BF transaction may be granted lock + before other conflicting lock request. */ + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) + && !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { + /* If it is not BF, this case is a bug. */ + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + ut_error; + } + } else #endif /* WITH_WSREP */ + ut_ad(!other_lock); } #endif /* UNIV_DEBUG */ @@ -2026,9 +2004,6 @@ lock_rec_has_to_wait_in_queue( hash = lock_hash_get(wait_lock->type_mode); for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no); -#ifdef WITH_WSREP - lock && -#endif lock != wait_lock; lock = lock_rec_get_next_on_page_const(lock)) { const byte* p = (const byte*) &lock[1]; @@ -2036,24 +2011,6 @@ lock_rec_has_to_wait_in_queue( if (heap_no < lock_rec_get_n_bits(lock) && (p[bit_offset] & bit_mask) && lock_has_to_wait(wait_lock, lock)) { -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) && - wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE)) { - - if (UNIV_UNLIKELY(wsrep_debug)) { - mtr_t mtr; - ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id) - << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd); - lock_rec_print(stderr, wait_lock, mtr); - ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id) - << " query: " << wsrep_thd_query(lock->trx->mysql_thd); - lock_rec_print(stderr, lock, mtr); - } - /* don't wait for another BF lock */ - continue; - } -#endif /* WITH_WSREP */ - return(lock); } } @@ -2168,6 +2125,7 @@ lock_grant_and_move_on_page(ulint rec_fold, ulint space, ulint page_no) lock = previous->hash; } + ut_ad(!lock->trx->is_wsrep()); ut_ad(previous->hash == lock || previous == lock); /* Grant locks if there are no conflicting locks ahead. Move granted locks to the head of the list. */ @@ -2238,11 +2196,18 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock) lock != NULL; lock = lock_rec_get_next_on_page(lock)) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { + if (!lock_get_wait(lock)) { + continue; + } + const lock_t* c = lock_rec_has_to_wait_in_queue(lock); + if (!c) { /* Grant the lock */ ut_ad(lock->trx != in_lock->trx); lock_grant(lock); +#ifdef WITH_WSREP + } else { + wsrep_assert_no_bf_bf_wait(c, lock, c->trx); +#endif /* WITH_WSREP */ } } } else { @@ -3506,11 +3471,8 @@ lock_table_create( ut_list_insert(table->locks, c_lock, lock, TableLockGetNode()); if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "table lock BF conflict for " - << ib::hex(c_lock->trx->id) - << " SQL: " - << wsrep_thd_query( - c_lock->trx->mysql_thd); + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); } } else { ut_list_append(table->locks, lock, TableLockGetNode()); @@ -3522,6 +3484,8 @@ lock_table_create( c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE; if (UNIV_UNLIKELY(wsrep_debug)) { + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); wsrep_print_wait_locks(c_lock); } @@ -3531,14 +3495,6 @@ lock_table_create( lock_cancel_waiting_and_release( c_lock->trx->lock.wait_lock); trx_mutex_enter(trx); - - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "WSREP: c_lock canceled " - << ib::hex(c_lock->trx->id) - << " SQL: " - << wsrep_thd_query( - c_lock->trx->mysql_thd); - } } trx_mutex_exit(c_lock->trx); @@ -4096,6 +4052,7 @@ lock_grant_and_move_on_rec( } lock = previous->hash; } + ut_ad(!lock->trx->is_wsrep()); /* Grant locks if there are no conflicting locks ahead. Move granted locks to the head of the list. */ for (;lock != NULL;) { @@ -4195,12 +4152,18 @@ released: for (lock = first_lock; lock != NULL; lock = lock_rec_get_next(heap_no, lock)) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - + if (!lock_get_wait(lock)) { + continue; + } + const lock_t* c = lock_rec_has_to_wait_in_queue(lock); + if (!c) { /* Grant the lock */ ut_ad(trx != lock->trx); lock_grant(lock); +#ifdef WITH_WSREP + } else { + wsrep_assert_no_bf_bf_wait(c, lock, c->trx); +#endif /* WITH_WSREP */ } } } else { @@ -4855,24 +4818,28 @@ func_exit: explicit granted lock. */ #ifdef WITH_WSREP - if (other_lock->trx->is_wsrep()) { - if (!lock_get_wait(other_lock) ) { - ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" << - ((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_client_state_str(impl_trx->mysql_thd) << " conflict: " << - wsrep_thd_transaction_state_str(impl_trx->mysql_thd) << " seqno: " << - wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " << - wsrep_thd_query(impl_trx->mysql_thd); - - trx_t* otrx = other_lock->trx; - - ib::info() << "WSREP other lock:\n BF:" << - ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " << - wsrep_thd_client_state_str(otrx->mysql_thd) << " conflict: " << - wsrep_thd_transaction_state_str(otrx->mysql_thd) << " seqno: " << - wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " << - wsrep_thd_query(otrx->mysql_thd); - } + /** Galera record locking rules: + * If there is no other record lock to the same record, we may grant + the lock request. + * If there is other record lock but this requested record lock is + compatible, we may grant the lock request. + * If there is other record lock and it is not compatible with + requested lock, all normal transactions must wait. + * BF (brute force) additional exceptions : + ** If BF already holds record lock for requested record, we may + grant new record lock even if there is conflicting record lock(s) + waiting on a queue. + ** If conflicting transaction holds requested record lock, + we will cancel this record lock and select conflicting transaction + for BF abort or kill victim. + ** If conflicting transaction is waiting for requested record lock + we will cancel this wait and select conflicting transaction + for BF abort or kill victim. + ** There should not be two BF transactions waiting for same record lock + */ + if (other_lock->trx->is_wsrep() && !lock_get_wait(other_lock)) { + wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, @@ -4881,9 +4848,11 @@ func_exit: } } else #endif /* WITH_WSREP */ - ut_ad(lock_get_wait(other_lock)); - ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, - block, heap_no, impl_trx)); + { + ut_ad(lock_get_wait(other_lock)); + ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, impl_trx)); + } } mutex_exit(&impl_trx->mutex); @@ -4915,13 +4884,20 @@ func_exit: mode, block, false, heap_no, lock->trx); #ifdef WITH_WSREP - ut_a(!other_lock - || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) - || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)); - -#else - ut_a(!other_lock); + if (UNIV_UNLIKELY(other_lock && lock->trx->is_wsrep())) { + /* Only BF transaction may be granted + lock before other conflicting lock + request. */ + if (!wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) + && !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { + /* If no BF, this case is a bug. */ + wsrep_report_bf_lock_wait(lock->trx->mysql_thd, lock->trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + ut_error; + } + } else #endif /* WITH_WSREP */ + ut_ad(!other_lock); } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { ut_a(lock_rec_has_to_wait_in_queue(lock)); @@ -6131,12 +6107,6 @@ lock_trx_handle_wait( /*=================*/ trx_t* trx) /*!< in/out: trx lock state */ { -#ifdef WITH_WSREP - /* We already own mutexes */ - if (trx->lock.was_chosen_as_wsrep_victim) { - return lock_trx_handle_wait_low(trx); - } -#endif /* WITH_WSREP */ lock_mutex_enter(); trx_mutex_enter(trx); dberr_t err = lock_trx_handle_wait_low(trx); |