From 8e3e87d2fc1e63d287f203d441dcb9360775c6b7 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Tue, 8 Dec 2020 18:02:42 +0200 Subject: MDEV-23851 MDEV-24229 BF-BF conflict issues Issues MDEV-23851 and MDEV-24229 are probably duplicates and are caused by the new self-asserting function lock0lock.cc:wsrep_assert_no_bf_bf_wait(). The criteria for asserting is too strict and does not take in consideration scenarios of "false positive" lock conflicts, which are resolved by replaying the local transaction. As a fix, this PR is relaxing the assert criteria by two conditions, which skip assert if high priority transactions are locking in correct order or if conflicting high priority lock holder is aborting and has just not yet released the lock. Alternative fix would be to remove wsrep_assert_no_bf_bf_wait() altogether, or remove the assert in this function and let it only print warnings in error log. But in my high conflict rate multi-master test scenario, this relaxed asserting appears to be safe. This PR also removes two wsrep_report_bf_lock_wait() calls in innodb lock manager, which cause mutex access assert in debug builds. Foreign key appending missed handling of data types of float and double in INSERT execution. This is not directly related to the actual issue here but is fixed in this PR nevertheless. Missing these foreign keys values in certification could cause problems in some multi-master load scenarios. Finally, some problem reports suggest that some of the issues reported in MDEV-23851 might relate to false positive lock conflicts over unique secondary index gaps. There is separate work for relaxing UK index gap locking of replication appliers, and separate PR will be submitted for it, with a related mtr test as well. --- storage/innobase/lock/lock0lock.cc | 19 ++++++++++++++----- storage/innobase/rem/rem0rec.cc | 17 ++++++++++++++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 913c80027bf..ec37f9029f7 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -666,6 +666,20 @@ static void wsrep_assert_no_bf_bf_wait( if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE))) return; + /* if BF - BF order is honored, we can keep trx1 waiting for the lock */ + if (wsrep_trx_order_before(trx1->mysql_thd, lock_rec2->trx->mysql_thd)) + return; + + /* avoiding BF-BF conflict assert, if victim is already aborting + or rolling back for replaying + */ + wsrep_thd_LOCK(lock_rec2->trx->mysql_thd); + if (wsrep_trx_is_aborting(lock_rec2->trx->mysql_thd)) { + wsrep_thd_UNLOCK(lock_rec2->trx->mysql_thd); + return; + } + wsrep_thd_UNLOCK(lock_rec2->trx->mysql_thd); + mtr_t mtr; if (lock_rec1) { @@ -1522,11 +1536,6 @@ lock_rec_create_low( trx_mutex_exit(c_lock->trx); - if (UNIV_UNLIKELY(wsrep_debug)) { - wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); - wsrep_report_bf_lock_wait(c_lock->trx->mysql_thd, c_lock->trx->id); - } - /* have to bail out here to avoid lock_set_lock... */ return(lock); } diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index 962de60f555..fa6a6c738e6 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -2283,9 +2283,24 @@ wsrep_rec_get_foreign_key( break; case DATA_BLOB: case DATA_BINARY: + case DATA_FIXBINARY: + case DATA_GEOMETRY: memcpy(buf, data, len); break; - default: + + case DATA_FLOAT: + { + float f = mach_float_read(data); + memcpy(buf, &f, sizeof(float)); + } + break; + case DATA_DOUBLE: + { + double d = mach_double_read(data); + memcpy(buf, &d, sizeof(double)); + } + break; + default: break; } -- cgit v1.2.1 From 5b9ee8d8193a8c7a8ebdd35eedcadc3ae78e7fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 28 Dec 2020 12:06:22 +0200 Subject: MDEV-24449 Corruption of system tablespace or last recovered page This corresponds to 10.5 commit 39378e1366f78b38c05e45103b9fb9c829cc5f4f. With a patched version of the test innodb.ibuf_not_empty (so that it would trigger crash recovery after using the change buffer), and patched code that would modify the os_thread_sleep() in recv_apply_hashed_log_recs() to be 1ms as well as add a sleep of the same duration to the end of recv_recover_page() when recv_sys->n_addrs=0, we can demonstrate a race condition. After disabling some debug checks in buf_all_freed_instance(), buf_pool_invalidate_instance() and buf_validate(), we managed to trigger an assertion failure in fseg_free_step(), on the XDES_FREE_BIT. In other words, an trx_undo_seg_free() call during trx_rollback_resurrected() was attempting a double-free of a page. This was repeated about once in 400 to 500 test runs. With the fix applied, the test passed 2,000 runs. recv_apply_hashed_log_recs(): Do not only wait for recv_sys->n_addrs to reach 0, but also wait for buf_get_n_pending_read_ios() to reach 0, to guarantee that buf_page_io_complete() will not be executing ibuf_merge_or_delete_for_page(). --- storage/innobase/log/log0recv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 4c3886caeaf..95179ec2271 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2501,7 +2501,7 @@ apply: /* Wait until all the pages have been processed */ - while (recv_sys->n_addrs != 0) { + while (recv_sys->n_addrs || buf_get_n_pending_read_ios()) { const bool abort = recv_sys->found_corrupt_log || recv_sys->found_corrupt_fs; -- cgit v1.2.1