From 66c05326d2a756329ce6fe2c5abb21230b424b4e Mon Sep 17 00:00:00 2001 From: sjaakola Date: Thu, 6 Oct 2022 15:16:06 +0300 Subject: MDEV-29684 Fixes for cluster wide write conflict resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster conflict victim's THD is marked with wsrep_aborter. THD::wsrep_aborter holds the thread ID of the high priority thread, which is currently carrying out BF aborting for this victim. However, the BF abort operation is not always successful, and in such a case the wsrep_aborter mark should be removed. In the old code, this wsrep_aborter resetting did not happen, and this could lead to a situation where the sticky wsrep_aborter mark prevents any further attempt to BF abort this transaction. This commit fixes this issue, and resets wsrep_aborter after an unsuccessful BF abort attempt. Reviewed-by: Jan Lindström --- sql/service_wsrep.cc | 11 +++++++++-- sql/wsrep_thd.cc | 21 +++++++++++++-------- sql/wsrep_thd.h | 6 ++++-- storage/innobase/handler/ha_innodb.cc | 10 ++++++++++ 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 291d8dfbef8..7b0a1e5495e 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -349,13 +349,20 @@ extern "C" void wsrep_commit_ordered(THD *thd) extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd) { - WSREP_DEBUG("wsrep_thd_set_wsrep_aborter called"); mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + if (!bf_thd) + { + victim_thd->wsrep_aborter= 0; + WSREP_DEBUG("wsrep_thd_set_wsrep_aborter resetting wsrep_aborter"); + return false; + } if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) { return true; } - victim_thd->wsrep_aborter = bf_thd->thread_id; + victim_thd->wsrep_aborter= bf_thd->thread_id; + WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u", + victim_thd->wsrep_aborter); return false; } diff --git a/sql/wsrep_thd.cc 
b/sql/wsrep_thd.cc index 6900efa8bc9..05c96491906 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Codership Oy +/* Copyright (C) 2013-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -308,11 +308,11 @@ void wsrep_fire_rollbacker(THD *thd) } -int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) +int wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) { DBUG_ENTER("wsrep_abort_thd"); - THD *victim_thd= (THD *) victim_thd_ptr; - THD *bf_thd= (THD *) bf_thd_ptr; mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); @@ -323,16 +323,21 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) if ((WSREP(bf_thd) || ((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && wsrep_thd_is_toi(bf_thd))) && - victim_thd && !wsrep_thd_is_aborting(victim_thd)) { - WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? 
- (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", + (long long)bf_thd->real_id, (long long)victim_thd->real_id); ha_abort_transaction(bf_thd, victim_thd, signal); } else { - WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + WSREP_DEBUG("wsrep_abort_thd not effective: bf %llu victim %llu " + "wsrep %d wsrep_on %d RSU %d TOI %d aborting %d", + (long long)bf_thd->real_id, (long long)victim_thd->real_id, + WSREP_NNULL(bf_thd), WSREP_ON, + bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU, + wsrep_thd_is_toi(bf_thd), + wsrep_thd_is_aborting(victim_thd)); wsrep_thd_UNLOCK(victim_thd); } diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index e9add662e3f..cf8528c3165 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Codership Oy +/* Copyright (C) 2013-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -88,7 +88,9 @@ bool wsrep_create_appliers(long threads, bool mutex_protected=false); void wsrep_create_rollbacker(); bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd); -int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal); +int wsrep_abort_thd(THD *bf_thd, + THD *victim_thd, + my_bool signal) __attribute__((nonnull(1,2))); /* Helper methods to deal with thread local storage. 
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6f781a1f291..b8e2aea204b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18748,6 +18748,16 @@ wsrep_kill_victim( lock_cancel_waiting_and_release(wait_lock); } } + else + { + wsrep_thd_LOCK(thd); + victim_trx->lock.was_chosen_as_wsrep_victim= false; + wsrep_thd_set_wsrep_aborter(NULL, thd); + wsrep_thd_UNLOCK(thd); + + WSREP_DEBUG("wsrep_thd_bf_abort has failed, victim %lu will survive", + thd_get_thread_id(thd)); + } DBUG_VOID_RETURN; } -- cgit v1.2.1 From cd97523dcff4a1a4b1d751d505bd2325aa29b074 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Thu, 29 Dec 2022 12:59:34 +0200 Subject: MDEV-30317 Transaction savepoint may cause failure in galera replaying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created mtr test for reproducing the crash Developed actual fix for the issue. Setting THD::system_thread_info.rpl_sql_info for replayer thread, same way as it is handled for appliers. 
Recorded test result, with the fix Reviewed-by: Jan Lindström --- .../suite/galera/r/galera_savepoint_replay.result | 53 +++++++++++++ .../suite/galera/t/galera_savepoint_replay.test | 86 ++++++++++++++++++++++ sql/wsrep_high_priority_service.cc | 35 +++++---- 3 files changed, 158 insertions(+), 16 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_savepoint_replay.result create mode 100644 mysql-test/suite/galera/t/galera_savepoint_replay.test diff --git a/mysql-test/suite/galera/r/galera_savepoint_replay.result b/mysql-test/suite/galera/r/galera_savepoint_replay.result new file mode 100644 index 00000000000..afea5f82e3c --- /dev/null +++ b/mysql-test/suite/galera/r/galera_savepoint_replay.result @@ -0,0 +1,53 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1)); +INSERT INTO t1 VALUES (1, 'a'); +INSERT INTO t1 VALUES (2, 'a'); +connection node_1; +SET AUTOCOMMIT=ON; +START TRANSACTION; +UPDATE t1 SET f2 = 'b' WHERE f1 = 1; +SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE; +f1 f2 +2 a +SAVEPOINT my_sp; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET SESSION wsrep_sync_wait=0; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; +connection node_2; +UPDATE t1 SET f2 = 'c' WHERE f1 = 2; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +connection node_1; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; +SET GLOBAL wsrep_provider_options = 
'signal=commit_monitor_master_enter_sync'; +connection node_1; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +COUNT(*) = 1 +1 +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; +COUNT(*) = 1 +1 +wsrep_local_replays +1 +connection node_2; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +COUNT(*) = 1 +1 +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; +COUNT(*) = 1 +1 +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_savepoint_replay.test b/mysql-test/suite/galera/t/galera_savepoint_replay.test new file mode 100644 index 00000000000..cff26f4a94f --- /dev/null +++ b/mysql-test/suite/galera/t/galera_savepoint_replay.test @@ -0,0 +1,86 @@ +# +# This test tests replaying a transaction with savepoint +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + +--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1)); +INSERT INTO t1 VALUES (1, 'a'); +INSERT INTO t1 VALUES (2, 'a'); + +--connection node_1 +SET AUTOCOMMIT=ON; +START TRANSACTION; + +UPDATE t1 SET f2 = 'b' WHERE f1 = 1; +SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE; +SAVEPOINT my_sp; + +# Block the applier on node #1 and issue a conflicting update on node #2 +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +SET SESSION wsrep_sync_wait=0; +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_2 +UPDATE t1 SET f2 = 'c' WHERE f1 = 2; + +--connection node_1a +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Block the commit, send the COMMIT and wait until it gets blocked + +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +--send COMMIT + +--connection node_1a + +--let 
$galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Let the conflicting UPDATE proceed and wait until it hits abort_trx_end. +# The victim transaction still sits in commit_monitor_master_sync_point. + +--let $galera_sync_point = abort_trx_end +--source include/galera_set_sync_point.inc +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc +--let $galera_sync_point = abort_trx_end commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc + +# Let the transactions proceed +--source include/galera_clear_sync_point.inc +--let $galera_sync_point = abort_trx_end +--source include/galera_signal_sync_point.inc +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_signal_sync_point.inc + +# Commit succeeds +--connection node_1 +--reap + +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; + +# wsrep_local_replays has increased by 1 +--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +--disable_query_log +--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays; +--enable_query_log + +--connection node_2 +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b'; +SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; + +DROP TABLE t1; + diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc index 700ac599cee..c396a9eeae5 100644 --- a/sql/wsrep_high_priority_service.cc +++ b/sql/wsrep_high_priority_service.cc @@ -1,4 +1,4 @@ -/* Copyright 2018-2021 Codership Oy +/* Copyright 2018-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -619,6 +619,9 @@ 
Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd) transactional locks */ DBUG_ASSERT(!orig_thd->mdl_context.has_transactional_locks()); + replayer_thd->system_thread_info.rpl_sql_info= + new rpl_sql_thread_info(replayer_thd->wsrep_rgi->rli->mi->rpl_filter); + /* Make a shadow copy of diagnostics area and reset */ m_da_shadow.status= orig_thd->get_stmt_da()->status(); if (m_da_shadow.status == Diagnostics_area::DA_OK) @@ -657,35 +660,35 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd) Wsrep_replayer_service::~Wsrep_replayer_service() { - THD* replayer_thd= m_thd; - THD* orig_thd= m_orig_thd; - /* Switch execution context back to original. */ - wsrep_after_apply(replayer_thd); - wsrep_after_command_ignore_result(replayer_thd); - wsrep_close(replayer_thd); - wsrep_reset_threadvars(replayer_thd); - wsrep_store_threadvars(orig_thd); + wsrep_after_apply(m_thd); + wsrep_after_command_ignore_result(m_thd); + wsrep_close(m_thd); + wsrep_reset_threadvars(m_thd); + wsrep_store_threadvars(m_orig_thd); - DBUG_ASSERT(!orig_thd->get_stmt_da()->is_sent()); - DBUG_ASSERT(!orig_thd->get_stmt_da()->is_set()); + DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_sent()); + DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_set()); + + delete m_thd->system_thread_info.rpl_sql_info; + m_thd->system_thread_info.rpl_sql_info= nullptr; if (m_replay_status == wsrep::provider::success) { - DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success); - orig_thd->reset_kill_query(); - my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id); + DBUG_ASSERT(m_thd->wsrep_cs().current_error() == wsrep::e_success); + m_orig_thd->reset_kill_query(); + my_ok(m_orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id); } else if (m_replay_status == wsrep::provider::error_certification_failed) { - wsrep_override_error(orig_thd, ER_LOCK_DEADLOCK); + wsrep_override_error(m_orig_thd, ER_LOCK_DEADLOCK); } else { 
DBUG_ASSERT(0); WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s", m_replay_status, - orig_thd->db.str, wsrep_thd_query(orig_thd)); + m_orig_thd->db.str, wsrep_thd_query(m_orig_thd)); unireg_abort(1); } } -- cgit v1.2.1 From 68cfcf9cb6821c3d333b97b213b44627e433861c Mon Sep 17 00:00:00 2001 From: sjaakola Date: Tue, 8 Nov 2022 16:36:34 +0200 Subject: MDEV-29512 deadlock between commit monitor and THD::LOCK_thd_data mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit contains only a mtr test for reproducing the issue in MDEV-29512 The actual fix will be pushed in wsrep-lib repository The hanging in MDEV-29512 happens when binlog purging is attempted, and there is one local BF aborted transaction waiting for commit monitor. The test will launch two node cluster and enable binlogging with expire log days, to force binlog purging to happen. A local transaction is executed so that will become BF abort victim, and has advanced to replication stage waiting for commit monitor for final cleanup (to mark position in innodb) after that, applier is released to complete the BF abort and due to binlog configuration, starting the binlog purging. 
This is where the hanging would occur, if code is buggy Reviewed-by: Jan Lindström --- mysql-test/suite/galera/r/galera_MDEV-29512.result | 40 ++++++++++ mysql-test/suite/galera/t/galera_MDEV-29512.cnf | 15 ++++ mysql-test/suite/galera/t/galera_MDEV-29512.test | 91 ++++++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 mysql-test/suite/galera/r/galera_MDEV-29512.result create mode 100644 mysql-test/suite/galera/t/galera_MDEV-29512.cnf create mode 100644 mysql-test/suite/galera/t/galera_MDEV-29512.test diff --git a/mysql-test/suite/galera/r/galera_MDEV-29512.result b/mysql-test/suite/galera/r/galera_MDEV-29512.result new file mode 100644 index 00000000000..aaf24df920e --- /dev/null +++ b/mysql-test/suite/galera/r/galera_MDEV-29512.result @@ -0,0 +1,40 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000)); +INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200)); +INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200)); +SET SESSION wsrep_sync_wait=0; +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +SET SESSION wsrep_sync_wait=0; +connection node_1; +begin; +select f1,f2 from t1; +f1 f2 +1 0 +3 3 +connection node_2; +UPDATE t1 SET f2=2 WHERE f1=3; +connection node_1a; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +connection node_1; +UPDATE t1 SET f2=1 WHERE f1=3; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; +connection node_1; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction 
+select f1,f2 from t1; +f1 f2 +1 0 +3 2 +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_MDEV-29512.cnf b/mysql-test/suite/galera/t/galera_MDEV-29512.cnf new file mode 100644 index 00000000000..bf8e0c37984 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_MDEV-29512.cnf @@ -0,0 +1,15 @@ +!include ../galera_2nodes.cnf + +[mysqld] +log-bin +log-slave-updates + +[mysqld.1] +log_bin +log_slave_updates +max-binlog-size=4096 +expire-logs-days=1 + + +[mysqld.2] + diff --git a/mysql-test/suite/galera/t/galera_MDEV-29512.test b/mysql-test/suite/galera/t/galera_MDEV-29512.test new file mode 100644 index 00000000000..ffcef792f85 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_MDEV-29512.test @@ -0,0 +1,91 @@ +# +# This test is for reproducing the issue in: +# https://jira.mariadb.org/browse/MDEV-29512 +# +# The hanging in MDEV-29512 happens when binlog purging is attempted, and there is +# one local BF aborted transaction waiting for commit monitor. +# +# The test will launch two node cluster and enable binlogging with expire log days, +# to force binlog purging to happen. +# A local transaction is executed so that will become BF abort victim, and has advanced +# to replication stage waiting for commit monitor for final cleanup (to mark position in innodb) +# after that, applier is released to complete the BF abort and due to binlog configuration, +# starting the binlog purging. 
This is where the hanging would occur, if code is buggy +# +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + +# +# binlog size is limited to 4096 bytes, we will create enough events to +# cause binlog rotation +# +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000)); +INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200)); +INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200)); + +SET SESSION wsrep_sync_wait=0; + +# set sync point for replication applier +SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb"; + +# Control connection to manage sync points for appliers +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +SET SESSION wsrep_sync_wait=0; + +# starting local transaction, only select so far, +# write will happen later and this will be ordered after the transaction in node_2 +--connection node_1 +begin; +select f1,f2 from t1; + +# send from node 2 an UPDATE transaction, which will BF abort the transaction in node_1 +--connection node_2 +--let $wait_condition=select count(*)=2 from t1 +--source include/wait_condition.inc + +UPDATE t1 SET f2=2 WHERE f1=3; + +--connection node_1a +# wait to see the UPDATE from node_2 in apply_cb sync point +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; + +--connection node_1 +# now issuing conflicting update +UPDATE t1 SET f2=1 WHERE f1=3; + +# Block the local commit, send final COMMIT and wait until it gets blocked +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_set_sync_point.inc +--send COMMIT + +--connection node_1a +# wait for the local commit to enter in commit monitor wait state +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# release the local transaction to continue with commit +--let $galera_sync_point = 
commit_monitor_master_enter_sync +--source include/galera_signal_sync_point.inc + +# and now release the applier, it should force local trx to abort +SET GLOBAL DEBUG_DBUG = ""; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +SET GLOBAL debug_dbug = NULL; +SET debug_sync='RESET'; + +--connection node_1 +--error ER_LOCK_DEADLOCK +--reap + +# wait until applying is complete +--let $wait_condition = SELECT COUNT(*)=1 FROM t1 WHERE f2=2 +--source include/wait_condition.inc + +# final read to verify what we got +select f1,f2 from t1; + +DROP TABLE t1; -- cgit v1.2.1 From 0ff7f33c7b5d0b5373472f4706aff4d19dc84258 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Wed, 12 Oct 2022 15:07:20 +0300 Subject: 10.4-MDEV-29684 Fixes for cluster wide write conflict resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rather recent thd_need_ordering_with() function does not take high priority transactions' order into consideration. Changed this function to also compare transaction seqnos and favor the earlier transaction. Reviewed-by: Jan Lindström --- sql/sql_class.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 10bee9e7aae..b516791b6da 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5230,8 +5230,9 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) (e.g. 
InnoDB does it by keeping lock_sys.mutex locked) */ if (WSREP_ON && - wsrep_thd_is_BF(const_cast<THD *>(thd), false) && - wsrep_thd_is_BF(const_cast<THD *>(other_thd), false)) + wsrep_thd_is_BF(thd, false) && + wsrep_thd_is_BF(other_thd, false) && + wsrep_thd_order_before(thd, other_thd)) return 0; #endif /* WITH_WSREP */ rgi= thd->rgi_slave; -- cgit v1.2.1 From 0595dd0f5674845637135feaa5cb7b28d0dc10a9 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 12 Jan 2023 18:45:40 +0200 Subject: MDEV-30080 Wrong result with LEFT JOINs involving constant tables The reason things fail in 10.5 and above is that test_quick_select() returns -1 (impossible range) for empty tables if there are any conditions attached. This didn't happen in 10.4 as the cost for a range was more than for a table scan with 0 rows and get_key_scan_params() did not create any range plans and thus did not mark the range as impossible. The code that checked the 'impossible range' conditions did not take into account all cases of LEFT JOIN usage. Adding an extra check if the table is used with an ON condition in case of 'impossible range' fixes the issue. 
--- mysql-test/main/join.result | 17 +++++++++++++++++ mysql-test/main/join.test | 15 +++++++++++++++ sql/sql_select.cc | 2 +- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/join.result b/mysql-test/main/join.result index b9543fc2c43..859bc588ed4 100644 --- a/mysql-test/main/join.result +++ b/mysql-test/main/join.result @@ -3407,3 +3407,20 @@ id select_type table type possible_keys key key_len ref rows Extra drop table t1,t2,t3; drop table t1000,t10,t03; # End of 10.3 tests +# +# MDEV-30080 Wrong result with LEFT JOINs involving constant tables +# +CREATE TABLE t1 (a INT) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1); +CREATE TABLE t2 (b INT) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1),(1); +CREATE TABLE t3 (c INT PRIMARY KEY) ENGINE=MyISAM; +SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.b = t3.c) ON t1.a = t2.b; +a b c +1 1 NULL +1 1 NULL +SELECT COUNT(*) FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.b = t3.c) ON t1.a = t2.b; +COUNT(*) +2 +DROP TABLE t1, t2, t3; +# End of 10.5 tests diff --git a/mysql-test/main/join.test b/mysql-test/main/join.test index b99f05f7c88..c8bd2886b30 100644 --- a/mysql-test/main/join.test +++ b/mysql-test/main/join.test @@ -1820,3 +1820,18 @@ drop table t1,t2,t3; drop table t1000,t10,t03; --echo # End of 10.3 tests + +--echo # +--echo # MDEV-30080 Wrong result with LEFT JOINs involving constant tables +--echo # + +CREATE TABLE t1 (a INT) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1); +CREATE TABLE t2 (b INT) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1),(1); +CREATE TABLE t3 (c INT PRIMARY KEY) ENGINE=MyISAM; +SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.b = t3.c) ON t1.a = t2.b; +SELECT COUNT(*) FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.b = t3.c) ON t1.a = t2.b; +DROP TABLE t1, t2, t3; + +--echo # End of 10.5 tests diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 98d5ccb7eb2..7bbe5c6a237 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5639,7 +5639,7 @@ make_join_statistics(JOIN *join, List 
&tables_list, caller to abort with a zero row result. */ TABLE_LIST *emb= s->table->pos_in_table_list->embedding; - if (emb && !emb->sj_on_expr) + if (emb && !emb->sj_on_expr && !*s->on_expr_ref) { /* Mark all tables in a multi-table join nest as const */ mark_join_nest_as_const(join, emb, &found_const_table_map, -- cgit v1.2.1 From 981a6b704475176d032c13461baffcaf88802d34 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 12 Jan 2023 22:31:18 +0200 Subject: MDEV-30395 Wrong result with semijoin and Federated as outer table The problem was that federated engine does not support comparable rowids which was not taken into account by semijoin code. Fixed by checking that we don't use semijoin with tables that does not support comparable rowids. Other things: - Fixed some typos in the code comments --- mysql-test/suite/federated/federatedx.result | 32 ++++++++++++++++++++++++++++ mysql-test/suite/federated/federatedx.test | 30 ++++++++++++++++++++++++++ sql/handler.h | 4 ++-- sql/opt_subselect.cc | 24 ++++++++++++++++++--- sql/sql_select.cc | 2 +- sql/sql_select.h | 4 +++- 6 files changed, 89 insertions(+), 7 deletions(-) diff --git a/mysql-test/suite/federated/federatedx.result b/mysql-test/suite/federated/federatedx.result index c18665e4d99..49deff81c4c 100644 --- a/mysql-test/suite/federated/federatedx.result +++ b/mysql-test/suite/federated/federatedx.result @@ -2325,6 +2325,38 @@ DROP TABLE federated.t1; connection slave; DROP TABLE federated.t1; connection default; +# +# MDEV-30395 Wrong result with semijoin and Federated as outer table +# +create server s foreign data wrapper mysql options (host "127.0.0.1", database "test", user "root", port MASTER_PORT); +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (3),(2),(3); +CREATE TABLE t2 (pk INT PRIMARY KEY); +INSERT INTO t2 VALUES (1),(2),(3),(4); +set @save_optimizer_switch=@@optimizer_switch; +set optimizer_switch="materialization=off"; +CREATE TABLE t2_fed ENGINE=FEDERATED CONNECTION='s/t2'; +explain SELECT * FROM 
t2_fed WHERE pk IN ( SELECT a FROM t1 ); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2_fed ALL NULL NULL NULL NULL 4 Using where +2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 3 Using where +SELECT * FROM t2_fed WHERE pk IN ( SELECT a FROM t1 ); +pk +2 +3 +SET optimizer_switch='semijoin=off'; +explain SELECT * FROM t2_fed WHERE pk IN ( SELECT a FROM t1 ); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2_fed ALL NULL NULL NULL NULL 4 Using where +2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 3 Using where +SELECT * FROM t2_fed WHERE pk IN ( SELECT a FROM t1 ); +pk +2 +3 +DROP TABLE t2_fed, t1, t2; +set @@optimizer_switch=@save_optimizer_switch; +DROP SERVER s; +# End of 10.5 tests connection master; DROP TABLE IF EXISTS federated.t1; DROP DATABASE IF EXISTS federated; diff --git a/mysql-test/suite/federated/federatedx.test b/mysql-test/suite/federated/federatedx.test index 51d34298626..7e5a335b786 100644 --- a/mysql-test/suite/federated/federatedx.test +++ b/mysql-test/suite/federated/federatedx.test @@ -2060,4 +2060,34 @@ connection slave; DROP TABLE federated.t1; connection default; +--echo # +--echo # MDEV-30395 Wrong result with semijoin and Federated as outer table +--echo # + + +--replace_result $MASTER_MYPORT MASTER_PORT +eval create server s foreign data wrapper mysql options (host "127.0.0.1", database "test", user "root", port $MASTER_MYPORT); + +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (3),(2),(3); +CREATE TABLE t2 (pk INT PRIMARY KEY); +INSERT INTO t2 VALUES (1),(2),(3),(4); + +set @save_optimizer_switch=@@optimizer_switch; +set optimizer_switch="materialization=off"; + +CREATE TABLE t2_fed ENGINE=FEDERATED CONNECTION='s/t2'; +explain SELECT * FROM t2_fed WHERE pk IN ( SELECT a FROM t1 ); +SELECT * FROM t2_fed WHERE pk IN ( SELECT a FROM t1 ); +SET optimizer_switch='semijoin=off'; +explain SELECT * FROM t2_fed WHERE pk IN ( SELECT a FROM t1 ); +SELECT * FROM t2_fed WHERE pk 
IN ( SELECT a FROM t1 ); + +DROP TABLE t2_fed, t1, t2; +set @@optimizer_switch=@save_optimizer_switch; + +DROP SERVER s; + +--echo # End of 10.5 tests + source include/federated_cleanup.inc; diff --git a/sql/handler.h b/sql/handler.h index 75cd88b8013..3191c408e56 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -356,9 +356,9 @@ enum chf_create_flags { Rowid's are not comparable. This is set if the rowid is unique to the current open handler, like it is with federated where the rowid is a pointer to a local result set buffer. The effect of having this set is - that the optimizer will not consirer the following optimizations for + that the optimizer will not consider the following optimizations for the table: - ror scans or filtering + ror scans, filtering or duplicate weedout */ #define HA_NON_COMPARABLE_ROWID (1ULL << 60) diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 3b3f9e56606..d516dc02b90 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -664,6 +664,17 @@ int check_and_do_in_subquery_rewrites(JOIN *join) DBUG_RETURN(-1); } } + /* Check if any table is not supporting comparable rowids */ + { + List_iterator_fast<TABLE_LIST> li(select_lex->outer_select()->leaf_tables); + TABLE_LIST *tbl; + while ((tbl = li++)) + { + TABLE *table= tbl->table; + if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + join->not_usable_rowid_map|= table->map; + } + } DBUG_PRINT("info", ("Checking if subq can be converted to semi-join")); /* @@ -683,8 +694,11 @@ int check_and_do_in_subquery_rewrites(JOIN *join) 9. Parent select is not a table-less select 10. Neither parent nor child select have STRAIGHT_JOIN option. 11. It is first optimisation (the subquery could be moved from ON - clause during first optimisation and then be considered for SJ - on the second when it is too late) + clause during first optimisation and then be considered for SJ + on the second when it is too late) + 12. All tables supports comparable rowids. 
+ This is needed for DuplicateWeedout strategy to work (which + is the catch-all semi-join strategy so it must be applicable). */ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) && in_subs && // 1 @@ -699,7 +713,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join) !((join->select_options | // 10 select_lex->outer_select()->join->select_options) // 10 & SELECT_STRAIGHT_JOIN) && // 10 - select_lex->first_cond_optimization) // 11 + select_lex->first_cond_optimization && // 11 + join->not_usable_rowid_map == 0) // 12 { DBUG_PRINT("info", ("Subquery is semi-join conversion candidate")); @@ -3544,6 +3559,9 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, } else { + /* Ensure that table supports comparable rowids */ + DBUG_ASSERT(!(p->table->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)); + sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_read); temptable_rec_size += p->table->table->file->ref_length; } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 7bbe5c6a237..a721899a8be 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2309,7 +2309,7 @@ JOIN::optimize_inner() /* We have to remove constants and duplicates from group_list before calling make_join_statistics() as this may call get_best_group_min_max() - which needs a simplfied group_list. + which needs a simplified group_list. 
*/ if (group_list && table_count == 1) { diff --git a/sql/sql_select.h b/sql/sql_select.h index d8b0ed290db..807b4115fec 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1238,6 +1238,8 @@ public: table_map outer_join; /* Bitmap of tables used in the select list items */ table_map select_list_used_tables; + /* Tables that has HA_NON_COMPARABLE_ROWID (does not support rowid) set */ + table_map not_usable_rowid_map; ha_rows send_records,found_records,join_examined_rows; /* @@ -1550,7 +1552,7 @@ public: table_count= 0; top_join_tab_count= 0; const_tables= 0; - const_table_map= found_const_table_map= 0; + const_table_map= found_const_table_map= not_usable_rowid_map= 0; aggr_tables= 0; eliminated_tables= 0; join_list= 0; -- cgit v1.2.1 From a44d896f98f2d2a3ebf0f1393bf84fd659ecd225 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Wed, 12 Oct 2022 14:13:49 +0300 Subject: 10.4-MDEV-29684 Fixes for cluster wide write conflict resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If two high priority threads have a lock conflict, we look at the order of these transactions and honor the earlier transaction. The for_locking parameter in lock_rec_has_to_wait() has become obsolete and is now removed from the code.
Reviewed-by: Jan Lindström --- sql/service_wsrep.cc | 6 +++-- sql/sql_class.cc | 2 -- storage/innobase/lock/lock0lock.cc | 53 +++++++++++++++++++++++++------------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 7b0a1e5495e..722c22809de 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -1,4 +1,4 @@ -/* Copyright 2018-2021 Codership Oy +/* Copyright 2018-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -274,7 +274,9 @@ extern "C" my_bool wsrep_thd_skip_locking(const THD *thd) extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right) { - if (wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) { + if (wsrep_thd_is_BF(left, false) && + wsrep_thd_is_BF(right, false) && + wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) { WSREP_DEBUG("BF conflict, order: %lld %lld\n", (long long)wsrep_thd_trx_seqno(left), (long long)wsrep_thd_trx_seqno(right)); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index b516791b6da..464d64db73b 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5230,8 +5230,6 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) (e.g. InnoDB does it by keeping lock_sys.mutex locked) */ if (WSREP_ON && - wsrep_thd_is_BF(thd, false) && - wsrep_thd_is_BF(other_thd, false) && wsrep_thd_order_before(thd, other_thd)) return 0; #endif /* WITH_WSREP */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index e10d0c9f2b5..26388ad95e2 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -819,25 +819,34 @@ lock_rec_has_to_wait( } #ifdef WITH_WSREP - /* New lock request from a transaction is using unique key - scan and this transaction is a wsrep high priority transaction - (brute force). 
If conflicting transaction is also wsrep high - priority transaction we should avoid lock conflict because - ordering of these transactions is already decided and - conflicting transaction will be later replayed. Note - that thread holding conflicting lock can't be - committed or rolled back while we hold - lock_sys->mutex. */ - if (trx->is_wsrep_UK_scan() - && wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) { - return false; - } + /* New lock request from a transaction is using unique key + scan and this transaction is a wsrep high priority transaction + (brute force). If conflicting transaction is also wsrep high + priority transaction we should avoid lock conflict because + ordering of these transactions is already decided and + conflicting transaction will be later replayed. Note + that thread holding conflicting lock can't be + committed or rolled back while we hold + lock_sys->mutex. */ + if (trx->is_wsrep_UK_scan() + && wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) { + return false; + } - /* We very well can let bf to wait normally as other - BF will be replayed in case of conflict. For debug - builds we will do additional sanity checks to catch - unsupported bf wait if any. */ - ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx)); + /* If BF-BF conflict, we have to look at write set order */ + if (trx->is_wsrep() + && (type_mode & LOCK_MODE_MASK) == LOCK_X + && (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X + && wsrep_thd_order_before(trx->mysql_thd, + lock2->trx->mysql_thd)) { + return false; + } + + /* We very well can let bf to wait normally as other + BF will be replayed in case of conflict. For debug + builds we will do additional sanity checks to catch + unsupported bf wait if any. 
*/ + ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx)); #endif /* WITH_WSREP */ return true; @@ -2043,6 +2052,14 @@ lock_rec_has_to_wait_in_queue( if (heap_no < lock_rec_get_n_bits(lock) && (p[bit_offset] & bit_mask) && lock_has_to_wait(wait_lock, lock)) { +#ifdef WITH_WSREP + if (lock->trx->is_wsrep() + && wsrep_thd_order_before(wait_lock->trx->mysql_thd, + lock->trx->mysql_thd)) { + /* don't wait for another BF lock */ + continue; + } +#endif return(lock); } } -- cgit v1.2.1 From 3e8b6a79b7169f1b0526169b5c752920e8babf44 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Mon, 16 Jan 2023 10:23:22 +1100 Subject: Update sponsors --- CREDITS | 2 -- 1 file changed, 2 deletions(-) diff --git a/CREDITS b/CREDITS index 35092602ccf..9534d3e6e83 100644 --- a/CREDITS +++ b/CREDITS @@ -9,10 +9,8 @@ MariaDB Corporation https://www.mariadb.com (2013) Microsoft https://microsoft.com/ (2017) ServiceNow https://servicenow.com (2019) SIT https://sit.org (2022) -Tencent Cloud https://cloud.tencent.com (2017) Development Bank of Singapore https://dbs.com (2016) IBM https://www.ibm.com (2017) -Visma https://visma.com (2015) Automattic https://automattic.com (2019) Galera Cluster https://galeracluster.com (2020) Percona https://www.percona.com (2018) -- cgit v1.2.1 From 834650c7cfb53773bbb64b6ab874e23b43b8c874 Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Mon, 16 Jan 2023 14:59:59 +0100 Subject: New CC 3.1 --- libmariadb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmariadb b/libmariadb index 7fdb3eab663..d204e831042 160000 --- a/libmariadb +++ b/libmariadb @@ -1 +1 @@ -Subproject commit 7fdb3eab66384a355475704332d11cc1ab82499a +Subproject commit d204e83104222844251b221e9be7eb3dd9f8d63d -- cgit v1.2.1 From 9ec475c376fa6f46f69b30ee8874d62e4c0cb9dd Mon Sep 17 00:00:00 2001 From: Daniele Sciascia Date: Wed, 11 Jan 2023 17:28:22 +0100 Subject: MDEV-29171 changing the value of wsrep_gtid_domain_id with full cluster restart fails on some nodes MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix `wsrep_init_gtid()` to avoid overwriting the domain id received during state transfer. Reviewed-by: Jan Lindström --- mysql-test/suite/galera_3nodes/r/MDEV-29171.result | 41 +++++++++++ mysql-test/suite/galera_3nodes/t/MDEV-29171.cnf | 14 ++++ mysql-test/suite/galera_3nodes/t/MDEV-29171.test | 83 ++++++++++++++++++++++ sql/wsrep_mysqld.cc | 4 +- 4 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/galera_3nodes/r/MDEV-29171.result create mode 100644 mysql-test/suite/galera_3nodes/t/MDEV-29171.cnf create mode 100644 mysql-test/suite/galera_3nodes/t/MDEV-29171.test diff --git a/mysql-test/suite/galera_3nodes/r/MDEV-29171.result b/mysql-test/suite/galera_3nodes/r/MDEV-29171.result new file mode 100644 index 00000000000..151be86d9cc --- /dev/null +++ b/mysql-test/suite/galera_3nodes/r/MDEV-29171.result @@ -0,0 +1,41 @@ +connection node_2; +connection node_1; +connection node_1; +select @@wsrep_gtid_domain_id,@@wsrep_node_name; +@@wsrep_gtid_domain_id @@wsrep_node_name +100 node1 +connection node_2; +select @@wsrep_gtid_domain_id,@@wsrep_node_name; +@@wsrep_gtid_domain_id @@wsrep_node_name +100 node2 +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connection node_3; +select @@wsrep_gtid_domain_id,@@wsrep_node_name; +@@wsrep_gtid_domain_id @@wsrep_node_name +100 node3 +connection node_3; +connection node_2; +connection node_1; +connection node_1; +# restart: --wsrep_new_cluster --wsrep_gtid_domain_id=200 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 200 +connection node_2; +# restart +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 200 +connection node_3; +# restart: --wsrep_sst_donor=node2 +show variables like 'wsrep_gtid_domain_id'; +Variable_name Value +wsrep_gtid_domain_id 200 +connection node_1; +set global wsrep_gtid_domain_id=100; +connection node_2; +set global 
wsrep_gtid_domain_id=100; +CALL mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node."); +connection node_3; +set global wsrep_gtid_domain_id=100; +CALL mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node."); diff --git a/mysql-test/suite/galera_3nodes/t/MDEV-29171.cnf b/mysql-test/suite/galera_3nodes/t/MDEV-29171.cnf new file mode 100644 index 00000000000..27f1c29f999 --- /dev/null +++ b/mysql-test/suite/galera_3nodes/t/MDEV-29171.cnf @@ -0,0 +1,14 @@ +!include ../galera_3nodes.cnf + +[mysqld.1] +wsrep-node-name="node1" +wsrep-gtid-mode=ON +wsrep-gtid-domain-id=100 + +[mysqld.2] +wsrep-node-name="node2" +wsrep-gtid-mode=ON + +[mysqld.3] +wsrep-node-name="node3" +wsrep-gtid-mode=ON diff --git a/mysql-test/suite/galera_3nodes/t/MDEV-29171.test b/mysql-test/suite/galera_3nodes/t/MDEV-29171.test new file mode 100644 index 00000000000..33fa4d722ae --- /dev/null +++ b/mysql-test/suite/galera_3nodes/t/MDEV-29171.test @@ -0,0 +1,83 @@ +# +# MDEV-29171: changing the value of wsrep_gtid_domain_id +# with full cluster restart fails on some nodes +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc + +# +# Initially wsrep gtid domain id is 100 +# +--connection node_1 +select @@wsrep_gtid_domain_id,@@wsrep_node_name; + +--connection node_2 +select @@wsrep_gtid_domain_id,@@wsrep_node_name; + +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 +--connection node_3 +select @@wsrep_gtid_domain_id,@@wsrep_node_name; + + +# +# Shutdown all nodes +# +--connection node_3 +--source include/shutdown_mysqld.inc + +--connection node_2 +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--source include/shutdown_mysqld.inc + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source 
include/wait_condition.inc +--source include/shutdown_mysqld.inc + + +# +# Bootstrap from node_1 and change wsrep_gtid_domain_id to 200 +# +--connection node_1 +--let $restart_parameters = --wsrep_new_cluster --wsrep_gtid_domain_id=200 +--source include/start_mysqld.inc +show variables like 'wsrep_gtid_domain_id'; + + +# +# Restart node_2, expect that wsrep_gtid_domain_id has changed to 200 +# +--connection node_2 +--let $restart_parameters = +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--source include/start_mysqld.inc +show variables like 'wsrep_gtid_domain_id'; + + +# +# Restart node_3, select node_2 as donor +# If bug is present, node_3 remains on domain id 100 +# +--connection node_3 +--let $restart_parameters = --wsrep_sst_donor="node2" +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +--source include/start_mysqld.inc +# Expect domain id 200 +show variables like 'wsrep_gtid_domain_id'; + + +# +# Cleanup +# +--connection node_1 +set global wsrep_gtid_domain_id=100; + +--connection node_2 +set global wsrep_gtid_domain_id=100; +CALL mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node."); + +--connection node_3 +set global wsrep_gtid_domain_id=100; +CALL mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node."); diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 3205b2dfa21..f8ee05d665c 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -364,10 +364,12 @@ static void wsrep_log_cb(wsrep::log::level level, void wsrep_init_gtid() { wsrep_server_gtid_t stored_gtid= wsrep_get_SE_checkpoint(); + // Domain id may have changed, use the one + // received during state transfer. 
+ stored_gtid.domain_id= wsrep_gtid_server.domain_id; if (stored_gtid.server_id == 0) { rpl_gtid wsrep_last_gtid; - stored_gtid.domain_id= wsrep_gtid_server.domain_id; if (mysql_bin_log.is_open() && mysql_bin_log.lookup_domain_in_binlog_state(stored_gtid.domain_id, &wsrep_last_gtid)) -- cgit v1.2.1 From 107d54600ebf6e87c43c8a9d95b289fabf8f31fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 17 Jan 2023 14:06:04 +0200 Subject: Stabilize tests. galera_gcache_recover and galera_gcache_recover_manytrx: grepping on the error log is not always successful, as messages might be in a different order or contain different values. galera_vote_sr: we need to make sure the required table creation has replicated, as we use WSREP_ON=off. --- mysql-test/suite/galera/r/galera_gcache_recover.result | 2 -- .../suite/galera/r/galera_gcache_recover_manytrx.result | 2 -- mysql-test/suite/galera/t/galera_gcache_recover.test | 17 ----------------- .../suite/galera/t/galera_gcache_recover_manytrx.test | 16 ---------------- mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr.inc | 2 ++ 5 files changed, 2 insertions(+), 37 deletions(-) diff --git a/mysql-test/suite/galera/r/galera_gcache_recover.result b/mysql-test/suite/galera/r/galera_gcache_recover.result index 819c595ece3..72088a5447b 100644 --- a/mysql-test/suite/galera/r/galera_gcache_recover.result +++ b/mysql-test/suite/galera/r/galera_gcache_recover.result @@ -20,8 +20,6 @@ connection node_1; include/diff_servers.inc [servers=1 2] connection node_1; CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -include/assert_grep.inc [async IST sender starting to serve] connection node_2; CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -include/assert_grep.inc [Recovering GCache ring buffer: found gapless sequence] DROP TABLE t1; diff --git a/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result b/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result index 9e486e2cdfd..8495bfde2f9
100644 --- a/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result +++ b/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result @@ -134,8 +134,6 @@ connection node_1; call mtr.add_suppression("Error in Log_event::read_log_event():.*"); CALL mtr.add_suppression("conflict state 7 after post commit"); CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -include/assert_grep.inc [async IST sender starting to serve] connection node_2; call mtr.add_suppression("Error in Log_event::read_log_event():.*"); CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -include/assert_grep.inc [Recovering GCache ring buffer: found gapless sequence] diff --git a/mysql-test/suite/galera/t/galera_gcache_recover.test b/mysql-test/suite/galera/t/galera_gcache_recover.test index e1bfe517d27..fe2a65ee14e 100644 --- a/mysql-test/suite/galera/t/galera_gcache_recover.test +++ b/mysql-test/suite/galera/t/galera_gcache_recover.test @@ -54,24 +54,7 @@ INSERT INTO t1 VALUES (3); # Warning happens when the cluster is started for the first time CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -# Confirm that IST took place ---let $assert_text = async IST sender starting to serve ---let $assert_select = async IST sender starting to serve ---let $assert_count = 1 ---let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.1.err ---let $assert_only_after = starting as process ---source include/assert_grep.inc - --connection node_2 CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -# Confirm that gcache recovery took place - ---let $assert_text = Recovering GCache ring buffer: found gapless sequence ---let $assert_select = Recovering GCache ring buffer: found gapless sequence ---let $assert_count = 1 ---let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.2.err ---let $assert_only_after = starting as process ---source include/assert_grep.inc - DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test 
b/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test index d92288b7881..8f0f0ed65ea 100644 --- a/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test +++ b/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test @@ -206,23 +206,7 @@ CALL mtr.add_suppression("conflict state 7 after post commit"); # Warning happens when the cluster is started for the first time CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -# Confirm that IST took place ---let $assert_text = async IST sender starting to serve ---let $assert_select = async IST sender starting to serve ---let $assert_count = 1 ---let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.1.err ---let $assert_only_after = starting as process ---source include/assert_grep.inc - --connection node_2 call mtr.add_suppression("Error in Log_event::read_log_event():.*"); CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -# Confirm that gcache recovery took place - ---let $assert_text = Recovering GCache ring buffer: found gapless sequence ---let $assert_select = Recovering GCache ring buffer: found gapless sequence ---let $assert_count = 1 ---let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.2.err ---let $assert_only_after = starting as process ---source include/assert_grep.inc diff --git a/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr.inc b/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr.inc index 776291cc9c0..9fe33e78eb5 100644 --- a/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr.inc +++ b/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr.inc @@ -8,6 +8,8 @@ CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 BLOB) ENGINE=InnoDB; # Introduce inconsistency --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc SET SESSION wsrep_on=OFF; --eval INSERT INTO t1 VALUES ($inconsistent_fragment, 'X') SET SESSION wsrep_on=ON; -- cgit v1.2.1 From a5eff044cb8543cc207ec51965a1d8fd51dd0576 
Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 3 Nov 2022 18:31:42 +0100 Subject: MDEV-22602 Disable UPDATE CASCADE for SQL constraints fix it for named constraints too --- mysql-test/suite/innodb/r/foreign_key.result | 2 ++ mysql-test/suite/innodb/t/foreign_key.test | 2 ++ sql/sql_table.cc | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/innodb/r/foreign_key.result b/mysql-test/suite/innodb/r/foreign_key.result index f8f389a32cf..851deb69fa5 100644 --- a/mysql-test/suite/innodb/r/foreign_key.result +++ b/mysql-test/suite/innodb/r/foreign_key.result @@ -943,6 +943,8 @@ DROP TABLE t1; # # TODO: enable them after MDEV-16417 is finished create or replace table t1 (a int primary key) engine=innodb; +create or replace table t2 (a int, constraint foo check(a > 0), foreign key(a) references t1(a) on update cascade) engine=innodb; +ERROR HY000: Function or expression 'a' cannot be used in the CHECK clause of `foo` create or replace table t2 (a int, check(a > 0), foreign key(a) references t1(a) on update cascade) engine=innodb; ERROR HY000: Function or expression 'a' cannot be used in the CHECK clause of `CONSTRAINT_1` create or replace table t1 (f1 int, f2 date, f3 date, key(f1,f3,f2)) engine=innodb; diff --git a/mysql-test/suite/innodb/t/foreign_key.test b/mysql-test/suite/innodb/t/foreign_key.test index 321c78a5598..135f8d7b024 100644 --- a/mysql-test/suite/innodb/t/foreign_key.test +++ b/mysql-test/suite/innodb/t/foreign_key.test @@ -971,6 +971,8 @@ DROP TABLE t1; --echo # TODO: enable them after MDEV-16417 is finished create or replace table t1 (a int primary key) engine=innodb; --error ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED +create or replace table t2 (a int, constraint foo check(a > 0), foreign key(a) references t1(a) on update cascade) engine=innodb; +--error ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED create or replace table t2 (a int, check(a > 0), foreign key(a) references t1(a) on update cascade) 
engine=innodb; create or replace table t1 (f1 int, f2 date, f3 date, key(f1,f3,f2)) engine=innodb; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 519a5f38868..dbab4c0067c 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -4622,7 +4622,7 @@ without_overlaps_err: my_error(ER_TOO_LONG_IDENT, MYF(0), check->name.str); DBUG_RETURN(TRUE); } - if (check_expression(check, &check->name, VCOL_CHECK_TABLE)) + if (check_expression(check, &check->name, VCOL_CHECK_TABLE, alter_info)) DBUG_RETURN(TRUE); } } -- cgit v1.2.1 From cce76fef381a92bf76f39d4da13981472ebb4cb7 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 3 Nov 2022 19:17:25 +0100 Subject: ADD CONSTRAINT IF NOT EXISTS didn't work in SP "if not exists" must be stored in a separate read-only property --- mysql-test/main/constraints.result | 34 ++++++++++++++++++++++++++++++++++ mysql-test/main/constraints.test | 17 +++++++++++++++++ sql/field.h | 2 +- sql/sql_lex.h | 2 +- sql/sql_table.cc | 4 +--- 5 files changed, 54 insertions(+), 5 deletions(-) diff --git a/mysql-test/main/constraints.result b/mysql-test/main/constraints.result index 53787fb5b65..105ea7cf1f4 100644 --- a/mysql-test/main/constraints.result +++ b/mysql-test/main/constraints.result @@ -183,7 +183,9 @@ t1 CREATE TABLE `t1` ( ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci DROP PROCEDURE sp; DROP TABLE t1; +# # End of 10.2 tests +# create table t1 (a int check (a>10)) select 100 as 'a'; show create table t1; Table Create Table @@ -201,3 +203,35 @@ a 19 ccc drop table t1; +create table t1 (a int, b int); +create procedure sp() alter table t1 add constraint if not exists foo check (b > 0); +call sp; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + CONSTRAINT `foo` CHECK (`b` > 0) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +call sp; +Warnings: +Note 1826 Duplicate CHECK constraint name 'foo' +show create table t1; +Table 
Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + CONSTRAINT `foo` CHECK (`b` > 0) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +call sp; +Warnings: +Note 1826 Duplicate CHECK constraint name 'foo' +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + CONSTRAINT `foo` CHECK (`b` > 0) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +drop procedure sp; +drop table t1; diff --git a/mysql-test/main/constraints.test b/mysql-test/main/constraints.test index 2f4dadcee9d..5c673f9be81 100644 --- a/mysql-test/main/constraints.test +++ b/mysql-test/main/constraints.test @@ -151,7 +151,9 @@ show create table t1; DROP PROCEDURE sp; DROP TABLE t1; +--echo # --echo # End of 10.2 tests +--echo # # # Check that we don't lose constraints as part of CREATE ... SELECT @@ -172,3 +174,18 @@ insert into t1 values ("ccc"); insert into t1 values (""); select * from t1; drop table t1; + +# +# add if not exists in SP +# + +create table t1 (a int, b int); +create procedure sp() alter table t1 add constraint if not exists foo check (b > 0); +call sp; +show create table t1; +call sp; +show create table t1; +call sp; +show create table t1; +drop procedure sp; +drop table t1; diff --git a/sql/field.h b/sql/field.h index 941090ed846..43bcfe5590a 100644 --- a/sql/field.h +++ b/sql/field.h @@ -558,7 +558,6 @@ static inline const char *vcol_type_name(enum_vcol_info_type type) #define VCOL_AUTO_INC 16 #define VCOL_IMPOSSIBLE 32 #define VCOL_NEXTVAL 64 /* NEXTVAL is not implemented for vcols */ -#define VCOL_CHECK_CONSTRAINT_IF_NOT_EXISTS 128 #define VCOL_NOT_STRICTLY_DETERMINISTIC \ (VCOL_NON_DETERMINISTIC | VCOL_TIME_FUNC | VCOL_SESSION_FUNC) @@ -590,6 +589,7 @@ public: bool stored_in_db; bool utf8; /* Already in utf8 */ bool automatic_name; + bool if_not_exists; Item *expr; Lex_ident name; /* Name of constraint */ /* see VCOL_* (VCOL_FIELD_REF, 
...) */ diff --git a/sql/sql_lex.h b/sql/sql_lex.h index ecad3ea60ec..731ddbaefac 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -4375,7 +4375,7 @@ public: bool if_not_exists) { constr->name= name; - constr->flags= if_not_exists ? VCOL_CHECK_CONSTRAINT_IF_NOT_EXISTS : 0; + constr->if_not_exists= if_not_exists; alter_info.check_constraint_list.push_back(constr); return false; } diff --git a/sql/sql_table.cc b/sql/sql_table.cc index dbab4c0067c..24ce892fb12 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -6928,10 +6928,8 @@ remove_key: while ((check=it++)) { - if (!(check->flags & VCOL_CHECK_CONSTRAINT_IF_NOT_EXISTS) && - check->name.length) + if (!check->if_not_exists && check->name.length) continue; - check->flags= 0; for (c= share->field_check_constraints; c < share->table_check_constraints ; c++) { -- cgit v1.2.1 From 3b932255ccdf13b8abbb3a33882a410ac7e4b5b2 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 16 Jan 2023 12:05:15 +0100 Subject: cleanup: const_Item->real_item() allow real_item() to be called for const Item*, remove casts in the code --- sql/item.cc | 2 +- sql/item.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/item.cc b/sql/item.cc index 1a55317754e..ac5082dbdb2 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3444,7 +3444,7 @@ bool Item_field::is_null_result() bool Item_field::eq(const Item *item, bool binary_cmp) const { - Item *real_item2= ((Item *) item)->real_item(); + const Item *real_item2= item->real_item(); if (real_item2->type() != FIELD_ITEM) return 0; diff --git a/sql/item.h b/sql/item.h index 1273de44edb..01898709131 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1891,6 +1891,7 @@ public: virtual Item *copy_or_same(THD *thd) { return this; } virtual Item *copy_andor_structure(THD *thd) { return this; } virtual Item *real_item() { return this; } + const Item *real_item() const { return const_cast(this)->real_item(); } virtual Item *get_tmp_table_item(THD *thd) { return copy_or_same(thd); } 
virtual Item *make_odbc_literal(THD *thd, const LEX_CSTRING *typestr) { @@ -5404,7 +5405,7 @@ public: { return ref ? (*ref)->type() : REF_ITEM; } bool eq(const Item *item, bool binary_cmp) const override { - Item *it= ((Item *) item)->real_item(); + const Item *it= item->real_item(); return ref && (*ref)->eq(it, binary_cmp); } void save_val(Field *to) override; @@ -5762,7 +5763,7 @@ public: { orig_item->make_send_field(thd, field); } bool eq(const Item *item, bool binary_cmp) const override { - Item *it= const_cast(item)->real_item(); + const Item *it= item->real_item(); return orig_item->eq(it, binary_cmp); } void fix_after_pullout(st_select_lex *new_parent, Item **refptr, bool merge) @@ -7638,7 +7639,7 @@ public: { m_item->make_send_field(thd, field); } bool eq(const Item *item, bool binary_cmp) const { - Item *it= ((Item *) item)->real_item(); + const Item *it= item->real_item(); return m_item->eq(it, binary_cmp); } void fix_after_pullout(st_select_lex *new_parent, Item **refptr, bool merge) -- cgit v1.2.1 From 22cd3358b3c4faef2e3e82f74312a9fb709d9f03 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 16 Jan 2023 12:08:05 +0100 Subject: fix failures of main.func_json --ps in normal execution, the item is wrapped in Item_func_conv_charset. 
in --ps the whole is wrapped again in Item_direct_ref_to_item --- sql/item_jsonfunc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 0f1fefac7b2..e6507278381 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1620,7 +1620,7 @@ static bool is_json_type(const Item *item) if (Type_handler_json_common::is_json_type_handler(item->type_handler())) return true; const Item_func_conv_charset *func; - if (!(func= dynamic_cast(item))) + if (!(func= dynamic_cast(item->real_item()))) return false; item= func->arguments()[0]; } -- cgit v1.2.1 From 489b556947087f7606224d6fc09f302eabef14c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 17 Jan 2023 17:52:16 +0200 Subject: MDEV-30422 Merge new release of InnoDB 5.7.41 to 10.3 MySQL 5.7.41 includes one InnoDB change mysql/mysql-server@d2d6b2dd00f709bc528386009150d4bc726e25a0 that seems to be applicable to MariaDB Server 10.3 and 10.4. Even though commit 5b9ee8d8193a8c7a8ebdd35eedcadc3ae78e7fc1 seems to have fixed sporadic failures on our CI systems, it is theoretically possible that another race condition remained. buf_flush_page_cleaner_coordinator(): In the final loop, wait also for buf_get_n_pending_read_ios() to reach 0. In this way, if a secondary index leaf page was read into the buffer pool and ibuf_merge_or_delete_for_page() modified that page or some change buffer pages, the flush loop would execute until the buffer pool really is in a clean state. This potential data corruption bug does not affect MariaDB Server 10.5 or later, thanks to commit b42294bc6409794bdbd2051b32fa079d81cea61d which removed change buffer merges that are not explicitly requested. 
--- storage/innobase/buf/buf0flu.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 6e97be03bdd..b8f8c243a4f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -3324,20 +3324,27 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*) bool success; do { + /* In case an asynchronous read request was posted by + any thread (other than something invoking + ibuf_merge_in_background()), it is possible that the + change buffer will be merged to the page once the read + completes. To avoid race conditions and corruption due + to that, we will loop here until there are no pending + page read operations. */ + success = !buf_get_n_pending_read_ios(); pc_request(ULINT_MAX, LSN_MAX); while (pc_flush_slot() > 0) {} ulint n_flushed_lru = 0; ulint n_flushed_list = 0; - success = pc_wait_finished(&n_flushed_lru, &n_flushed_list); - - n_flushed = n_flushed_lru + n_flushed_list; + success = pc_wait_finished(&n_flushed_lru, &n_flushed_list) + && success && !n_flushed_lru && !n_flushed_list; buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); buf_flush_wait_LRU_batch_end(); - } while (!success || n_flushed > 0); + } while (!success); /* Some sanity checks */ ut_a(srv_get_active_thread_type() == SRV_NONE); -- cgit v1.2.1