diff options
author | unknown <guilhem@mysql.com> | 2005-03-02 11:29:48 +0100 |
---|---|---|
committer | unknown <guilhem@mysql.com> | 2005-03-02 11:29:48 +0100 |
commit | d7e0784c65b44f3fa36f602e5f1d7579eed6b88b (patch) | |
tree | a63b632fd76de92b2e46c444930abb8d50336b2d /sql/slave.cc | |
parent | 3087b2f1fde43ea192bcf7a2fa217aadc7bf7d3a (diff) | |
download | mariadb-git-d7e0784c65b44f3fa36f602e5f1d7579eed6b88b.tar.gz |
Fix for BUG#8325 "Deadlock in replication thread stops replication":
In the slave SQL thread: if a transaction fails because of an InnoDB deadlock or because innodb_lock_wait_timeout was exceeded,
optionally retry the transaction a certain number of times (new variable --slave_transaction_retries).
sql/mysql_priv.h:
new var slave_transaction_retries
sql/mysqld.cc:
new variable slave_transaction_retries; also fixes a typo.
sql/set_var.cc:
new global variable slave_transaction_retries (will be one per subslave, when we have multimaster).
sql/slave.cc:
Slave SQL thread: if a transaction fails because of an InnoDB deadlock or because innodb_lock_wait_timeout was exceeded,
optionally retry the transaction a certain number of times (--slave_transaction_retries).
sql/slave.h:
new RELAY_LOG_INFO::trans_retries.
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- | sql/slave.cc | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/sql/slave.cc b/sql/slave.cc index a39cbdbe14b..145f4295075 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2959,6 +2959,62 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) exec_res = ev->exec_event(rli); DBUG_ASSERT(rli->sql_thd==thd); delete ev; + if (slave_trans_retries) + { + if (exec_res && + (thd->net.last_errno == ER_LOCK_DEADLOCK || + thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT) && + !thd->is_fatal_error) + { + const char *errmsg; + /* + We were in a transaction which has been rolled back because of a + deadlock (currently, InnoDB deadlock detected by InnoDB) or lock + wait timeout (innodb_lock_wait_timeout exceeded); let's seek back to + BEGIN log event and retry it all again. + We have to not only seek but also + a) init_master_info(), to seek back to hot relay log's start for later + (for when we will come back to this hot log after re-processing the + possibly existing old logs where BEGIN is: check_binlog_magic() will + then need the cache to be at position 0 (see comments at beginning of + init_master_info()). + b) init_relay_log_pos(), because the BEGIN may be an older relay log. + */ + if (rli->trans_retries--) + { + sql_print_information("Slave SQL thread retries transaction"); + if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL)) + sql_print_error("Failed to initialize the master info structure"); + else if (init_relay_log_pos(rli, + rli->group_relay_log_name, + rli->group_relay_log_pos, + 1, &errmsg)) + sql_print_error("Error initializing relay log position: %s", + errmsg); + else + { + exec_res= 0; + sleep(2); // chance for concurrent connection to get more locks + } + } + else + sql_print_error("Slave SQL thread retried transaction %lu time(s) " + "in vain, giving up. 
Consider raising the value of " + "the slave_transaction_retries variable.", + slave_trans_retries); + } + if (!((thd->options & OPTION_BEGIN) && opt_using_transactions)) + { + rli->trans_retries= slave_trans_retries; // restart from fresh + /* + TODO: when merged into 5.0, when slave does auto-rollback if + corrupted binlog, this should reset the retry counter too + (any rollback should). In fact it will work, as here we are just out + of a Format_description_log_event::exec_event() which rolled back. + But check repl code in 5.0 for new ha_rollback calls, just in case. + */ + } + } return exec_res; } else @@ -3370,6 +3426,7 @@ slave_begin: pthread_mutex_lock(&rli->log_space_lock); rli->ignore_log_space_limit= 0; pthread_mutex_unlock(&rli->log_space_lock); + rli->trans_retries= slave_trans_retries; // start from "no error" if (init_relay_log_pos(rli, rli->group_relay_log_name, |