From 80d0dd7babb5ade8345cdd7065e8f9ef6b65e3da Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 25 Oct 2013 12:56:12 +0200 Subject: MDEV-4506: Parallel replication. Do not update relay-log.info and master.info on disk after every event when using GTID mode: - relay-log.info and master.info are not crash-safe, and are not used when slave restarts in GTID mode (slave connects with GTID position instead and immediately rewrites the file with the new, correct information found). - When using GTID and parallel replication, the position in relay-log.info is misleading at best and simply wrong at worst. - When using parallel replication, the fact that every single transaction needs to do a write() syscall to the same file is likely to become a serious bottleneck. The files are still written at normal slave stop. In non-GTID mode, the files are written as normal (this is needed to be able to restart after slave crash, even if such restart is then not crash-safe, no change). --- sql/rpl_parallel.cc | 7 +------ sql/rpl_rli.cc | 9 ++++++--- sql/slave.cc | 7 ++++++- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'sql') diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index b8d75c7bc82..e1d8b3a2f0c 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -14,10 +14,7 @@ following transactions, so slave binlog position will be correct. And all the retry logic for temporary errors like deadlock. - - In GTID replication, we should not need to update master.info and - relay-log.info on disk at all except at slave thread stop. They are not - used to know where to restart, the updates are not crash-safe, and it - could negatively affect performance. + - Retry of failed transactions is not yet implemented for the parallel case. - All the waits (eg. in struct wait_for_commit and in rpl_parallel_thread_pool::get_thread()) need to be killable. 
And on kill, @@ -29,8 +26,6 @@ slave rolls back the transaction; parallel execution needs to be able to deal with this wrt. commit_orderer and such. See Format_description_log_event::do_apply_event(). - - - Retry of failed transactions is not yet implemented for the parallel case. */ struct rpl_parallel_thread_pool global_rpl_thread_pool; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index d8a604cfe32..ebbe5f4407c 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1274,9 +1274,12 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, DBA aware of the problem in the error log. */ } - DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); - flush_relay_log_info(this); - DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); + if (mi->using_gtid == Master_info::USE_GTID_NO) + { + DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); + flush_relay_log_info(this); + DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); + } /* Note that Rotate_log_event::do_apply_event() does not call this function, so there is no chance that a fake rotate event resets diff --git a/sql/slave.cc b/sql/slave.cc index 113462b5aa0..fcc92f42536 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3901,7 +3901,8 @@ Stopping slave I/O thread due to out-of-memory error from master"); goto err; } - if (flush_master_info(mi, TRUE, TRUE)) + if (mi->using_gtid == Master_info::USE_GTID_NO && + flush_master_info(mi, TRUE, TRUE)) { sql_print_error("Failed to flush master info file"); goto err; @@ -3978,6 +3979,8 @@ err: mi->mysql=0; } write_ignored_events_info_to_relay_log(thd, mi); + if (mi->using_gtid != Master_info::USE_GTID_NO) + flush_master_info(mi, TRUE, TRUE); thd_proc_info(thd, "Slave io thread waiting for slave mutex on exit"); mysql_mutex_lock(&mi->run_lock); @@ -4462,6 +4465,8 @@ the slave SQL thread with \"SLAVE START\". 
We stopped at log \ thd->catalog= 0; thd->reset_query(); thd->reset_db(NULL, 0); + if (rli->mi->using_gtid != Master_info::USE_GTID_NO) + flush_relay_log_info(rli); thd_proc_info(thd, "Sql driver thread waiting for slave mutex on exit"); mysql_mutex_lock(&rli->run_lock); err_during_init: -- cgit v1.2.1