From 80d0dd7babb5ade8345cdd7065e8f9ef6b65e3da Mon Sep 17 00:00:00 2001
From: unknown <knielsen@knielsen-hq.org>
Date: Fri, 25 Oct 2013 12:56:12 +0200
Subject: MDEV-4506: Parallel replication.

Do not update relay-log.info and master.info on disk after every event
when using GTID mode:

 - relay-log.info and master.info are not crash-safe, and are not used
   when the slave restarts in GTID mode (the slave connects with its GTID
   position instead and immediately rewrites the files with the new,
   correct information found).

 - When using GTID and parallel replication, the position in
   relay-log.info is misleading at best and simply wrong at worst.

 - When using parallel replication, the fact that every single
   transaction needs to do a write() syscall to the same file is
   likely to become a serious bottleneck.

The files are still written at normal slave stop.

In non-GTID mode, the files are still written after every event, exactly
as before (this is needed to be able to restart after a slave crash, even
though such a restart is not crash-safe).
---
 sql/rpl_parallel.cc | 7 +------
 sql/rpl_rli.cc      | 9 ++++++---
 sql/slave.cc        | 7 ++++++-
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'sql')

diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index b8d75c7bc82..e1d8b3a2f0c 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -14,10 +14,7 @@
      following transactions, so slave binlog position will be correct.
      And all the retry logic for temporary errors like deadlock.
 
-   - In GTID replication, we should not need to update master.info and
-     relay-log.info on disk at all except at slave thread stop. They are not
-     used to know where to restart, the updates are not crash-safe, and it
-     could negatively affect performance.
+   - Retry of failed transactions is not yet implemented for the parallel case.
 
    - All the waits (eg. in struct wait_for_commit and in
      rpl_parallel_thread_pool::get_thread()) need to be killable. And on kill,
@@ -29,8 +26,6 @@
      slave rolls back the transaction; parallel execution needs to be able
      to deal with this wrt. commit_orderer and such.
      See Format_description_log_event::do_apply_event().
-
-   - Retry of failed transactions is not yet implemented for the parallel case.
 */
 
 struct rpl_parallel_thread_pool global_rpl_thread_pool;
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index d8a604cfe32..ebbe5f4407c 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1274,9 +1274,12 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos,
         DBA aware of the problem in the error log.
       */
     }
-    DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE(););
-    flush_relay_log_info(this);
-    DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE(););
+    if (mi->using_gtid == Master_info::USE_GTID_NO)
+    {
+      DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE(););
+      flush_relay_log_info(this);
+      DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE(););
+    }
     /*
       Note that Rotate_log_event::do_apply_event() does not call this
       function, so there is no chance that a fake rotate event resets
diff --git a/sql/slave.cc b/sql/slave.cc
index 113462b5aa0..fcc92f42536 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3901,7 +3901,8 @@ Stopping slave I/O thread due to out-of-memory error from master");
         goto err;
       }
 
-      if (flush_master_info(mi, TRUE, TRUE))
+      if (mi->using_gtid == Master_info::USE_GTID_NO &&
+          flush_master_info(mi, TRUE, TRUE)) /* skipped in GTID mode */
       {
         sql_print_error("Failed to flush master info file");
         goto err;
@@ -3978,6 +3979,8 @@ err:
     mi->mysql=0;
   }
   write_ignored_events_info_to_relay_log(thd, mi);
+  if (mi->using_gtid != Master_info::USE_GTID_NO)
+    flush_master_info(mi, TRUE, TRUE);
   thd_proc_info(thd, "Slave io thread waiting for slave mutex on exit");
   mysql_mutex_lock(&mi->run_lock);
 
@@ -4462,6 +4465,8 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
   thd->catalog= 0;
   thd->reset_query();
   thd->reset_db(NULL, 0);
+  if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
+    flush_relay_log_info(rli);
   thd_proc_info(thd, "Sql driver thread waiting for slave mutex on exit");
   mysql_mutex_lock(&rli->run_lock);
 err_during_init:
-- 
cgit v1.2.1