From b0b60f249807b6c2d423313350d9ad66693c2d1e Mon Sep 17 00:00:00 2001
From: unknown <knielsen@knielsen-hq.org>
Date: Thu, 8 May 2014 14:20:18 +0200
Subject: MDEV-5262: Missing retry after temp error in parallel replication

Start implementing that an event group can be re-tried in parallel replication
if it fails with a temporary error (like deadlock).

Patch is very incomplete, just some very basic retry works.

Stuff still missing (not complete list):

 - Handle moving to the next relay log file, if event group to be retried
   spans multiple relay log files.

 - Handle refcounting of relay log files, to ensure that we do not purge a
   relay log file and then later attempt to re-execute events out of it.

 - Handle description_event_for_exec - we need to save this somehow for the
   possible retry - and use the correct one in case it differs between relay
   logs.

 - Do another retry attempt in case the first retry also fails.

 - Limit the max number of retries.

 - Lots of testing will be needed for the various edge cases.
---
 sql/rpl_rli.cc | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

(limited to 'sql/rpl_rli.cc')

diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index cc543f7c377..3a3e22f970a 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -52,6 +52,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
    info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period),
    sync_counter(0), is_relay_log_recovery(is_slave_recovery),
    save_temporary_tables(0), mi(0),
+   inuse_relaylog_list(0), last_inuse_relaylog(0),
    cur_log_old_open_count(0), group_relay_log_pos(0), 
    event_relay_log_pos(0),
 #if HAVE_valgrind
@@ -98,8 +99,17 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
 
 Relay_log_info::~Relay_log_info()
 {
+  inuse_relaylog *cur;
   DBUG_ENTER("Relay_log_info::~Relay_log_info");
 
+  cur= inuse_relaylog_list;
+  while (cur)
+  {
+    DBUG_ASSERT(cur->queued_count == cur->dequeued_count);
+    inuse_relaylog *next= cur->next;
+    my_free(cur);
+    cur= next;
+  }
   mysql_mutex_destroy(&run_lock);
   mysql_mutex_destroy(&data_lock);
   mysql_mutex_destroy(&log_space_lock);
@@ -1339,6 +1349,29 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos,
   DBUG_VOID_RETURN;
 }
 
+
+int
+Relay_log_info::alloc_inuse_relaylog(const char *name)
+{
+  inuse_relaylog *ir;
+
+  if (!(ir= (inuse_relaylog *)my_malloc(sizeof(*ir), MYF(MY_WME|MY_ZEROFILL))))
+  {
+    my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*ir));
+    return 1;
+  }
+  strcpy(ir->name, name);
+
+  if (!inuse_relaylog_list)
+    inuse_relaylog_list= ir;
+  else
+    last_inuse_relaylog->next= ir;
+  last_inuse_relaylog= ir;
+
+  return 0;
+}
+
+
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 int
 rpl_load_gtid_slave_state(THD *thd)
@@ -1623,7 +1656,7 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi,
 
 void rpl_group_info::cleanup_context(THD *thd, bool error)
 {
-  DBUG_ENTER("Relay_log_info::cleanup_context");
+  DBUG_ENTER("rpl_group_info::cleanup_context");
   DBUG_PRINT("enter", ("error: %d", (int) error));
   
   DBUG_ASSERT(this->thd == thd);
@@ -1689,7 +1722,7 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
 
 void rpl_group_info::clear_tables_to_lock()
 {
-  DBUG_ENTER("Relay_log_info::clear_tables_to_lock()");
+  DBUG_ENTER("rpl_group_info::clear_tables_to_lock()");
 #ifndef DBUG_OFF
   /**
     When replicating in RBR and MyISAM Merge tables are involved
@@ -1736,7 +1769,7 @@ void rpl_group_info::clear_tables_to_lock()
 
 void rpl_group_info::slave_close_thread_tables(THD *thd)
 {
-  DBUG_ENTER("Relay_log_info::slave_close_thread_tables(THD *thd)");
+  DBUG_ENTER("rpl_group_info::slave_close_thread_tables(THD *thd)");
   thd->get_stmt_da()->set_overwrite_status(true);
   thd->is_error() ? trans_rollback_stmt(thd) : trans_commit_stmt(thd);
   thd->get_stmt_da()->set_overwrite_status(false);
-- 
cgit v1.2.1


From 787c470cef54574e744eb5dfd9153d837fe67e45 Mon Sep 17 00:00:00 2001
From: unknown <knielsen@knielsen-hq.org>
Date: Thu, 15 May 2014 15:52:08 +0200
Subject: MDEV-5262: Missing retry after temp error in parallel replication

Handle retry of event groups that span multiple relay log files.

 - If retry reaches the end of one relay log file, move on to the next.

 - Handle refcounting of relay log files, and avoid purging relay log
   files until all event groups have completed that might have needed
   them for transaction retry.
---
 sql/rpl_rli.cc | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'sql/rpl_rli.cc')

diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 3a3e22f970a..688068b850f 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -92,6 +92,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
   mysql_cond_init(key_relay_log_info_start_cond, &start_cond, NULL);
   mysql_cond_init(key_relay_log_info_stop_cond, &stop_cond, NULL);
   mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL);
+  my_atomic_rwlock_init(&inuse_relaylog_atomic_lock);
   relay_log.init_pthread_objects();
   DBUG_VOID_RETURN;
 }
@@ -117,6 +118,7 @@ Relay_log_info::~Relay_log_info()
   mysql_cond_destroy(&start_cond);
   mysql_cond_destroy(&stop_cond);
   mysql_cond_destroy(&log_space_cond);
+  my_atomic_rwlock_destroy(&inuse_relaylog_atomic_lock);
   relay_log.cleanup();
   DBUG_VOID_RETURN;
 }
@@ -1365,7 +1367,10 @@ Relay_log_info::alloc_inuse_relaylog(const char *name)
   if (!inuse_relaylog_list)
     inuse_relaylog_list= ir;
   else
+  {
+    last_inuse_relaylog->completed= true;
     last_inuse_relaylog->next= ir;
+  }
   last_inuse_relaylog= ir;
 
   return 0;
-- 
cgit v1.2.1


From 629b822913348cec56ec7a80a236f0ba2e613585 Mon Sep 17 00:00:00 2001
From: unknown <knielsen@knielsen-hq.org>
Date: Tue, 3 Jun 2014 10:31:11 +0200
Subject: MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
 replication causing replication to fail.

In parallel replication, we run transactions from the master in parallel, but
force them to commit in the same order they did on the master. If we force T1
to commit before T2, but T2 holds eg. a row lock that is needed by T1, we get
a deadlock when T2 waits until T1 has committed.

Usually, we do not run T1 and T2 in parallel if there is a chance that they
can have conflicting locks like this, but there are certain edge cases where
it can occasionally happen (eg. MDEV-5914, MDEV-5941, MDEV-6020). The bug was
that this would cause replication to hang, eventually getting a lock timeout
and causing the slave to stop with error.

With this patch, InnoDB will report back to the upper layer whenever a
transactions T1 is about to do a lock wait on T2. If T1 and T2 are parallel
replication transactions, and T2 needs to commit later than T1, we can thus
detect the deadlock; we then kill T2, setting a flag that causes it to catch
the kill and convert it to a deadlock error; this error will then cause T2 to
roll back and release its locks (so that T1 can commit), and later T2 will be
re-tried and eventually also committed.

The kill happens asynchroneously in a slave background thread; this is
necessary, as the reporting from InnoDB about lock waits happen deep inside
the locking code, at a point where it is not possible to directly call
THD::awake() due to mutexes held.

Deadlock is assumed to be (very) rarely occuring, so this patch tries to
minimise the performance impact on the normal case where no deadlocks occur,
rather than optimise the handling of the occasional deadlock.

Also fix transaction retry due to deadlock when it happens after a transaction
already signalled to later transactions that it started to commit. In this
case we need to undo this signalling (and later redo it when we commit again
during retry), so following transactions will not start too early.

Also add a missing thd->send_kill_message() that got triggered during testing
(this corrects an incorrect fix for MySQL Bug#58933).
---
 sql/rpl_rli.cc | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'sql/rpl_rli.cc')

diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 688068b850f..9c315271387 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1843,6 +1843,7 @@ rpl_group_info::mark_start_commit()
 }
 
 
+<<<<<<< TREE
 /*
   Format the current GTID as a string suitable for printing in error messages.
 
@@ -1863,6 +1864,36 @@ rpl_group_info::gtid_info()
 }
 
 
+=======
+/*
+  Undo the effect of a prior mark_start_commit().
+
+  This is only used for retrying a transaction in parallel replication, after
+  we have encountered a deadlock or other temporary error.
+
+  When we get such a deadlock, it means that the current group of transactions
+  did not yet all start committing (else they would not have deadlocked). So
+  we will not yet have woken up anything in the next group, our rgi->gco is
+  still live, and we can simply decrement the counter (to be incremented again
+  later, when the retry succeeds and reaches the commit step).
+*/
+void
+rpl_group_info::unmark_start_commit()
+{
+  rpl_parallel_entry *e;
+
+  if (!did_mark_start_commit)
+    return;
+
+  e= this->parallel_entry;
+  mysql_mutex_lock(&e->LOCK_parallel_entry);
+  --e->count_committing_event_groups;
+  mysql_mutex_unlock(&e->LOCK_parallel_entry);
+  did_mark_start_commit= false;
+}
+
+
+>>>>>>> MERGE-SOURCE
 rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter)
   : rpl_filter(filter)
 {
-- 
cgit v1.2.1


From bd4153a8c2978af6d39a60a7f1c4e13c68fbbaab Mon Sep 17 00:00:00 2001
From: unknown <knielsen@knielsen-hq.org>
Date: Tue, 10 Jun 2014 10:13:15 +0200
Subject: MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
 replication causing replication to fail.

Remove the temporary fix for MDEV-5914, which used READ COMMITTED for parallel
replication worker threads. Replace it with a better, more selective solution.

The issue is with certain edge cases of InnoDB gap locks, for example between
INSERT and ranged DELETE. It is possible for the gap lock set by the DELETE to
block the INSERT, if the DELETE runs first, while the record lock set by
INSERT does not block the DELETE, if the INSERT runs first. This can cause a
conflict between the two in parallel replication on the slave even though they
ran without conflicts on the master.

With this patch, InnoDB will ask the server layer about the two involved
transactions before blocking on a gap lock. If the server layer tells InnoDB
that the transactions are already fixed wrt. commit order, as they are in
parallel replication, InnoDB will ignore the gap lock and allow the two
transactions to proceed in parallel, avoiding the conflict.

Improve the fix for MDEV-6020. When InnoDB itself detects a deadlock, it now
asks the server layer for any preferences about which transaction to roll
back. In case of parallel replication with two transactions T1 and T2 fixed to
commit T1 before T2, the server layer will ask InnoDB to roll back T2 as the
deadlock victim, not T1. This helps in some cases to avoid excessive deadlock
rollback, as T2 will in any case need to wait for T1 to complete before it can
itself commit.

Also some misc. fixes found during development and testing:

 - Remove thd_rpl_is_parallel(), it is not used or needed.

 - Use KILL_CONNECTION instead of KILL_QUERY when a parallel replication
   worker thread is killed to resolve a deadlock with fixed commit
   ordering. There are some cases, eg. in sql/sql_parse.cc, where a KILL_QUERY
   can be ignored if the query otherwise completed successfully, and this
   could cause the deadlock kill to be lost, so that the deadlock was not
   correctly resolved.

 - Fix random test failure due to missing wait_for_binlog_checkpoint.inc.

 - Make sure that deadlock or other temporary errors during parallel
   replication are not printed to the the error log; there were some places
   around the replication code with extra error logging. These conditions can
   occur occasionally and are handled automatically without breaking
   replication, so they should not pollute the error log.

 - Fix handling of rgi->gtid_sub_id. We need to be able to access this also at
   the end of a transaction, to be able to detect and resolve deadlocks due to
   commit ordering. But this value was also used as a flag to mark whether
   record_gtid() had been called, by being set to zero, losing the value. Now,
   introduce a separate flag rgi->gtid_pending, so rgi->gtid_sub_id remains
   valid for the entire duration of the transaction.

 - Fix one place where the code to handle ignored errors called reset_killed()
   unconditionally, even if no error was caught that should be ignored. This
   could cause loss of a deadlock kill signal, breaking deadlock detection and
   resolution.

 - Fix a couple of missing mysql_reset_thd_for_next_command(). This could
   cause a prior error condition to remain for the next event executed,
   causing assertions about errors already being set and possibly giving
   incorrect error handling for following event executions.

 - Fix code that cleared thd->rgi_slave in the parallel replication worker
   threads after each event execution; this caused the deadlock detection and
   handling code to not be able to correctly process the associated
   transactions as belonging to replication worker threads.

 - Remove useless error code in slave_background_kill_request().

 - Fix bug where wfc->wakeup_error was not cleared at
   wait_for_commit::unregister_wait_for_prior_commit(). This could cause the
   error condition to wrongly propagate to a later wait_for_prior_commit(),
   causing spurious ER_PRIOR_COMMIT_FAILED errors.

 - Do not put the binlog background thread into the processlist. It causes
   too many result differences in mtr, but also it probably is not useful
   for users to pollute the process list with a system thread that does not
   really perform any user-visible tasks...
---
 sql/rpl_rli.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'sql/rpl_rli.cc')

diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 9c315271387..08327588698 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1563,6 +1563,8 @@ rpl_group_info::reinit(Relay_log_info *rli)
   tables_to_lock_count= 0;
   trans_retries= 0;
   last_event_start_time= 0;
+  gtid_sub_id= 0;
+  gtid_pending= false;
   worker_error= 0;
   row_stmt_start_timestamp= 0;
   long_find_row_note_printed= false;
@@ -1572,7 +1574,7 @@ rpl_group_info::reinit(Relay_log_info *rli)
 }
 
 rpl_group_info::rpl_group_info(Relay_log_info *rli)
-  : thd(0), gtid_sub_id(0), wait_commit_sub_id(0),
+  : thd(0), wait_commit_sub_id(0),
     wait_commit_group_info(0), parallel_entry(0),
     deferred_events(NULL), m_annotate_event(0), is_parallel_exec(false)
 {
@@ -1606,6 +1608,7 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev)
   rgi->current_gtid.server_id= gev->server_id;
   rgi->current_gtid.domain_id= gev->domain_id;
   rgi->current_gtid.seq_no= gev->seq_no;
+  rgi->gtid_pending= true;
   return 0;
 }
 
-- 
cgit v1.2.1


From 98fc5b3af8b1954e4480ac33d30493aa4de66ec4 Mon Sep 17 00:00:00 2001
From: Kristian Nielsen <knielsen@knielsen-hq.org>
Date: Tue, 8 Jul 2014 12:54:47 +0200
Subject: MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
 replication causing replication to fail.

After-review changes.

For this patch in 10.0, we do not introduce a new public storage engine API,
we just fix the InnoDB/XtraDB issues. In 10.1, we will make a better public
API that can be used for all storage engines (MDEV-6429).

Eliminate the background thread that did deadlock kills asynchroneously.
Instead, we ensure that the InnoDB/XtraDB code can handle doing the kill from
inside the deadlock detection code (when thd_report_wait_for() needs to kill a
later thread to resolve a deadlock).

(We preserve the part of the original patch that introduces dedicated mutex
and condition for the slave init thread, to remove the abuse of
LOCK_thread_count for start/stop synchronisation of the slave init thread).
---
 sql/rpl_rli.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'sql/rpl_rli.cc')

diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 08327588698..ee1ca37ccc5 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1362,7 +1362,7 @@ Relay_log_info::alloc_inuse_relaylog(const char *name)
     my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*ir));
     return 1;
   }
-  strcpy(ir->name, name);
+  strmake_buf(ir->name, name);
 
   if (!inuse_relaylog_list)
     inuse_relaylog_list= ir;
@@ -1564,6 +1564,7 @@ rpl_group_info::reinit(Relay_log_info *rli)
   trans_retries= 0;
   last_event_start_time= 0;
   gtid_sub_id= 0;
+  commit_id= 0;
   gtid_pending= false;
   worker_error= 0;
   row_stmt_start_timestamp= 0;
@@ -1608,6 +1609,7 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev)
   rgi->current_gtid.server_id= gev->server_id;
   rgi->current_gtid.domain_id= gev->domain_id;
   rgi->current_gtid.seq_no= gev->seq_no;
+  rgi->commit_id= gev->commit_id;
   rgi->gtid_pending= true;
   return 0;
 }
-- 
cgit v1.2.1


From ba4e56d8d753b57815edd38fb17bd2a2a1d9ca8c Mon Sep 17 00:00:00 2001
From: Kristian Nielsen <knielsen@knielsen-hq.org>
Date: Tue, 8 Jul 2014 15:59:03 +0200
Subject: Fix small merge errors after rebase

---
 sql/rpl_rli.cc | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'sql/rpl_rli.cc')

diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index ee1ca37ccc5..3307293e656 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1848,7 +1848,6 @@ rpl_group_info::mark_start_commit()
 }
 
 
-<<<<<<< TREE
 /*
   Format the current GTID as a string suitable for printing in error messages.
 
@@ -1869,7 +1868,6 @@ rpl_group_info::gtid_info()
 }
 
 
-=======
 /*
   Undo the effect of a prior mark_start_commit().
 
@@ -1898,7 +1896,6 @@ rpl_group_info::unmark_start_commit()
 }
 
 
->>>>>>> MERGE-SOURCE
 rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter)
   : rpl_filter(filter)
 {
-- 
cgit v1.2.1