From ae6801eb23cf91c83db0727784e5037e03e2df2d Mon Sep 17 00:00:00 2001
From: Davi Arnaut <davi.arnaut@oracle.com>
Date: Fri, 22 Oct 2010 09:58:09 -0200
Subject: Bug#37780: Make KILL reliable (main.kill fails randomly)

- A prerequisite cleanup patch for making KILL reliable.

The test case main.kill did not work reliably.

The following problems have been identified:

1. A kill signal could get lost if it arrived shortly before a
thread started reading from the client connection.

2. A kill signal could get lost if it arrived shortly before a
thread started waiting on a condition variable.

These problems have been solved as follows. Please see also
added code comments for more details.

1. There is no safe way to detect when a thread enters the
blocking state of a read(2) or recv(2) system call, where it
can be interrupted by a signal. Hence it is not possible to
wait for the right moment to send a kill signal. It has been
decided not to fix this in the code.  Instead, the test case
repeats the KILL statement until the connection terminates.

2. Before waiting on a condition variable, we register it
together with a synchronizing mutex in THD::mysys_var. After
this, we need to test THD::killed again. In some places we
only tested it in a loop condition before the registration. When
THD::killed had been set between this test and the registration,
we entered waiting without noticing the killed flag. Additional
checks have been introduced where required.

In addition to the above, a re-write of the main.kill test
case has been done. All sleeps have been replaced by Debug
Sync Facility synchronization. A couple of sync points have
been added to the server code.

To avoid further problems, if the test case fails in spite of
the fixes, the test case has been added to the "experimental"
list for now.

- Most of the work on this patch is authored by Ingo Struewing
---
 sql/sql_class.cc | 63 +++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 12 deletions(-)

(limited to 'sql/sql_class.cc')

diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 15fbc6a1480..da61c67f1c8 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1179,36 +1179,70 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
 }
 
 
+/**
+  Awake a thread.
+
+  @param[in]  state_to_set    value for THD::killed
+
+  This is normally called from another thread's THD object.
+
+  @note Do always call this while holding LOCK_thd_data.
+*/
+
 void THD::awake(THD::killed_state state_to_set)
 {
   DBUG_ENTER("THD::awake");
-  DBUG_PRINT("enter", ("this: 0x%lx", (long) this));
+  DBUG_PRINT("enter", ("this: %p current_thd: %p", this, current_thd));
   THD_CHECK_SENTRY(this);
   mysql_mutex_assert_owner(&LOCK_thd_data);
 
+  /* Set the 'killed' flag of 'this', which is the target THD object. */
   killed= state_to_set;
+
   if (state_to_set != THD::KILL_QUERY)
   {
-    thr_alarm_kill(thread_id);
-    if (!slave_thread)
-      MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (this));
 #ifdef SIGNAL_WITH_VIO_CLOSE
     if (this != current_thd)
     {
       /*
-        In addition to a signal, let's close the socket of the thread that
-        is being killed. This is to make sure it does not block if the
-        signal is lost. This needs to be done only on platforms where
-        signals are not a reliable interruption mechanism.
-
-        If we're killing ourselves, we know that we're not blocked, so this
-        hack is not used.
+        Before sending a signal, let's close the socket of the thread
+        that is being killed ("this", which is not the current thread).
+        This is to make sure it does not block if the signal is lost.
+        This needs to be done only on platforms where signals are not
+        a reliable interruption mechanism.
+
+        Note that the downside of this mechanism is that we could close
+        the connection while "this" target thread is in the middle of
+        sending a result to the application, thus violating the client-
+        server protocol.
+
+        On the other hand, without closing the socket we have a race
+        condition. If "this" target thread passes the check of
+        thd->killed, and then the current thread runs through
+        THD::awake(), sets the 'killed' flag and completes the
+        signaling, and then the target thread runs into read(), it will
+        block on the socket. As a result of the discussions around
+        Bug#37780, it has been decided that we accept the race
+        condition. A second KILL awakes the target from read().
+
+        If we are killing ourselves, we know that we are not blocked.
+        We also know that we will check thd->killed before we go for
+        reading the next statement.
       */
 
       close_active_vio();
     }
-#endif    
+#endif
+
+    /* Mark the target thread's alarm request expired, and signal alarm. */
+    thr_alarm_kill(thread_id);
+
+    /* Send an event to the scheduler that a thread should be killed. */
+    if (!slave_thread)
+      MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (this));
   }
+
+  /* Broadcast a condition to kick the target if it is waiting on it. */
   if (mysys_var)
   {
     mysql_mutex_lock(&mysys_var->mutex);
@@ -1232,6 +1266,11 @@ void THD::awake(THD::killed_state state_to_set)
       we issue a second KILL or the status it's waiting for happens).
       It's true that we have set its thd->killed but it may not
       see it immediately and so may have time to reach the cond_wait().
+
+      However, where possible, we test for killed once again after
+      enter_cond(). This should make the signaling as safe as possible.
+      However, there is still a small chance of failure on platforms with
+      instruction or memory write reordering.
     */
     if (mysys_var->current_cond && mysys_var->current_mutex)
     {
-- 
cgit v1.2.1


From 378cdc58c14afb7c48752f98889073fefe2c7ca7 Mon Sep 17 00:00:00 2001
From: Dmitry Lenev <Dmitry.Lenev@oracle.com>
Date: Thu, 11 Nov 2010 20:11:05 +0300
Subject: Patch that refactors global read lock implementation and fixes bug
 #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK" and bug
 #54673 "It takes too long to get readlock for 'FLUSH TABLES WITH READ LOCK'".

The first bug manifested itself as a deadlock which occurred
when a connection, which had some table open through HANDLER
statement, tried to update some data through DML statement
while another connection tried to execute FLUSH TABLES WITH
READ LOCK concurrently.

What happened was that FTWRL in the second connection managed
to perform the first step of GRL acquisition and thus blocked all
upcoming DML. After that it started to wait for the table opened
through the HANDLER statement to be flushed. When the first connection
tried to execute DML it started to wait for GRL/the second
connection, creating a deadlock.

The second bug manifested itself as starvation of FLUSH TABLES
WITH READ LOCK statements in cases when there was a constant
stream of concurrent DML statements (in two or more
connections).

This has happened because requests for protection against GRL
which were acquired by DML statements were ignoring presence of
pending GRL and thus the latter was starved.

This patch solves both these problems by re-implementing GRL
using metadata locks.

Similar to the old implementation acquisition of GRL in new
implementation is two-step. During the first step we block
all concurrent DML and DDL statements by acquiring global S
metadata lock (each DML and DDL statement acquires global IX
lock for its duration). During the second step we block commits
by acquiring global S lock in COMMIT namespace (commit code
acquires global IX lock in this namespace).

Note that unlike in old implementation acquisition of
protection against GRL in DML and DDL is semi-automatic.
We assume that any statement which should be blocked by GRL
will either open and acquire write-locks on tables or acquire
metadata locks on objects it is going to modify. For any such
statement a global IX metadata lock is automatically acquired
for its duration.

The first problem is solved because waits for GRL become
visible to deadlock detector in metadata locking subsystem
and thus deadlocks like one in the first bug become impossible.

The second problem is solved because global S locks which
are used for GRL implementation are given preference over
IX locks which are acquired by concurrent DML (and we can
switch to fair scheduling in future if needed).

Important change:
FTWRL/GRL no longer blocks DML and DDL on temporary tables.
Before this patch behavior was not consistent in this respect:
in some cases DML/DDL statements on temporary tables were
blocked while in others they were not. Since the main use cases
for FTWRL are various forms of backups and temporary tables are
not preserved during backups we have opted for consistently
allowing DML/DDL on temporary tables during FTWRL/GRL.

Important change:
This patch changes thread state names which are used when
DML/DDL or FTWRL is waiting for global read lock. It is now
either "Waiting for global read lock" or "Waiting for commit
lock" depending on which stage FTWRL is at.

Incompatible change:
To solve a deadlock in events code which was exposed by this
patch we have to replace LOCK_event_metadata mutex with
metadata locks on events. As a result we have to prohibit
DDL on events under LOCK TABLES.

This patch also adds extensive test coverage for interaction
of DML/DDL and FTWRL.

The performance of the new and old global read lock implementations
was compared in sysbench tests. There was no significant
difference between the new and old implementations.
---
 sql/sql_class.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'sql/sql_class.cc')

diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index c848d686299..1823a0416ff 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -3497,11 +3497,15 @@ void THD::set_mysys_var(struct st_my_thread_var *new_mysys_var)
 void THD::leave_locked_tables_mode()
 {
   locked_tables_mode= LTM_NONE;
-  /* Make sure we don't release the global read lock when leaving LTM. */
-  mdl_context.reset_trans_sentinel(global_read_lock.global_shared_lock());
+  mdl_context.set_transaction_duration_for_all_locks();
+  /*
+    Make sure we don't release the global read lock and commit blocker
+    when leaving LTM.
+  */
+  global_read_lock.set_explicit_lock_duration(this);
   /* Also ensure that we don't release metadata locks for open HANDLERs. */
   if (handler_tables_hash.records)
-    mysql_ha_move_tickets_after_trans_sentinel(this);
+    mysql_ha_set_explicit_lock_duration(this);
 }
 
 void THD::get_definer(LEX_USER *definer)
-- 
cgit v1.2.1