summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lindström <jan.lindstrom@mariadb.com>2018-04-24 14:43:41 +0300
committerJan Lindström <jan.lindstrom@mariadb.com>2018-04-24 14:43:41 +0300
commit48e3b4ca5dd6a6cffbee64381dc301d43c66e036 (patch)
tree7e26808d589f850fb518b41d9af5613891a9f8aa
parent9c34a4124d67d9e3f70837eaeb11290f35e8f8d0 (diff)
downloadmariadb-git-bb-10.1-MDEV-15607.tar.gz
MDEV-15607: mysqld crashed few after node is being joined with sstbb-10.1-MDEV-15607
This is a typical systemd response where it tries to shutdown the joiner (due to "timeout") before the joiner manages to complete SST. wsrep_sst_wait wsrep_SE_init_wait While waiting the operation to finish use mysql_cond_timedwait instead of mysql_cond_wait and if operation is not finished extend systemd timeout (if needed).
-rw-r--r--sql/wsrep_sst.cc21
1 files changed, 19 insertions, 2 deletions
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
index 260755d08a8..c1f980bd595 100644
--- a/sql/wsrep_sst.cc
+++ b/sql/wsrep_sst.cc
@@ -30,6 +30,10 @@
#include <cstdio>
#include <cstdlib>
+#if MYSQL_VERSION_ID < 100200
+# include <my_service_manager.h>
+#endif
+
static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 +
sizeof(WSREP_SST_OPT_CONF) +
sizeof(WSREP_SST_OPT_CONF_SUFFIX) +
@@ -186,6 +190,9 @@ bool wsrep_before_SE()
static bool sst_complete = false;
static bool sst_needed = false;
+#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
+#define WSREP_TIMEDWAIT_SECONDS 10
+
void wsrep_sst_grab ()
{
WSREP_INFO("wsrep_sst_grab()");
@@ -197,11 +204,16 @@ void wsrep_sst_grab ()
// Wait for end of SST
bool wsrep_sst_wait ()
{
+ struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
while (!sst_complete)
{
WSREP_INFO("Waiting for SST to complete.");
- mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
+ mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime);
+ if (!sst_complete)
+ service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
+ "WSREP state transfer ongoing, current seqno: %ld", local_seqno);
+
}
if (local_seqno >= 0)
@@ -1298,9 +1310,14 @@ void wsrep_SE_init_grab()
void wsrep_SE_init_wait()
{
+ struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
while (SE_initialized == false)
{
- mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
+ mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime);
+
+ if (!SE_initialized)
+ service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
+ "WSREP SE initialization ongoing.");
}
mysql_mutex_unlock (&LOCK_wsrep_sst_init);
}