diff options
author | Nirbhay Choubey <nirbhay@mariadb.com> | 2016-06-29 16:50:53 -0400 |
---|---|---|
committer | Nirbhay Choubey <nirbhay@mariadb.com> | 2016-08-21 16:20:09 -0400 |
commit | cced23cf23f013bee9f137001f1d51142bace964 (patch) | |
tree | 1cf65c7953be7e9b4c3d54ad6ca9044772b2f744 | |
parent | 415823a41cb7f302e9620f2b0fb57bcc69140d3f (diff) | |
download | mariadb-git-cced23cf23f013bee9f137001f1d51142bace964.tar.gz |
MDEV-9423: cannot add new node to the cluster: Binlog..
.. file '/var/log/mysql/mariadb-bin.000001' not found in binlog
index, needed for recovery. Aborting.
In Galera cluster, while preparing for rsync/xtrabackup based
SST, the donor node takes an FTWRL followed by (REFRESH_ENGINE_LOG
in rsync based state transfer and) REFRESH_BINARY_LOG. The latter
rotates the binary log and logs Binlog_checkpoint_log_event
corresponding to the penultimate binary log file into the new file.
The checkpoint event for the current file is later logged
synchronously by binlog_background_thread.
Since the rsync/xtrabackup based snapshot state transfer methods
transfer only the last binary log file to the joiner node, that
file could get transferred even before the checkpoint event for the
same file gets written to it. As a result, the joiner node would fail
to start, complaining about the missing binlog file needed for recovery.
In order to fix this, a mechanism has been put in place to make
REFRESH_BINARY_LOG operation wait for Binlog_checkpoint_log_event
to be logged for the current binary log file if the node is part of
a Galera cluster. As a further safety measure, during rsync based state
transfer the donor node now acquires and holds LOCK_log for the duration
of the file transfer.
-rw-r--r-- | sql/log.cc | 29 | ||||
-rw-r--r-- | sql/log.h | 1 | ||||
-rw-r--r-- | sql/sql_reload.cc | 6 | ||||
-rw-r--r-- | sql/wsrep_sst.cc | 20 |
4 files changed, 53 insertions, 3 deletions
diff --git a/sql/log.cc b/sql/log.cc index 1d11b6ff01b..2479208b395 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3687,7 +3687,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name, new_xid_list_entry->binlog_id= current_binlog_id; /* Remove any initial entries with no pending XIDs. */ while ((b= binlog_xid_count_list.head()) && b->xid_count == 0) + { my_free(binlog_xid_count_list.get()); + } + mysql_cond_broadcast(&COND_xid_list); binlog_xid_count_list.push_back(new_xid_list_entry); mysql_mutex_unlock(&LOCK_xid_list); @@ -4208,6 +4211,7 @@ err: DBUG_ASSERT(b->xid_count == 0); my_free(binlog_xid_count_list.get()); } + mysql_cond_broadcast(&COND_xid_list); reset_master_pending--; mysql_mutex_unlock(&LOCK_xid_list); } @@ -4218,6 +4222,26 @@ err: } +void MYSQL_BIN_LOG::wait_for_last_checkpoint_event() +{ + mysql_mutex_lock(&LOCK_xid_list); + for (;;) + { + if (binlog_xid_count_list.is_last(binlog_xid_count_list.head())) + break; + mysql_cond_wait(&COND_xid_list, &LOCK_xid_list); + } + mysql_mutex_unlock(&LOCK_xid_list); + + /* + LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be + obtained after mark_xid_done() has written the last checkpoint event. + */ + mysql_mutex_lock(&LOCK_log); + mysql_mutex_unlock(&LOCK_log); +} + + /** Delete relay log files prior to rli->group_relay_log_name (i.e. 
all logs which are not involved in a non-finished group @@ -9260,7 +9284,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint) */ if (unlikely(reset_master_pending)) { - mysql_cond_signal(&COND_xid_list); + mysql_cond_broadcast(&COND_xid_list); mysql_mutex_unlock(&LOCK_xid_list); DBUG_VOID_RETURN; } @@ -9298,8 +9322,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint) mysql_mutex_lock(&LOCK_log); mysql_mutex_lock(&LOCK_xid_list); --mark_xid_done_waiting; - if (unlikely(reset_master_pending)) - mysql_cond_signal(&COND_xid_list); + mysql_cond_broadcast(&COND_xid_list); /* We need to reload current_binlog_id due to release/re-take of lock. */ current= current_binlog_id; diff --git a/sql/log.h b/sql/log.h index 7f44113f66d..9eb9f88031d 100644 --- a/sql/log.h +++ b/sql/log.h @@ -774,6 +774,7 @@ public: bool need_mutex); bool reset_logs(THD* thd, bool create_new_log, rpl_gtid *init_state, uint32 init_state_len); + void wait_for_last_checkpoint_event(); void close(uint exiting); void clear_inuse_flag_when_closing(File file); diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc index f8c04af56bb..a83e91680da 100644 --- a/sql/sql_reload.cc +++ b/sql/sql_reload.cc @@ -155,6 +155,12 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, { if (mysql_bin_log.rotate_and_purge(true)) *write_to_binlog= -1; + + if (WSREP_ON) + { + /* Wait for last binlog checkpoint event to be logged. */ + mysql_bin_log.wait_for_last_checkpoint_event(); + } } } if (options & REFRESH_RELAY_LOG) diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index b697a557476..877a93eec44 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1006,6 +1006,16 @@ wait_signal: if (!err) { sst_disallow_writes (thd.ptr, true); + /* + Lets also keep statements that modify binary logs (like RESET LOGS, + RESET MASTER) from proceeding until the files have been transferred + to the joiner node. 
+ */ + if (mysql_bin_log.is_open()) + { + mysql_mutex_lock(mysql_bin_log.get_log_lock()); + } + locked= true; goto wait_signal; } @@ -1014,6 +1024,11 @@ wait_signal: { if (locked) { + if (mysql_bin_log.is_open()) + { + mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + } sst_disallow_writes (thd.ptr, false); thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); locked= false; @@ -1046,6 +1061,11 @@ wait_signal: if (locked) // don't forget to unlock server before return { + if (mysql_bin_log.is_open()) + { + mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + } sst_disallow_writes (thd.ptr, false); thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); } |