summary | refs | log | tree | commit | diff
path: root/sql/slave.cc
diff options
context:
space:
mode:
author: unknown <guilhem@mysql.com> 2003-12-04 15:30:14 +0100
committer: unknown <guilhem@mysql.com> 2003-12-04 15:30:14 +0100
commit: 8479e5a379044acad53baa8f9b7a8cd9266c582a (patch)
tree: ebe7a79e0ce5811215ea0ef2a0b0c25db2c1e5b4 /sql/slave.cc
parent: e8fc6d460c379bbb7b5b4b75e9919ea226a62a43 (diff)
download: mariadb-git-8479e5a379044acad53baa8f9b7a8cd9266c582a.tar.gz
Fix for BUG#2011 "rare race condition producing 'binlog has bad magic number' error in slave".
The problem was that when the slave SQL thread reads a hot relay log (hot = the one being written to by the slave I/O thread), it must hold LOCK_log. It already took it for read_log_event(), but it also needs it for check_binlog_magic(). This should fix all recently reported failures of the rpl_max_relay_size test in 4.1 and 5.0 (though the bug has existed since 4.0, it first showed up in 5.0).

sql/slave.cc: Fix for BUG#2011 "rare race condition producing 'binlog has bad magic number' error in slave". The problem was that when the slave SQL thread reads a hot relay log (hot = the one being written to by the slave I/O thread), it must hold LOCK_log. It already took it for read_log_event(), but it also needs it for check_binlog_magic().
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- sql/slave.cc 31
1 files changed, 26 insertions, 5 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index 6816d968007..5bc31fd6a21 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3497,8 +3497,20 @@ rli->relay_log_pos=%s rli->pending=%lu",
sizeof(rli->relay_log_name)-1);
flush_relay_log_info(rli);
}
-
- // next log is hot
+
+ /*
+ Now we want to open this next log. To know if it's a hot log (the one
+ being written by the I/O thread now) or a cold log, we can use
+ is_active(); if it is hot, we use the I/O cache; if it's cold we open
+ the file normally. But if is_active() reports that the log is hot, this
+ may change between the test and the consequence of the test. So we may
+ open the I/O cache whereas the log is now cold, which is nonsense.
+ To guard against this, we need to have LOCK_log.
+ */
+
+ DBUG_PRINT("info",("hot_log: %d",hot_log));
+ if (!hot_log) /* if hot_log, we already have this mutex */
+ pthread_mutex_lock(log_lock);
if (rli->relay_log.is_active(rli->linfo.log_file_name))
{
#ifdef EXTRA_DEBUG
@@ -3511,15 +3523,24 @@ rli->relay_log_pos=%s rli->pending=%lu",
/*
Read pointer has to be at the start since we are the only
- reader
+ reader.
+ We must keep the LOCK_log to read the 4 first bytes, as this is a hot
+ log (same as when we call read_log_event() above: for a hot log we
+ take the mutex).
*/
if (check_binlog_magic(cur_log,&errmsg))
+ {
+ if (!hot_log) pthread_mutex_unlock(log_lock);
goto err;
+ }
+ if (!hot_log) pthread_mutex_unlock(log_lock);
continue;
}
+ if (!hot_log) pthread_mutex_unlock(log_lock);
/*
- if we get here, the log was not hot, so we will have to
- open it ourselves
+ if we get here, the log was not hot, so we will have to open it
+ ourselves. We are sure that the log is still not hot now (a log can get
+ from hot to cold, but not from cold to hot). No need for LOCK_log.
*/
#ifdef EXTRA_DEBUG
sql_print_error("next log '%s' is not active",