summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrandon Nesterenko <brandon.nesterenko@mariadb.com>2021-05-21 14:53:43 -0600
committerBrandon Nesterenko <brandon.nesterenko@mariadb.com>2021-08-13 10:53:19 -0600
commit46c3e7e3537c31a94289033bfeccf3faf8d4069e (patch)
tree5c676e739db8272f3b25fa2e93d5344898a43a0d
parent38b79d7295e7a9ed2e4fdd72e6a63505a32c806f (diff)
downloadmariadb-git-46c3e7e3537c31a94289033bfeccf3faf8d4069e.tar.gz
MDEV-20215: binlog.show_concurrent_rotate failed in buildbot with wrong result
Problem: ======= There are two issues that are addressed in this patch: 1) SHOW BINARY LOGS uses caching to store the binary logs that exist in the log directory; however, if new events are written to the logs, the caching strategy is unaware. This is okay for users, as it is okay for SHOW to return slightly old data. The test, however, can result in inconsistent data. It runs two connections concurrently, where one shows the logs, and the other adds a new file. The output of SHOW BINARY LOGS then depends on when the cache is built, with respect to the time that the second connection rotates the logs. 2) There is a race condition between RESET MASTER and SHOW BINARY LOGS. More specifically, where they both need the binary log lock to begin, SHOW BINARY LOGS only needs the lock to build its cache. If RESET MASTER is issued after SHOW BINARY LOGS has built its cache and before it has returned the results, the presented data may be incorrect. Solution: ======== 1) As it is okay for users to see stale data, to make the test consistent, use DEBUG_SYNC to force the race condition (problem 2) to make SHOW BINARY LOGS build a cache before RESET MASTER is called. Then, use additional logic from the next part of the solution to rebuild the cache. 2) Use an Atomic_counter to keep track of the number of times RESET MASTER has been called. If the value of the counter changes after building the cache, the cache should be rebuilt and the analysis should be restarted. Reviewed By: ============ Andrei Elkin: <andrei.elkin@mariadb.com>
-rw-r--r--mysql-test/suite/binlog/r/show_concurrent_rotate.result3
-rw-r--r--mysql-test/suite/binlog/t/show_concurrent_rotate.test7
-rw-r--r--sql/log.cc3
-rw-r--r--sql/log.h6
-rw-r--r--sql/sql_repl.cc12
5 files changed, 28 insertions, 3 deletions
diff --git a/mysql-test/suite/binlog/r/show_concurrent_rotate.result b/mysql-test/suite/binlog/r/show_concurrent_rotate.result
index cee5de33973..b830b75eeef 100644
--- a/mysql-test/suite/binlog/r/show_concurrent_rotate.result
+++ b/mysql-test/suite/binlog/r/show_concurrent_rotate.result
@@ -2,9 +2,10 @@ connect con1,localhost,root,,;
FLUSH LOGS;
FLUSH LOGS;
FLUSH LOGS;
-SET DEBUG_SYNC= "at_after_lock_index WAIT_FOR con1_go";
+SET DEBUG_SYNC= "at_after_lock_index SIGNAL con1_ready WAIT_FOR con1_go";
SHOW BINARY LOGS;
connect con2,localhost,root,,;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
RESET MASTER;
FLUSH LOGS;
SET DEBUG_SYNC= "now SIGNAL con1_go";
diff --git a/mysql-test/suite/binlog/t/show_concurrent_rotate.test b/mysql-test/suite/binlog/t/show_concurrent_rotate.test
index 79d36c30a86..b5758e3a883 100644
--- a/mysql-test/suite/binlog/t/show_concurrent_rotate.test
+++ b/mysql-test/suite/binlog/t/show_concurrent_rotate.test
@@ -8,10 +8,15 @@ FLUSH LOGS;
FLUSH LOGS;
FLUSH LOGS;
-SET DEBUG_SYNC= "at_after_lock_index WAIT_FOR con1_go";
+# This forced synchronization pattern ensures con1 will execute its retry
+# path. More specifically, con1 should see that the cache of log files it
+# creates during SHOW BINARY LOGS becomes invalidated after con2 completes
+# RESET MASTER.
+SET DEBUG_SYNC= "at_after_lock_index SIGNAL con1_ready WAIT_FOR con1_go";
--send SHOW BINARY LOGS
connect(con2,localhost,root,,);
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
RESET MASTER;
FLUSH LOGS;
SET DEBUG_SYNC= "now SIGNAL con1_go";
diff --git a/sql/log.cc b/sql/log.cc
index eb82100d16c..93ed664bff9 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -3256,7 +3256,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
description_event_for_exec(0), description_event_for_queue(0),
- current_binlog_id(0)
+ current_binlog_id(0), reset_master_count(0)
{
/*
We don't want to initialize locks here as such initialization depends on
@@ -4350,6 +4350,7 @@ err:
}
mysql_cond_broadcast(&COND_xid_list);
reset_master_pending--;
+ reset_master_count++;
mysql_mutex_unlock(&LOCK_xid_list);
}
diff --git a/sql/log.h b/sql/log.h
index 8ba39614825..8994d536298 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -672,6 +672,11 @@ public:
my_off_t last_commit_pos_offset;
ulong current_binlog_id;
+ /*
+ Tracks the number of times that the master has been reset
+ */
+ Atomic_counter<uint64> reset_master_count;
+
MYSQL_BIN_LOG(uint *sync_period);
/*
note that there's no destructor ~MYSQL_BIN_LOG() !
@@ -881,6 +886,7 @@ public:
inline mysql_mutex_t* get_log_lock() { return &LOCK_log; }
inline mysql_cond_t* get_bin_log_cond() { return &COND_bin_log_updated; }
inline IO_CACHE* get_log_file() { return &log_file; }
+ inline uint64 get_reset_master_count() { return reset_master_count; }
inline void lock_index() { mysql_mutex_lock(&LOCK_index);}
inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);}
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 841ff561502..e96129465fe 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -4374,6 +4374,7 @@ bool show_binlogs(THD* thd)
Protocol *protocol= thd->protocol;
uint retry_count= 0;
size_t cur_dir_len;
+ uint64 expected_reset_masters;
DBUG_ENTER("show_binlogs");
if (!mysql_bin_log.is_open())
@@ -4399,6 +4400,7 @@ retry:
mysql_mutex_lock(mysql_bin_log.get_log_lock());
mysql_bin_log.lock_index();
mysql_bin_log.raw_get_current_log(&cur);
+ expected_reset_masters= mysql_bin_log.get_reset_master_count();
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
/* The following call unlocks lock_index */
@@ -4419,6 +4421,16 @@ retry:
cur_link->name.str+= dir_len;
cur_link->name.length-= dir_len;
+ if (mysql_bin_log.get_reset_master_count() > expected_reset_masters)
+ {
+ /*
+ Reset master was called after we cached filenames.
+ Reinitialize the cache.
+ */
+ free_root(&mem_root, MYF(MY_MARK_BLOCKS_FREE));
+ goto retry;
+ }
+
if (!(strncmp(fname+dir_len, cur.log_file_name+cur_dir_len, length)))
cur_link->size= cur.pos; /* The active log, use the active position */
else