summaryrefslogtreecommitdiff
path: root/sql/slave.cc
diff options
context:
space:
mode:
authorAndrei <andrei.elkin@mariadb.com>2023-03-18 21:11:07 +0200
committerAndrei <andrei.elkin@mariadb.com>2023-04-27 21:55:45 +0300
commit55a53949beac6e212b1232d3628d96b9b8121a49 (patch)
treece33df7a9e295c12de354caeece1d0c5960e4d25 /sql/slave.cc
parent7e75f94ba1a0b72b23a43220e4d81334a18097b4 (diff)
downloadmariadb-git-55a53949beac6e212b1232d3628d96b9b8121a49.tar.gz
MDEV-29621: Replica stopped by locks on sequence
When using binlog_row_image=FULL with sequence table inserts, a replica can deadlock because it treats full inserts into a sequence as DDL statements and takes an exclusive lock on the sequence table. It has been observed that with parallel replication, this exclusive lock on the sequence table can lead to a deadlock where one transaction holds the exclusive lock and is waiting for a prior transaction to commit, whereas this prior transaction is waiting on the MDL lock. The fix for this is on the master side: raise the FL_DDL flag on the GTID of a full binlog_row_image write to a sequence table. This forces the slave to execute the statement serially so that a deadlock cannot happen. A test verifies the deadlock, also proving that it happens on the OLD (pre-fix) slave. Handling of OLD (buggy master) -replication-> NEW (fixed slave) is provided as well. As the pre-fix master's full row image may represent both SELECT NEXT VALUE and INSERT, the parallel slave pessimistically waits for the prior transaction to have committed before taking on the critical part of the second (like INSERT in the test) event execution. The waiting exploits the parallel slave's retry mechanism, which is controlled by `@@global.slave_transaction_retries`. Note that in order to avoid any persistent 'Deadlock found' 2013 error in OLD -> NEW, `slave_transaction_retries` may need to be set to a value higher than the default. START SLAVE is an effective work-around if this still happens.
Diffstat (limited to 'sql/slave.cc')
-rw-r--r--sql/slave.cc36
1 files changed, 29 insertions, 7 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index b64b9a64979..3f06c40e7c2 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -8028,14 +8028,15 @@ end:
@return TRUE if master has the bug, FALSE if it does not.
*/
bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
- bool (*pred)(const void *), const void *param)
+ bool (*pred)(const void *), const void *param,
+ bool maria_master)
{
struct st_version_range_for_one_bug {
uint bug_id;
const uchar introduced_in[3]; // first version with bug
const uchar fixed_in[3]; // first version with fix
};
- static struct st_version_range_for_one_bug versions_for_all_bugs[]=
+ static struct st_version_range_for_one_bug versions_for_their_bugs[]=
{
{24432, { 5, 0, 24 }, { 5, 0, 38 } },
{24432, { 5, 1, 12 }, { 5, 1, 17 } },
@@ -8043,13 +8044,30 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
{33029, { 5, 1, 0 }, { 5, 1, 12 } },
{37426, { 5, 1, 0 }, { 5, 1, 26 } },
};
+ static struct st_version_range_for_one_bug versions_for_our_bugs[]=
+ {
+ {29621, { 10, 3, 36 }, { 10, 3, 39 } },
+ {29621, { 10, 4, 26 }, { 10, 4, 29 } },
+ {29621, { 10, 5, 17 }, { 10, 5, 20 } },
+ {29621, { 10, 6, 9 }, { 10, 6, 13 } },
+ {29621, { 10, 7, 5 }, { 10, 7, 9 } },
+ {29621, { 10, 8, 4 }, { 10, 8, 8 } },
+ {29621, { 10, 9, 2 }, { 10, 9, 6 } },
+ {29621, { 10, 10,1 }, { 10, 10,4 } },
+ {29621, { 10, 11,1 }, { 10, 11,3 } },
+ };
const uchar *master_ver=
rli->relay_log.description_event_for_exec->server_version_split.ver;
DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split.ver) == 3);
- for (uint i= 0;
- i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
+ struct st_version_range_for_one_bug* versions_for_all_bugs= maria_master ?
+ versions_for_our_bugs : versions_for_their_bugs;
+ uint all_size= maria_master ?
+ sizeof(versions_for_our_bugs)/sizeof(*versions_for_our_bugs) :
+ sizeof(versions_for_their_bugs)/sizeof(*versions_for_their_bugs);
+
+ for (uint i= 0; i < all_size; i++)
{
const uchar *introduced_in= versions_for_all_bugs[i].introduced_in,
*fixed_in= versions_for_all_bugs[i].fixed_in;
@@ -8058,18 +8076,21 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
(memcmp(fixed_in, master_ver, 3) > 0) &&
(pred == NULL || (*pred)(param)))
{
+ const char *bug_source= maria_master ?
+ "https://jira.mariadb.org/browse/MDEV-" :
+ "http://bugs.mysql.com/bug.php?id=";
if (!report)
return TRUE;
// a short message for SHOW SLAVE STATUS (message length constraints)
my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
- " http://bugs.mysql.com/bug.php?id=%u"
+ " %s%u"
" so slave stops; check error log on slave"
- " for more info", MYF(0), bug_id);
+ " for more info", MYF(0), bug_source, bug_id);
// a verbose message for the error log
rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, NULL,
"According to the master's version ('%s'),"
" it is probable that master suffers from this bug:"
- " http://bugs.mysql.com/bug.php?id=%u"
+ " %s%u"
" and thus replicating the current binary log event"
" may make the slave's data become different from the"
" master's data."
@@ -8083,6 +8104,7 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
" equal to '%d.%d.%d'. Then replication can be"
" restarted.",
rli->relay_log.description_event_for_exec->server_version,
+ bug_source,
bug_id,
fixed_in[0], fixed_in[1], fixed_in[2]);
return TRUE;