summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSujatha <sujatha.sivakumar@mariadb.com>2021-05-12 18:00:06 +0530
committerSujatha <sujatha.sivakumar@mariadb.com>2021-05-17 16:38:58 +0530
commit410e3c1a9a364219481392a408f12a432d83b2f2 (patch)
treeb8ab7b254c58592b5965dfd56d1a5b9f3871c37d
parent80ae3677e1b90a7f5d9dfc55ba0612e065b8ce97 (diff)
downloadmariadb-git-410e3c1a9a364219481392a408f12a432d83b2f2.tar.gz
MDEV-17515: GTID Replication in optimistic mode deadlock
Problem: ======= In slave_parallel_mode=optimistic configuration, when admin commands and DML operation on the same table are scheduled simultaneously for execution, it results in lock conflict and slave server either hangs due to deadlock or goes down with an assert. Analysis: ======== Admin commands OPTIMIZE, REPAIR and ANALYZE are written to binary log as ordinary transactions. When 'slave_parallel_mode' is 'optimistic' DMLs are allowed to run in parallel. But these locks are not detected by parallel replication deadlock detection-and-handling mechanism. At times they result in deadlock or assertion. Fix: === Flag admin commands as DDL in Gtid_log_event at the time of writing to binary log. Add a new bit EXECUTED_TABLE_ADMIN_CMD to 'm_unsafe_rollback_flags'. During 'mysql_admin_table' command execution it accepts a list of tables to be processed and executes them in a loop. Upon successful execution enable 'EXECUTED_TABLE_ADMIN_CMD' bit in thd->transaction.stmt_unsafe_rollback_flags. Gtid_log_event constructor will notice this flag and mark the current transaction with 'FL_DDL' flag. Gtid_log_events marked as FL_DDL will not be scheduled parallel execution, on the slave. They will execute in isolation to prevent deadlocks. Note: Removed the call to 'trans_commit_implicit' from 'mysql_admin_table' function as 'mysql_execute_command' will take care of invoking 'trans_commit_implicit'.
-rw-r--r--mysql-test/include/commit.inc2
-rw-r--r--mysql-test/r/commit_1innodb.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_mark_optimize_tbl_ddl.result77
-rw-r--r--mysql-test/suite/rpl/t/rpl_mark_optimize_tbl_ddl.test142
-rw-r--r--sql/handler.h10
-rw-r--r--sql/log_event.cc4
-rw-r--r--sql/sql_admin.cc55
-rw-r--r--sql/sql_class.h3
8 files changed, 257 insertions, 38 deletions
diff --git a/mysql-test/include/commit.inc b/mysql-test/include/commit.inc
index a28d3e5f3d1..1844a5320f7 100644
--- a/mysql-test/include/commit.inc
+++ b/mysql-test/include/commit.inc
@@ -770,7 +770,7 @@ call p_verify_status_increment(2, 0, 2, 0);
commit;
call p_verify_status_increment(0, 0, 0, 0);
check table t1, t2, t3;
-call p_verify_status_increment(6, 0, 6, 0);
+call p_verify_status_increment(4, 0, 4, 0);
commit;
call p_verify_status_increment(0, 0, 0, 0);
drop view v1;
diff --git a/mysql-test/r/commit_1innodb.result b/mysql-test/r/commit_1innodb.result
index 1adba7b4c4c..24d0f79325a 100644
--- a/mysql-test/r/commit_1innodb.result
+++ b/mysql-test/r/commit_1innodb.result
@@ -873,7 +873,7 @@ Table Op Msg_type Msg_text
test.t1 check status OK
test.t2 check status OK
test.t3 check status OK
-call p_verify_status_increment(6, 0, 6, 0);
+call p_verify_status_increment(4, 0, 4, 0);
SUCCESS
commit;
diff --git a/mysql-test/suite/rpl/r/rpl_mark_optimize_tbl_ddl.result b/mysql-test/suite/rpl/r/rpl_mark_optimize_tbl_ddl.result
new file mode 100644
index 00000000000..a39dad85244
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_mark_optimize_tbl_ddl.result
@@ -0,0 +1,77 @@
+include/rpl_init.inc [topology=1->2]
+connection server_1;
+FLUSH TABLES;
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+connection server_2;
+SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads;
+SET @save_slave_parallel_mode= @@GLOBAL.slave_parallel_mode;
+include/stop_slave.inc
+SET GLOBAL slave_parallel_threads=2;
+SET GLOBAL slave_parallel_mode=optimistic;
+include/start_slave.inc
+connection server_1;
+CREATE TABLE t1(a INT) ENGINE=INNODB;
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
+test.t1 optimize status OK
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 SELECT 1+a FROM t1;
+INSERT INTO t1 SELECT 2+a FROM t1;
+connection server_2;
+#
+# Verify that following admin commands are marked as ddl
+# 'OPTIMIZE TABLE', 'REPAIR TABLE' and 'ANALYZE TABLE'
+#
+connection server_1;
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
+test.t1 optimize status OK
+REPAIR TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 repair note The storage engine for the table doesn't support repair
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+FLUSH LOGS;
+FOUND 1 /GTID 0-1-8 ddl/ in mysqlbinlog.out
+FOUND 1 /GTID 0-1-9 ddl/ in mysqlbinlog.out
+FOUND 1 /GTID 0-1-10 ddl/ in mysqlbinlog.out
+#
+# Clean up
+#
+DROP TABLE t1;
+connection server_2;
+FLUSH LOGS;
+#
+# Check that ALTER TABLE commands with ANALYZE, OPTIMIZE and REPAIR on
+# partitions will be marked as DDL in binary log.
+#
+connection server_1;
+CREATE TABLE t1(id INT) PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (100),
+PARTITION pmax VALUES LESS THAN (MAXVALUE));
+INSERT INTO t1 VALUES (1), (10), (100), (1000);
+ALTER TABLE t1 ANALYZE PARTITION p0;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+ALTER TABLE t1 OPTIMIZE PARTITION p0;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+ALTER TABLE t1 REPAIR PARTITION p0;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+FLUSH LOGS;
+FOUND 1 /GTID 0-1-14 ddl/ in mysqlbinlog.out
+FOUND 1 /GTID 0-1-15 ddl/ in mysqlbinlog.out
+FOUND 1 /GTID 0-1-16 ddl/ in mysqlbinlog.out
+#
+# Clean up
+#
+DROP TABLE t1;
+connection server_2;
+include/stop_slave.inc
+SET GLOBAL slave_parallel_threads= @save_slave_parallel_threads;
+SET GLOBAL slave_parallel_mode= @save_slave_parallel_mode;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_mark_optimize_tbl_ddl.test b/mysql-test/suite/rpl/t/rpl_mark_optimize_tbl_ddl.test
new file mode 100644
index 00000000000..6d66e3fd088
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_mark_optimize_tbl_ddl.test
@@ -0,0 +1,142 @@
+# ==== Purpose ====
+#
+# Test verifies that there is no deadlock or assertion in
+# slave_parallel_mode=optimistic configuration while applying admin command
+# like 'OPTIMIZE TABLE', 'REPAIR TABLE' and 'ANALYZE TABLE'.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Create a table, execute OPTIMIZE TABLE command on the table followed
+# by some DMLS.
+# 1 - No assert should happen on slave server.
+# 2 - Assert that 'OPTIMIZE TABLE', 'REPAIR TABLE' and 'ANALYZE TABLE' are
+# marked as 'DDL' in the binary log.
+#
+# ==== References ====
+#
+# MDEV-17515: GTID Replication in optimistic mode deadlock
+#
+--source include/have_partition.inc
+--source include/have_innodb.inc
+--let $rpl_topology=1->2
+--source include/rpl_init.inc
+
+--connection server_1
+FLUSH TABLES;
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+
+--connection server_2
+SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads;
+SET @save_slave_parallel_mode= @@GLOBAL.slave_parallel_mode;
+--source include/stop_slave.inc
+SET GLOBAL slave_parallel_threads=2;
+SET GLOBAL slave_parallel_mode=optimistic;
+--source include/start_slave.inc
+
+--connection server_1
+CREATE TABLE t1(a INT) ENGINE=INNODB;
+OPTIMIZE TABLE t1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 SELECT 1+a FROM t1;
+INSERT INTO t1 SELECT 2+a FROM t1;
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+
+--echo #
+--echo # Verify that following admin commands are marked as ddl
+--echo # 'OPTIMIZE TABLE', 'REPAIR TABLE' and 'ANALYZE TABLE'
+--echo #
+--connection server_1
+
+OPTIMIZE TABLE t1;
+--let optimize_gtid= `SELECT @@GLOBAL.gtid_binlog_pos`
+
+REPAIR TABLE t1;
+--let repair_gtid= `SELECT @@GLOBAL.gtid_binlog_pos`
+
+ANALYZE TABLE t1;
+--let analyze_gtid= `SELECT @@GLOBAL.gtid_binlog_pos`
+
+let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1);
+FLUSH LOGS;
+
+--let $MYSQLD_DATADIR= `select @@datadir`
+--exec $MYSQL_BINLOG $MYSQLD_DATADIR/$binlog_file > $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+
+--let SEARCH_PATTERN= GTID $optimize_gtid ddl
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--source include/search_pattern_in_file.inc
+
+--let SEARCH_PATTERN= GTID $repair_gtid ddl
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--source include/search_pattern_in_file.inc
+
+--let SEARCH_PATTERN= GTID $analyze_gtid ddl
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--source include/search_pattern_in_file.inc
+
+--echo #
+--echo # Clean up
+--echo #
+DROP TABLE t1;
+--remove_file $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+FLUSH LOGS;
+
+--echo #
+--echo # Check that ALTER TABLE commands with ANALYZE, OPTIMIZE and REPAIR on
+--echo # partitions will be marked as DDL in binary log.
+--echo #
+--connection server_1
+CREATE TABLE t1(id INT) PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (100),
+ PARTITION pmax VALUES LESS THAN (MAXVALUE));
+INSERT INTO t1 VALUES (1), (10), (100), (1000);
+
+ALTER TABLE t1 ANALYZE PARTITION p0;
+--let analyze_gtid= `SELECT @@GLOBAL.gtid_binlog_pos`
+
+ALTER TABLE t1 OPTIMIZE PARTITION p0;
+--let optimize_gtid= `SELECT @@GLOBAL.gtid_binlog_pos`
+
+ALTER TABLE t1 REPAIR PARTITION p0;
+--let repair_gtid= `SELECT @@GLOBAL.gtid_binlog_pos`
+
+let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1);
+FLUSH LOGS;
+
+--exec $MYSQL_BINLOG $MYSQLD_DATADIR/$binlog_file > $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+
+--let SEARCH_PATTERN= GTID $analyze_gtid ddl
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--source include/search_pattern_in_file.inc
+
+--let SEARCH_PATTERN= GTID $optimize_gtid ddl
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--source include/search_pattern_in_file.inc
+
+--let SEARCH_PATTERN= GTID $repair_gtid ddl
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--source include/search_pattern_in_file.inc
+
+--echo #
+--echo # Clean up
+--echo #
+DROP TABLE t1;
+--remove_file $MYSQLTEST_VARDIR/tmp/mysqlbinlog.out
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+
+--source include/stop_slave.inc
+SET GLOBAL slave_parallel_threads= @save_slave_parallel_threads;
+SET GLOBAL slave_parallel_mode= @save_slave_parallel_mode;
+--source include/start_slave.inc
+
+--source include/rpl_end.inc
diff --git a/sql/handler.h b/sql/handler.h
index e0e0604176d..f618c1d6469 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1481,7 +1481,17 @@ struct THD_TRANS
static unsigned int const DROPPED_TEMP_TABLE= 0x04;
static unsigned int const DID_WAIT= 0x08;
static unsigned int const DID_DDL= 0x10;
+ static unsigned int const EXECUTED_TABLE_ADMIN_CMD= 0x20;
+ void mark_executed_table_admin_cmd()
+ {
+ DBUG_PRINT("debug", ("mark_executed_table_admin_cmd"));
+ m_unsafe_rollback_flags|= EXECUTED_TABLE_ADMIN_CMD;
+ }
+ bool trans_executed_admin_cmd()
+ {
+ return (m_unsafe_rollback_flags & EXECUTED_TABLE_ADMIN_CMD) != 0;
+ }
void mark_created_temp_table()
{
DBUG_PRINT("debug", ("mark_created_temp_table"));
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 94b2af20354..04577be4f6f 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -7561,8 +7561,10 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg,
flags2|= FL_WAITED;
if (thd_arg->transaction.stmt.trans_did_ddl() ||
thd_arg->transaction.stmt.has_created_dropped_temp_table() ||
+ thd_arg->transaction.stmt.trans_executed_admin_cmd() ||
thd_arg->transaction.all.trans_did_ddl() ||
- thd_arg->transaction.all.has_created_dropped_temp_table())
+ thd_arg->transaction.all.has_created_dropped_temp_table() ||
+ thd_arg->transaction.all.trans_executed_admin_cmd())
flags2|= FL_DDL;
else if (is_transactional && !is_tmp_table)
flags2|= FL_TRANSACTIONAL;
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index 17eede61337..2b3593388bb 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -434,7 +434,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
int (handler::*operator_func)(THD *,
HA_CHECK_OPT *),
int (view_operator_func)(THD *, TABLE_LIST*,
- HA_CHECK_OPT *))
+ HA_CHECK_OPT *),
+ bool is_cmd_replicated)
{
TABLE_LIST *table;
List<Item> field_list;
@@ -1147,6 +1148,13 @@ send_result_message:
break;
}
}
+ /*
+ Admin commands acquire table locks and these locks are not detected by
+ parallel replication deadlock detection-and-handling mechanism. Hence
+ they must be marked as DDL so that they are not scheduled in parallel
+ with conflicting DMLs resulting in deadlock.
+ */
+ thd->transaction.stmt.mark_executed_table_admin_cmd();
if (table->table && !table->view)
{
if (table->table->s->tmp_table)
@@ -1182,9 +1190,7 @@ send_result_message:
}
else
{
- if (trans_commit_stmt(thd) ||
- (stmt_causes_implicit_commit(thd, CF_IMPLICIT_COMMIT_END) &&
- trans_commit_implicit(thd)))
+ if (trans_commit_stmt(thd))
goto err;
}
close_thread_tables(thd);
@@ -1209,6 +1215,11 @@ send_result_message:
if (protocol->write())
goto err;
}
+ if (is_cmd_replicated && !thd->lex->no_write_to_binlog)
+ {
+ if (write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
+ goto err;
+ }
my_eof(thd);
thd->resume_subsequent_commits(suspended_wfc);
@@ -1270,7 +1281,7 @@ bool mysql_assign_to_keycache(THD* thd, TABLE_LIST* tables,
check_opt.key_cache= key_cache;
DBUG_RETURN(mysql_admin_table(thd, tables, &check_opt,
"assign_to_keycache", TL_READ_NO_INSERT, 0, 0,
- 0, 0, &handler::assign_to_keycache, 0));
+ 0, 0, &handler::assign_to_keycache, 0, false));
}
@@ -1297,7 +1308,7 @@ bool mysql_preload_keys(THD* thd, TABLE_LIST* tables)
*/
DBUG_RETURN(mysql_admin_table(thd, tables, 0,
"preload_keys", TL_READ_NO_INSERT, 0, 0, 0, 0,
- &handler::preload_keys, 0));
+ &handler::preload_keys, 0, false));
}
@@ -1315,15 +1326,7 @@ bool Sql_cmd_analyze_table::execute(THD *thd)
WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table);
res= mysql_admin_table(thd, first_table, &m_lex->check_opt,
"analyze", lock_type, 1, 0, 0, 0,
- &handler::ha_analyze, 0);
- /* ! we write after unlocking the table */
- if (!res && !m_lex->no_write_to_binlog)
- {
- /*
- Presumably, ANALYZE and binlog writing doesn't require synchronization
- */
- res= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
- }
+ &handler::ha_analyze, 0, true);
m_lex->select_lex.table_list.first= first_table;
m_lex->query_tables= first_table;
@@ -1346,7 +1349,7 @@ bool Sql_cmd_check_table::execute(THD *thd)
goto error; /* purecov: inspected */
res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "check",
lock_type, 0, 0, HA_OPEN_FOR_REPAIR, 0,
- &handler::ha_check, &view_check);
+ &handler::ha_check, &view_check, false);
m_lex->select_lex.table_list.first= first_table;
m_lex->query_tables= first_table;
@@ -1371,15 +1374,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd)
mysql_recreate_table(thd, first_table, true) :
mysql_admin_table(thd, first_table, &m_lex->check_opt,
"optimize", TL_WRITE, 1, 0, 0, 0,
- &handler::ha_optimize, 0);
- /* ! we write after unlocking the table */
- if (!res && !m_lex->no_write_to_binlog)
- {
- /*
- Presumably, OPTIMIZE and binlog writing doesn't require synchronization
- */
- res= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
- }
+ &handler::ha_optimize, 0, true);
m_lex->select_lex.table_list.first= first_table;
m_lex->query_tables= first_table;
@@ -1404,16 +1399,8 @@ bool Sql_cmd_repair_table::execute(THD *thd)
TL_WRITE, 1,
MY_TEST(m_lex->check_opt.sql_flags & TT_USEFRM),
HA_OPEN_FOR_REPAIR, &prepare_for_repair,
- &handler::ha_repair, &view_repair);
+ &handler::ha_repair, &view_repair, true);
- /* ! we write after unlocking the table */
- if (!res && !m_lex->no_write_to_binlog)
- {
- /*
- Presumably, REPAIR and binlog writing doesn't require synchronization
- */
- res= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
- }
m_lex->select_lex.table_list.first= first_table;
m_lex->query_tables= first_table;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index e08bb3e6358..890cce7bcb2 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -4530,7 +4530,8 @@ public:
transaction.all.m_unsafe_rollback_flags|=
(transaction.stmt.m_unsafe_rollback_flags &
(THD_TRANS::DID_WAIT | THD_TRANS::CREATED_TEMP_TABLE |
- THD_TRANS::DROPPED_TEMP_TABLE | THD_TRANS::DID_DDL));
+ THD_TRANS::DROPPED_TEMP_TABLE | THD_TRANS::DID_DDL |
+ THD_TRANS::EXECUTED_TABLE_ADMIN_CMD));
}
/*
Reset current_linfo