diff options
Diffstat (limited to 'mysql-test/suite/rpl/t/rpl_parallel.test')
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_parallel.test | 474 |
1 files changed, 473 insertions, 1 deletions
diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index d4b99d4b0f7..feac32b1454 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -12,9 +12,20 @@ SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; SET GLOBAL slave_parallel_threads=10; --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=10; + +# Check that we do not spawn any worker threads when no slave is running. +SELECT IF(COUNT(*) < 10, "OK", CONCAT("Found too many system user processes: ", COUNT(*))) FROM information_schema.processlist WHERE user = "system user"; + CHANGE MASTER TO master_use_gtid=slave_pos; --source include/start_slave.inc +# Check that worker threads get spawned when slave starts. +SELECT IF(COUNT(*) >= 10, "OK", CONCAT("Found too few system user processes: ", COUNT(*))) FROM information_schema.processlist WHERE user = "system user"; +# ... and that worker threads get removed when slave stops. +--source include/stop_slave.inc +SELECT IF(COUNT(*) < 10, "OK", CONCAT("Found too many system user processes: ", COUNT(*))) FROM information_schema.processlist WHERE user = "system user"; +--source include/start_slave.inc +SELECT IF(COUNT(*) >= 10, "OK", CONCAT("Found too few system user processes: ", COUNT(*))) FROM information_schema.processlist WHERE user = "system user"; --echo *** Test long-running query in domain 1 can run in parallel with short queries in domain 0 *** @@ -1843,6 +1854,467 @@ SET GLOBAL slave_parallel_threads=10; --source include/start_slave.inc +--echo *** MDEV-7335: Potential parallel slave deadlock with specific binlog corruption *** + +--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=1; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000"; + +--connection server_1 +INSERT INTO t2 VALUES (101); +INSERT INTO t2 VALUES (102); +INSERT INTO t2 VALUES (103); +INSERT INTO t2 VALUES (104); +INSERT INTO t2 VALUES (105); +# Inject a partial event group (missing XID at the end). The bug was that such +# partial group was not handled appropriately, leading to server deadlock. +SET gtid_seq_no=1000; +INSERT INTO t2 VALUES (106); +INSERT INTO t2 VALUES (107); +INSERT INTO t2 VALUES (108); +INSERT INTO t2 VALUES (109); +INSERT INTO t2 VALUES (110); +INSERT INTO t2 VALUES (111); +INSERT INTO t2 VALUES (112); +INSERT INTO t2 VALUES (113); +INSERT INTO t2 VALUES (114); +INSERT INTO t2 VALUES (115); +INSERT INTO t2 VALUES (116); +INSERT INTO t2 VALUES (117); +INSERT INTO t2 VALUES (118); +INSERT INTO t2 VALUES (119); +INSERT INTO t2 VALUES (120); +INSERT INTO t2 VALUES (121); +INSERT INTO t2 VALUES (122); +INSERT INTO t2 VALUES (123); +INSERT INTO t2 VALUES (124); +INSERT INTO t2 VALUES (125); +INSERT INTO t2 VALUES (126); +INSERT INTO t2 VALUES (127); +INSERT INTO t2 VALUES (128); +INSERT INTO t2 VALUES (129); +INSERT INTO t2 VALUES (130); +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +# The partial event group (a=106) should be rolled back and thus missing. +SELECT * FROM t2 WHERE a >= 100 ORDER BY a; + +--source include/stop_slave.inc +SET GLOBAL debug_dbug=@old_dbug; +SET GLOBAL slave_parallel_threads=10; +--source include/start_slave.inc + + +--echo *** MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging *** +--echo *** MDEV-7882: Excessive transaction retry in parallel replication *** + +--connection server_1 +CREATE TABLE t7 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t8 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=40; +SELECT @old_retries:=@@GLOBAL.slave_transaction_retries; +SET GLOBAL slave_transaction_retries= 5; + + +# Using dbug error injection, we artificially create event groups with a lot of +# conflicting transactions in each event group. The bugs were originally seen +# "in the wild" with transactions that did not conflict on the master, and only +# conflicted very rarely on the slave (maybe some edge case with InnoDB btree +# page splits or something like that). The event groups here loosely reflect +# the structure of the original failure's group commits. + + +--connection server_1 +INSERT INTO t7 VALUES (1,1), (2,2), (3,3), (4,4), (5,5); +SET @old_dbug= @@SESSION.debug_dbug; +SET @commit_id= 42; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +INSERT INTO t8 VALUES (1,1); +INSERT INTO t8 VALUES (2,2); +INSERT INTO t8 VALUES (3,3); +INSERT INTO t8 VALUES (4,4); +INSERT INTO t8 VALUES (5,5); +INSERT INTO t8 VALUES (6,6); +INSERT INTO t8 VALUES (7,7); +INSERT INTO t8 VALUES (8,8); + +UPDATE t7 SET b=9 WHERE a=3; +UPDATE t7 SET b=10 WHERE a=3; +UPDATE t7 SET b=11 WHERE a=3; + +INSERT INTO t8 VALUES (12,12); +INSERT INTO t8 VALUES (13,13); + +UPDATE t7 SET b=14 WHERE a=3; +UPDATE t7 SET b=15 WHERE a=3; + +INSERT INTO t8 VALUES (16,16); + +UPDATE t7 SET b=17 WHERE a=3; + +INSERT INTO t8 VALUES (18,18); +INSERT INTO t8 VALUES (19,19); + +UPDATE t7 SET b=20 WHERE a=3; + +INSERT INTO t8 VALUES (21,21); + +UPDATE t7 SET b=22 WHERE a=3; + +INSERT INTO t8 VALUES (23,24); +INSERT INTO t8 VALUES (24,24); + +UPDATE t7 SET b=25 WHERE a=3; + +INSERT INTO t8 VALUES (26,26); + +UPDATE t7 SET b=27 WHERE a=3; + +BEGIN; +INSERT INTO t8 VALUES (28,28); +INSERT INTO t8 VALUES (29,28), (30,28); +INSERT INTO t8 VALUES (31,28); +INSERT INTO t8 VALUES (32,28); +INSERT INTO t8 VALUES (33,28); +INSERT INTO t8 VALUES (34,28); +INSERT INTO t8 VALUES (35,28); +INSERT INTO t8 VALUES (36,28); +INSERT INTO t8 VALUES (37,28); +INSERT INTO t8 VALUES (38,28); +INSERT INTO t8 VALUES (39,28); +INSERT INTO t8 VALUES (40,28); +INSERT INTO t8 VALUES (41,28); +INSERT INTO t8 VALUES (42,28); +COMMIT; + + +SET @commit_id=43; +INSERT INTO t8 VALUES (43,43); +INSERT INTO t8 VALUES (44,44); + +UPDATE t7 SET b=45 WHERE a=3; + +INSERT INTO t8 VALUES (46,46); +INSERT INTO t8 VALUES (47,47); + +UPDATE t7 SET b=48 WHERE a=3; + +INSERT INTO t8 VALUES (49,49); +INSERT INTO t8 VALUES (50,50); + + +SET @commit_id=44; +INSERT INTO t8 VALUES (51,51); +INSERT INTO t8 VALUES (52,52); + +UPDATE t7 SET b=53 WHERE a=3; + +INSERT INTO t8 VALUES (54,54); +INSERT INTO t8 VALUES (55,55); + +UPDATE t7 SET b=56 WHERE a=3; + +INSERT INTO t8 VALUES (57,57); + +UPDATE t7 SET b=58 WHERE a=3; + +INSERT INTO t8 VALUES (58,58); +INSERT INTO t8 VALUES (59,59); +INSERT INTO t8 VALUES (60,60); +INSERT INTO t8 VALUES (61,61); + +UPDATE t7 SET b=62 WHERE a=3; + +INSERT INTO t8 VALUES (63,63); +INSERT INTO t8 VALUES (64,64); +INSERT INTO t8 VALUES (65,65); +INSERT INTO t8 VALUES (66,66); + +UPDATE t7 SET b=67 WHERE a=3; + +INSERT INTO t8 VALUES (68,68); + +UPDATE t7 SET b=69 WHERE a=3; +UPDATE t7 SET b=70 WHERE a=3; +UPDATE t7 SET b=71 WHERE a=3; + +INSERT INTO t8 VALUES (72,72); + +UPDATE t7 SET b=73 WHERE a=3; +UPDATE t7 SET b=74 WHERE a=3; +UPDATE t7 SET b=75 WHERE a=3; +UPDATE t7 SET b=76 WHERE a=3; + +INSERT INTO t8 VALUES (77,77); + +UPDATE t7 SET b=78 WHERE a=3; + +INSERT INTO t8 VALUES (79,79); + +UPDATE t7 SET b=80 WHERE a=3; + +INSERT INTO t8 VALUES (81,81); + +UPDATE t7 SET b=82 WHERE a=3; + +INSERT INTO t8 VALUES (83,83); + +UPDATE t7 SET b=84 WHERE a=3; + + +SET @commit_id=45; +INSERT INTO t8 VALUES (85,85); +UPDATE t7 SET b=86 WHERE a=3; +INSERT INTO t8 VALUES (87,87); + + +SET @commit_id=46; +INSERT INTO t8 VALUES (88,88); +INSERT INTO t8 VALUES (89,89); +INSERT INTO t8 VALUES (90,90); + +SET SESSION debug_dbug=@old_dbug; + +INSERT INTO t8 VALUES (91,91); +INSERT INTO t8 VALUES (92,92); +INSERT INTO t8 VALUES (93,93); +INSERT INTO t8 VALUES (94,94); +INSERT INTO t8 VALUES (95,95); +INSERT INTO t8 VALUES (96,96); +INSERT INTO t8 VALUES (97,97); +INSERT INTO t8 VALUES (98,98); +INSERT INTO t8 VALUES (99,99); + + +SELECT * FROM t7 ORDER BY a; +SELECT * FROM t8 ORDER BY a; +--source include/save_master_gtid.inc + + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SELECT * FROM t7 ORDER BY a; +SELECT * FROM t8 ORDER BY a; + +--source include/stop_slave.inc +SET GLOBAL slave_transaction_retries= @old_retries; +SET GLOBAL slave_parallel_threads=10; +--source include/start_slave.inc + + +--echo *** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang *** + +--connection server_2 +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep'; + +--connection server_1 +# Inject two group commits. The bug was that ANALYZE TABLE would call +# wakeup_subsequent_commits() too early, allowing the following transaction +# in the same group to run ahead and binlog and free the GCO. Then we get +# wrong binlog order and later access freed GCO, which causes lost wakeup +# of following GCO and thus replication hang. +# We injected a small sleep in ANALYZE to make the race easier to hit (this +# can only cause false negatives in versions with the bug, not false positives, +# so sleep is ok here. And it's in general not possible to trigger reliably +# the race with debug_sync, since the bugfix makes the race impossible). + +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +# Group commit with cid=10000, two event groups. +SET @commit_id= 10000; +ANALYZE TABLE t2; +INSERT INTO t3 VALUES (120, 0); + +# Group commit with cid=10001, one event group. +SET @commit_id= 10001; +INSERT INTO t3 VALUES (121, 0); + +SET SESSION debug_dbug=@old_dbug; + +SELECT * FROM t3 WHERE a >= 120 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +SELECT * FROM t3 WHERE a >= 120 ORDER BY a; + +--source include/stop_slave.inc +SET GLOBAL debug_dbug= @old_dbug; +--source include/start_slave.inc + + +--echo *** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. *** + +--connection server_2 +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep'; + +--connection server_1 +# Inject two group commits. The bug was that record_gtid for a +# non-transactional event group would commit its own transaction, which would +# cause ha_commit_trans() to call wakeup_subsequent_commits() too early. This +# in turn lead to access to freed group_commit_orderer object, losing a wakeup +# and causing slave threads to hang. +# We inject a small sleep in the corresponding record_gtid() to make the race +# easier to hit. + +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +# Group commit with cid=10010, two event groups. +SET @old_server_id= @@SESSION.server_id; +SET SESSION server_id= 100; +SET @commit_id= 10010; +ALTER TABLE t1 COMMENT "Hulubulu!"; +SET SESSION server_id= @old_server_id; +INSERT INTO t3 VALUES (130, 0); + +# Group commit with cid=10011, one event group. +SET @commit_id= 10011; +INSERT INTO t3 VALUES (131, 0); + +SET SESSION debug_dbug=@old_dbug; + +SELECT * FROM t3 WHERE a >= 130 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +SELECT * FROM t3 WHERE a >= 130 ORDER BY a; + +--source include/stop_slave.inc +SET GLOBAL debug_dbug= @old_dbug; +--source include/start_slave.inc + + +--echo *** MDEV-8031: Parallel replication stops on "connection killed" error (probably incorrectly handled deadlock kill) *** + +--connection server_1 +INSERT INTO t3 VALUES (201,0), (202,0); +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_mdev8031'; + +--connection server_1 +# We artificially create a situation that hopefully resembles the original +# bug which was only seen "in the wild", and only once. +# Setup a fake group commit with lots of conflicts that will lead to deadloc +# kill. The slave DBUG injection causes the slave to be deadlock killed at +# a particular point during the retry, and then later do a small sleep at +# another critical point where the prior transaction then has a chance to +# complete. Finally an extra KILL check catches an unhandled, lingering +# deadlock kill. So rather artificial, but at least it exercises the +# relevant code paths. +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +SET @commit_id= 10200; +INSERT INTO t3 VALUES (203, 1); +INSERT INTO t3 VALUES (204, 1); +INSERT INTO t3 VALUES (205, 1); +UPDATE t3 SET b=b+1 WHERE a=201; +UPDATE t3 SET b=b+1 WHERE a=201; +UPDATE t3 SET b=b+1 WHERE a=201; +UPDATE t3 SET b=b+1 WHERE a=202; +UPDATE t3 SET b=b+1 WHERE a=202; +UPDATE t3 SET b=b+1 WHERE a=202; +UPDATE t3 SET b=b+1 WHERE a=202; +UPDATE t3 SET b=b+1 WHERE a=203; +UPDATE t3 SET b=b+1 WHERE a=203; +UPDATE t3 SET b=b+1 WHERE a=204; +UPDATE t3 SET b=b+1 WHERE a=204; +UPDATE t3 SET b=b+1 WHERE a=204; +UPDATE t3 SET b=b+1 WHERE a=203; +UPDATE t3 SET b=b+1 WHERE a=205; +UPDATE t3 SET b=b+1 WHERE a=205; +SET SESSION debug_dbug=@old_dbug; + +SELECT * FROM t3 WHERE a>=200 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +SELECT * FROM t3 WHERE a>=200 ORDER BY a; +--source include/stop_slave.inc +SET GLOBAL debug_dbug= @old_dbug; +--source include/start_slave.inc + + +--echo *** Check getting deadlock killed inside open_binlog() during retry. *** + +--connection server_2 +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_retry_event_group_open_binlog_kill'; +SET @old_max= @@GLOBAL.max_relay_log_size; +SET GLOBAL max_relay_log_size= 4096; + +--connection server_1 +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +--let $large= `SELECT REPEAT("*", 8192)` +SET @commit_id= 10210; +--echo Omit long queries that cause relaylog rotations and transaction retries... +--disable_query_log +eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=205 /* $large */; +eval UPDATE t3 SET b=b+1 WHERE a=205 /* $large */; +--enable_query_log +SET SESSION debug_dbug=@old_dbug; + +SELECT * FROM t3 WHERE a>=200 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +SELECT * FROM t3 WHERE a>=200 ORDER BY a; +--source include/stop_slave.inc +SET GLOBAL debug_dbug= @old_debg; +SET GLOBAL max_relay_log_size= @old_max; +--source include/start_slave.inc + + + # Clean up. --connection server_2 --source include/stop_slave.inc @@ -1852,7 +2324,7 @@ SET DEBUG_SYNC= 'RESET'; --connection server_1 DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7,t8; SET DEBUG_SYNC= 'RESET'; --source include/rpl_end.inc |