mysql-test/suite/rpl/t/rpl_parallel2.test


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231

--source include/have_debug.inc
--source include/have_innodb.inc
--source include/have_binlog_format_statement.inc
--let $rpl_topology=1->2
--source include/rpl_init.inc

--echo *** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel replication ***

--connection server_2
SET STATEMENT sql_log_bin=0 FOR call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
set @old_parallel_mode= @@GLOBAL.slave_parallel_mode;
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=5;
set global slave_parallel_mode= optimistic;
--source include/start_slave.inc

--connection server_1
CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave");
--save_master_pos

--connection server_2
--sync_with_master

--connection server_1
INSERT INTO t1 VALUES (1,sleep(2));
--save_master_pos

--connection server_2
--sync_with_master

# The slave position (which --sync_with_master waits for) is updated just
# before the Seconds_Behind_Master. So we have to wait for the zero status
# to appear, otherwise there is a small window between --sync_with_master
# and SHOW SLAVE STATUS where we can see a non-zero value.
--let $slave_param= Seconds_Behind_Master
--let $slave_param_value= 0
--source include/wait_for_slave_param.inc
--echo Seconds_Behind_Master should be zero here because the slave is fully caught up and idle.
--let $status_items= Seconds_Behind_Master
--source include/show_slave_status.inc


--echo *** MDEV-8294: Inconsistent behavior of slave parallel threads at runtime ***

--connection server_1
INSERT INTO t1 VALUES (10,0);
# Force a duplicate key error on the slave.
SET sql_log_bin= 0;
DELETE FROM t1 WHERE a=10;
SET sql_log_bin= 1;
INSERT INTO t1 VALUES (10,0);
--save_master_pos
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;

--connection server_2
--let $slave_sql_errno= 1062,1593
--source include/wait_for_slave_sql_error.inc

# At this point, the worker threads should have stopped also.
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc

# Check that the pool can still be resized, but remains inactive as no slave
# SQL thread is running.
SET GLOBAL slave_parallel_threads=8;
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc

STOP SLAVE;
# At this point, the worker threads should have stopped.
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc


SET GLOBAL sql_slave_skip_counter= 1;
--source include/start_slave.inc
# At this point, the worker threads should have been spawned.
--let $wait_condition= SELECT COUNT(*)=8 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;


--echo *** MDEV-7818: Deadlock occurring with parallel replication and FTWRL ***

--connection server_1
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1,0), (2,0), (3,0);
--save_master_pos

--connection server_2
--sync_with_master
--source include/stop_slave.inc

--connection server_1
# Create a group commit with two transactions, will be used to provoke the
# problematic thread interaction with FTWRL on the slave.
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 4242;
SET SESSION debug_dbug="+d,binlog_force_commit_id";

BEGIN;
UPDATE t2 SET b=b+1 WHERE a=2;
COMMIT;

BEGIN;
INSERT INTO t2 VALUES (4,10);
COMMIT;

SET SESSION debug_dbug= @old_dbug;

INSERT INTO t2 VALUES (5,0);
INSERT INTO t2 VALUES (6,0);
INSERT INTO t2 VALUES (7,0);
INSERT INTO t2 VALUES (8,0);
INSERT INTO t2 VALUES (9,0);
INSERT INTO t2 VALUES (10,0);
INSERT INTO t2 VALUES (11,0);
INSERT INTO t2 VALUES (12,0);
INSERT INTO t2 VALUES (13,0);
INSERT INTO t2 VALUES (14,0);
INSERT INTO t2 VALUES (15,0);
INSERT INTO t2 VALUES (16,0);
INSERT INTO t2 VALUES (17,0);
INSERT INTO t2 VALUES (18,0);
INSERT INTO t2 VALUES (19,0);
--save_master_pos

--connection server_2

--connect (s1, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
# Block one transaction on a row lock.
BEGIN;
SELECT * FROM t2 WHERE a=2 FOR UPDATE;

--connection server_2

# Wait for slave thread of the other transaction to have the commit lock.
--source include/start_slave.inc
--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc

--connect (s2, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
send FLUSH TABLES WITH READ LOCK;
# The bug was that at this point we were deadlocked.
# The FTWRL command would wait forever for T2 to commit.
# T2 would wait for T1 to commit first, but T1 is waiting for
# the global read lock to be released.

--connection s1
# Release the lock that blocs T1 from replicating.
COMMIT;

--connection s1
send STOP SLAVE;

--connection s2
reap;

--connection server_1
SELECT * FROM t2 ORDER BY a;

--connection s2
UNLOCK TABLES;

SELECT "after UNLOCK TABLES" as state;

--connection s1
reap;

SELECT "after reap of STOP SLAVE" as state;

--connection server_2
--source include/wait_for_slave_to_stop.inc
--source include/start_slave.inc
--sync_with_master

SELECT * FROM t2 ORDER BY a;


--echo *** MDEV-8318: Assertion `!pool->busy' failed in pool_mark_busy(rpl_parallel_thread_pool*) on concurrent FTWRL ***

--connection server_1
LOCK TABLE t2 WRITE;


--connect (m1,localhost,root,,test)
--connection m1
--let $cid=`SELECT CONNECTION_ID()`
send FLUSH TABLES WITH READ LOCK;

--connect (m2,localhost,root,,test)
# We cannot force the race with DEBUG_SYNC, because the race does not
# exist after fixing the bug. At best we could force a debug sync to
# time out, which is effectively just a sleep.
# So just put a small sleep here; it is enough to trigger the bug in
# most run before the bug fix, and the code should work correctly
# however the thread scheduling happens.
--sleep 0.1
send FLUSH TABLES WITH READ LOCK;

--connection server_1
--replace_result $cid CID
eval KILL QUERY $cid;

--connection m1
--error ER_QUERY_INTERRUPTED
reap;

--connection server_1
UNLOCK TABLES;

--connection m2
reap;
UNLOCK TABLES;


# Clean up.
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
set global slave_parallel_mode= @old_parallel_mode;
--source include/start_slave.inc

--connection server_1
DROP TABLE t1, t2;

--source include/rpl_end.inc