From 0c1f97b3ab7befdae522aaa738be00f6700cee8e Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Tue, 5 May 2020 20:32:32 +0300 Subject: MDEV-15152 Optimistic parallel slave doesnt cope well with START SLAVE UNTIL The immediate bug was caused by a failure to recognize a correct position to stop the slave applier run in optimistic parallel mode. There were the following set of issues that the analysis unveil. 1 incorrect estimate for the event binlog position passed to is_until_satisfied 2 wait for workers to complete by the driver thread did not account non-group events that could be left unprocessed and thus to mix up the last executed binlog group's file and position: the file remained old and the position related to the new rotated file 3 incorrect 'slave reached file:pos' by the parallel slave report in the error log 4 relay log UNTIL missed out the parallel slave branch in is_until_satisfied. The patch addresses all of them to simplify logics of log change notification in either the master and relay-log until case. P.1 is addressed with passing the event into is_until_satisfied() for proper analisis by the function. P.2 is fixed by changes in handle_queued_pos_update(). P.4 required removing relay-log change notification by workers. Instead the driver thread updates the notion of the current relay-log fully itself with aid of introduced bool Relay_log_info::until_relay_log_names_defer. An extra print out of the requested until file:pos is arranged with --log-warning=3. --- sql/rpl_parallel.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'sql/rpl_parallel.cc') diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 027aa5f628c..3871ddcf8ef 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -75,18 +75,18 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) /* Do not update position if an earlier event group caused an error abort. */ DBUG_ASSERT(qev->typ == rpl_parallel_thread::queued_event::QUEUED_POS_UPDATE); + rli= qev->rgi->rli; e= qev->entry_for_queued; - if (e->stop_on_error_sub_id < (uint64)ULONGLONG_MAX || e->force_abort) + if (e->stop_on_error_sub_id < (uint64)ULONGLONG_MAX || + (e->force_abort && !rli->stop_for_until)) return; - rli= qev->rgi->rli; mysql_mutex_lock(&rli->data_lock); cmp= strcmp(rli->group_relay_log_name, qev->event_relay_log_name); if (cmp < 0) { rli->group_relay_log_pos= qev->future_event_relay_log_pos; strmake_buf(rli->group_relay_log_name, qev->event_relay_log_name); - rli->notify_group_relay_log_name_update(); } else if (cmp == 0 && rli->group_relay_log_pos < qev->future_event_relay_log_pos) rli->group_relay_log_pos= qev->future_event_relay_log_pos; -- cgit v1.2.1