diff options
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- | sql/slave.cc | 95 |
1 files changed, 90 insertions, 5 deletions
diff --git a/sql/slave.cc b/sql/slave.cc index 93bb8669632..0f2d9f1d3d4 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2232,6 +2232,7 @@ slave_killed_err: static bool wait_for_relay_log_space(Relay_log_info* rli) { bool slave_killed=0; + bool ignore_log_space_limit; Master_info* mi = rli->mi; PSI_stage_info old_stage; THD* thd = mi->io_thd; @@ -2247,6 +2248,11 @@ static bool wait_for_relay_log_space(Relay_log_info* rli) !rli->ignore_log_space_limit) mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock); + ignore_log_space_limit= rli->ignore_log_space_limit; + rli->ignore_log_space_limit= 0; + + thd->EXIT_COND(&old_stage); + /* Makes the IO thread read only one event at a time until the SQL thread is able to purge the relay @@ -2270,7 +2276,8 @@ static bool wait_for_relay_log_space(Relay_log_info* rli) thread sleeps waiting for events. */ - if (rli->ignore_log_space_limit) + + if (ignore_log_space_limit) { #ifndef DBUG_OFF { @@ -2292,11 +2299,8 @@ static bool wait_for_relay_log_space(Relay_log_info* rli) mysql_mutex_unlock(&mi->data_lock); rli->sql_force_rotate_relay= false; } - - rli->ignore_log_space_limit= false; } - thd->EXIT_COND(&old_stage); DBUG_RETURN(slave_killed); } @@ -2860,7 +2864,8 @@ bool show_all_master_info(THD* thd) if (send_show_master_info_header(thd, 1, gtid_pos.length())) DBUG_RETURN(TRUE); - if (!(elements= master_info_index->master_info_hash.records)) + if (!master_info_index || + !(elements= master_info_index->master_info_hash.records)) goto end; /* @@ -5308,6 +5313,86 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) event_len - BINLOG_CHECKSUM_LEN : event_len, mi->rli.relay_log.description_event_for_queue); + if (unlikely(mi->gtid_reconnect_event_skip_count) && + unlikely(!mi->gtid_event_seen) && + rev.is_artificial_event() && + (mi->prev_master_id != mi->master_id || + strcmp(rev.new_log_ident, mi->master_log_name) != 0)) + { + /* + Artificial Rotate_log_event is the first event we receive at the start + of each master binlog file. It gives the name of the new binlog file. + + Normally, we already have this name from the real rotate event at the + end of the previous binlog file (unless we are making a new connection + using GTID). But if the master server restarted/crashed, there is no + rotate event at the end of the prior binlog file, so the name is new. + + We use this fact to handle a special case of master crashing. If the + master crashed while writing the binlog, it might end with a partial + event group lacking the COMMIT/XID event, which must be rolled + back. If the slave IO thread happens to get a disconnect in the middle + of exactly this event group, it will try to reconnect at the same GTID + and skip already fetched events. However, that GTID did not commit on + the master before the crash, so it does not really exist, and the + master will connect the slave at the next following GTID starting in + the next binlog. This could confuse the slave and make it mix the + start of one event group with the end of another. + + But we detect this case here, by noticing the change of binlog name + which detects the missing rotate event at the end of the previous + binlog file. In this case, we reset the counters to make us not skip + the next event group, and queue an artificial Format Description + event. The previously fetched incomplete event group will then be + rolled back when the Format Description event is executed by the SQL + thread. + + A similar case is if the reconnect somehow connects to a different + master server (like due to a network proxy or IP address takeover). + We detect this case by noticing a change of server_id and in this + case likewise rollback the partially received event group. + */ + Format_description_log_event fdle(4); + + if (mi->prev_master_id != mi->master_id) + sql_print_warning("The server_id of master server changed in the " + "middle of GTID %u-%u-%llu. Assuming a change of " + "master server, so rolling back the previously " + "received partial transaction. Expected: %lu, " + "received: %lu", mi->last_queued_gtid.domain_id, + mi->last_queued_gtid.server_id, + mi->last_queued_gtid.seq_no, + mi->prev_master_id, mi->master_id); + else if (strcmp(rev.new_log_ident, mi->master_log_name) != 0) + sql_print_warning("Unexpected change of master binlog file name in the " + "middle of GTID %u-%u-%llu, assuming that master has " + "crashed and rolling back the transaction. Expected: " + "'%s', received: '%s'", + mi->last_queued_gtid.domain_id, + mi->last_queued_gtid.server_id, + mi->last_queued_gtid.seq_no, + mi->master_log_name, rev.new_log_ident); + + mysql_mutex_lock(log_lock); + if (likely(!fdle.write(rli->relay_log.get_log_file()) && + !rli->relay_log.flush_and_sync(NULL))) + { + rli->relay_log.harvest_bytes_written(&rli->log_space_total); + } + else + { + error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; + mysql_mutex_unlock(log_lock); + goto err; + } + rli->relay_log.signal_update(); + mysql_mutex_unlock(log_lock); + + mi->gtid_reconnect_event_skip_count= 0; + mi->events_queued_since_last_gtid= 0; + } + mi->prev_master_id= mi->master_id; + if (unlikely(process_io_rotate(mi, &rev))) { error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; |