From abe6eb10a65d5c28c221d756357b4e7f392ad13d Mon Sep 17 00:00:00 2001 From: Sujatha Date: Fri, 30 Apr 2021 18:12:43 +0530 Subject: MDEV-16146: MariaDB slave stops with following errors. Problem: ======== 180511 11:07:58 [ERROR] Slave I/O: Unexpected master's heartbeat data: heartbeat is not compatible with local info;the event's data: log_file_name mysql-bin.000009 log_pos 1054262041, Error_code: 1623 Analysis: ========= In a replication setup, when the master server doesn't have any events to send to the slave server, it sends a 'Heartbeat_log_event'. This event carries the current binary log filename and offset details. The offset value is stored within 4 bytes of the event header. When the size of the binary log is higher than UINT32_MAX, the log_pos value will not fit in 4 bytes of memory. It overflows and hence the slave stops with an error. Fix: === Since we cannot extend the common_header of the Log_event class, a Log_event::log_pos value greater than 4GB is transported in a sub-header of the Heartbeat event. Log_event::log_pos in such a case is set to zero to indicate that the 8 byte sub-header is allocated in the event. In case of cross-version replication the following behaviour is expected: OLD - Server without fix NEW - Server with fix OLD<->NEW : works bidirectionally as long as the binlog offset is (normally) within 4GB. When log_pos > UINT32_MAX OLD->NEW : The 'log_pos' is bound to overflow and the NEW slave may report an invalid event/incompatible heartbeat event error. NEW->OLD : Since the patched server sets log_pos=0 on overflow, the OLD slave will report an invalid event error. 
--- sql/sql_repl.cc | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'sql/sql_repl.cc') diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 59a3f686e45..7ff0e27b008 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -32,6 +32,7 @@ #include "debug_sync.h" #include "log.h" // get_gtid_list_event + enum enum_gtid_until_state { GTID_UNTIL_NOT_DONE, GTID_UNTIL_STOP_AFTER_STANDALONE, @@ -781,7 +782,7 @@ get_slave_until_gtid(THD *thd, String *out_str) @param event_coordinates binlog file name and position of the last real event master sent from binlog - @note + @note Among three essential pieces of heartbeat data Log_event::when is computed locally. The error to send is serious and should force terminating @@ -795,6 +796,8 @@ static int send_heartbeat_event(binlog_send_info *info, DBUG_ENTER("send_heartbeat_event"); ulong ev_offset; + char sub_header_buf[HB_SUB_HEADER_LEN]; + bool sub_header_in_use=false; if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg)) DBUG_RETURN(1); @@ -815,18 +818,38 @@ static int send_heartbeat_event(binlog_send_info *info, ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + (do_checksum ? 
BINLOG_CHECKSUM_LEN : 0); int4store(header + SERVER_ID_OFFSET, global_system_variables.server_id); + DBUG_EXECUTE_IF("simulate_pos_4G", + { + const_cast(coord)->pos= (UINT_MAX32 + (ulong)1); + DBUG_SET("-d, simulate_pos_4G"); + };); + if (coord->pos <= UINT_MAX32) + { + int4store(header + LOG_POS_OFFSET, coord->pos); // log_pos + } + else + { + // Set common_header.log_pos=0 to indicate its overflow + int4store(header + LOG_POS_OFFSET, 0); + sub_header_in_use= true; + int8store(sub_header_buf, coord->pos); + event_len+= HB_SUB_HEADER_LEN; + } + int4store(header + EVENT_LEN_OFFSET, event_len); int2store(header + FLAGS_OFFSET, 0); - int4store(header + LOG_POS_OFFSET, coord->pos); // log_pos - packet->append(header, sizeof(header)); - packet->append(p, ident_len); // log_file_name + if (sub_header_in_use) + packet->append(sub_header_buf, sizeof(sub_header_buf)); + packet->append(p, ident_len); // log_file_name if (do_checksum) { char b[BINLOG_CHECKSUM_LEN]; ha_checksum crc= my_checksum(0, (uchar*) header, sizeof(header)); + if (sub_header_in_use) + crc= my_checksum(crc, (uchar*) sub_header_buf, sizeof(sub_header_buf)); crc= my_checksum(crc, (uchar*) p, ident_len); int4store(b, crc); packet->append(b, sizeof(b)); -- cgit v1.2.1