From 3dc3ab1a3048484910ca8acccaf76c71b080e533 Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Thu, 11 Jan 2018 23:56:54 +0200 Subject: Added checking that row events ends with a proper end block Problems -------- The slave io thread did not conduct integrity check for a group of row-based events. Specifically it tolerates missed terminal block event that must be flagged with STMT_END. Failure to react on its loss can confuse the applier thread in various ways. Another potential issue was that there were no check of impossible second in row Gtid-log-event while the slave io thread is receiving to be skipped events after reconnect. Fixes ----- The slave io thread is made by this patch to track the rows event STMT_END status. Whenever at next event reading the IO thread finds out that a preceding Rows event did not actually had the flag, an explicit error is issued. Replication can be resumed after the source of failure is eliminated, see a provided test. Note that currently the row-based group integrity check excludes the compressed version 2 Rows events (which are not generated by MariaDB master). Its uncompressed counterpart is manually tested. The 2nd issue is covered to produce an error in case the io thread receives a successive Gtid_log_event while it is post-reconnect skipping. --- sql/rpl_mi.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'sql/rpl_mi.h') diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index ccc1be6e5ce..b304e45f86a 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -133,6 +133,19 @@ public: extern TYPELIB slave_parallel_mode_typelib; +typedef struct st_rows_event_tracker +{ + char binlog_file_name[FN_REFLEN]; + my_off_t first_seen; + my_off_t last_seen; + bool stmt_end_seen; + void update(const char* file_name, size_t pos, + const char* buf, + const Format_description_log_event *fdle); + void reset(); + bool check_and_report(const char* file_name, size_t pos); +} Rows_event_tracker; + /***************************************************************************** Replication IO Thread @@ -301,6 +314,14 @@ class Master_info : public Slave_reporting_capability uint64 gtid_reconnect_event_skip_count; /* gtid_event_seen is false until we receive first GTID event from master. */ bool gtid_event_seen; + /** + The struct holds some history of Rows- log-event reading/queuing + by the receiver thread. Its fields are updated per each such event + at time of queue_event(), and they are checked to detect + the Rows- event group integrity violation at time of first non-Rows- + event gets handled. + */ + Rows_event_tracker rows_event_tracker; bool in_start_all_slaves, in_stop_all_slaves; bool in_flush_all_relay_logs; uint users; /* Active user for object */ -- cgit v1.2.1