From 50b3632fa434e6bf836830365f664b67ff989279 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Mon, 25 Feb 2019 21:49:04 +0100 Subject: MDEV-9519: Data corruption will happen on the Galera cluster size change If we have a 2+ node cluster which is replicating from an async master and the binlog_format is set to STATEMENT and multi-row inserts are executed on a table with an auto_increment column such that values are automatically generated by MySQL, then the server node generates wrong auto_increment values, which are different from what was generated on the async master. In the title of the MDEV-9519 it was proposed to ban start slave on a Galera if master binlog_format = statement and wsrep_auto_increment_control = 1, but the problem can be solved without such a restriction. The causes and fixes: 1. We need to improve processing of changing the auto-increment values after changing the cluster size. 2. If wsrep auto_increment_control switched on during operation of the node, then we should immediately update the auto_increment_increment and auto_increment_offset global variables, without waiting of the next invocation of the wsrep_view_handler_cb() callback. In the current version these variables retain its initial values if wsrep_auto_increment_control is switched on during operation of the node, which leads to inconsistent results on the different nodes in some scenarios. 3. If wsrep auto_increment_control switched off during operation of the node, then we must return the original values of the auto_increment_increment and auto_increment_offset global variables, as the user has set. To make this possible, we need to add a "shadow copies" of these variables (which stores the latest values set by the user). https://jira.mariadb.org/browse/MDEV-9519 --- sql/ha_partition.cc | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'sql/ha_partition.cc') diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 1349571a3f8..051a0e15af9 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -10511,31 +10511,37 @@ void ha_partition::release_auto_increment() m_file[i]->ha_release_auto_increment(); } } - else if (next_insert_id) + else { - ulonglong next_auto_inc_val; lock_auto_increment(); - next_auto_inc_val= part_share->next_auto_inc_val; - /* - If the current auto_increment values is lower than the reserved - value, and the reserved value was reserved by this thread, - we can lower the reserved value. - */ - if (next_insert_id < next_auto_inc_val && - auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val) + if (next_insert_id) { - THD *thd= ha_thd(); + ulonglong next_auto_inc_val= part_share->next_auto_inc_val; /* - Check that we do not lower the value because of a failed insert - with SET INSERT_ID, i.e. forced/non generated values. + If the current auto_increment values is lower than the reserved + value, and the reserved value was reserved by this thread, + we can lower the reserved value. */ - if (thd->auto_inc_intervals_forced.maximum() < next_insert_id) - part_share->next_auto_inc_val= next_insert_id; + if (next_insert_id < next_auto_inc_val && + auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val) + { + THD *thd= ha_thd(); + /* + Check that we do not lower the value because of a failed insert + with SET INSERT_ID, i.e. forced/non generated values. + */ + if (thd->auto_inc_intervals_forced.maximum() < next_insert_id) + part_share->next_auto_inc_val= next_insert_id; + } + DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu", + (ulong) part_share->next_auto_inc_val)); } - DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu", - (ulong) part_share->next_auto_inc_val)); - - /* Unlock the multi row statement lock taken in get_auto_increment */ + /* + Unlock the multi-row statement lock taken in get_auto_increment. + These actions must be performed even if the next_insert_id field + contains zero, otherwise if the update_auto_increment fails then + an unnecessary lock will remain: + */ if (auto_increment_safe_stmt_log_lock) { auto_increment_safe_stmt_log_lock= FALSE; -- cgit v1.2.1