summaryrefslogtreecommitdiff
path: root/sql/rpl_gtid.cc
diff options
context:
space:
mode:
authorAndrei Elkin <andrei.elkin@mariadb.com>2017-09-29 21:56:59 +0300
committerAndrei Elkin <andrei.elkin@mariadb.com>2017-11-15 22:26:32 +0200
commitaae4932775d5780fe575087b40779700eaa4fcbf (patch)
treeedc0d2f8e178d19daf9c89688f24c9cdd9a3ef06 /sql/rpl_gtid.cc
parent7e1326cfcf5aea38f40fb787f1b987cb325c785b (diff)
downloadmariadb-git-aae4932775d5780fe575087b40779700eaa4fcbf.tar.gz
MDEV-12012/MDEV-11969 Can't remove GTIDs for a stale GTID Domain ID
As reported in MDEV-11969 "there's no way to ditch knowledge" about some domain that is no longer updated on a server. Besides being of annoyance to clutter output in DBA console stale domains can prevent the slave to connect the master as MDEV-12012 witnesses. What domain is obsolete must be evaluated by the user (DBA) according to whether the domain info is still relevant and will the domain ever receive any update. This patch introduces a method to discard obsolete gtid domains from the server binlog state. The removal requires no event group from such domain present in existing binlog files though. If there are any the containing logs must be first PURGEd in order for FLUSH BINARY LOGS DELETE_DOMAIN_ID=(list-of-domains) succeed. Otherwise the command returns an error. The list of obsolete domains can be computed through intersecting two sets - the earliest (first) binlog's Gtid_list and the current value of @@global.gtid_binlog_state - and extracting the domain id components from the intersection list items. The new DELETE_DOMAIN_ID featured FLUSH continues to rotate binlog omitting the deleted domains from the active binlog file's Gtid_list. Notice though when the command is ineffective - that none of requested to delete domain exists in the binlog state - rotation does not occur. Obsolete domain deletion is not harmful for connected slaves as long as master side binlog files *purge* is synchronized with FLUSH-DELETE_DOMAIN_ID. The slaves must have the last event from purged files processed as usual, in order not to bump later into requesting a gtid from a file which was already gone. While the command is not replicated (as ordinary FLUSH BINLOG LOGS is) slaves, even though having extra domains, won't suffer from reconnection errors thanks to master-slave gtid connection protocol allowing the master to be ignorant about a gtid domain. Should at failover such slave to be promoted into master role it may run the ex-master's FLUSH BINARY LOGS DELETE_DOMAIN_ID=(list-of-domains) to clean its own binlog state. NOTES. suite/perfschema/r/start_server_low_digest.result is re-recorded as consequence of internal parser codes changes.
Diffstat (limited to 'sql/rpl_gtid.cc')
-rw-r--r--sql/rpl_gtid.cc151
1 files changed, 150 insertions, 1 deletions
diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc
index 51df8f1a789..7b1acf17ef5 100644
--- a/sql/rpl_gtid.cc
+++ b/sql/rpl_gtid.cc
@@ -26,7 +26,7 @@
#include "key.h"
#include "rpl_gtid.h"
#include "rpl_rli.h"
-
+#include "log_event.h"
const LEX_STRING rpl_gtid_slave_state_table_name=
{ C_STRING_WITH_LEN("gtid_slave_pos") };
@@ -1728,6 +1728,155 @@ end:
return res;
}
+/**
+ Remove domains supplied by the first argument from binlog state.
+ Removal is done for any domain whose last gtids (from all its servers) match
+ ones in Gtid list event of the 2nd argument.
+
+ @param ids gtid domain id sequence, may contain dups
+ @param glev pointer to Gtid list event describing
+ the match condition
+ @param errbuf [out] pointer to possible error message array
+
+ @retval NULL as success when at least one domain is removed
+ @retval "" empty string to indicate ineffective call
+ when no domains removed
+ @retval NOT EMPTY string otherwise an error message
+*/
+const char*
+rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids,
+ Gtid_list_log_event *glev,
+ char* errbuf)
+{
+ DYNAMIC_ARRAY domain_unique; // sequece (unsorted) of unique element*:s
+ rpl_binlog_state::element* domain_unique_buffer[16];
+ ulong k, l;
+ const char* errmsg= NULL;
+
+ DBUG_ENTER("rpl_binlog_state::drop_domain");
+
+ my_init_dynamic_array2(&domain_unique,
+ sizeof(element*), domain_unique_buffer,
+ sizeof(domain_unique_buffer) / sizeof(element*), 4, 0);
+
+ mysql_mutex_lock(&LOCK_binlog_state);
+
+ /*
+ Gtid list is supposed to come from a binlog's Gtid_list event and
+ therefore should be a subset of the current binlog state. That is
+ for every domain in the list the binlog state contains a gtid with
+ sequence number not less than that of the list.
+ Exceptions of this inclusion rule are:
+ A. the list may still refer to gtids from already deleted domains.
+ Files containing them must have been purged whereas the file
+ with the list is not yet.
+ B. out of order groups were injected
+ C. manually build list of binlog files violating the inclusion
+ constraint.
+ While A is a normal case (not necessarily distinguishable from C though),
+ B and C may require the user's attention so any (incl the A's suspected)
+ inconsistency is diagnosed and *warned*.
+ */
+ for (l= 0, errbuf[0]= 0; l < glev->count; l++, errbuf[0]= 0)
+ {
+ rpl_gtid* rb_state_gtid= find_nolock(glev->list[l].domain_id,
+ glev->list[l].server_id);
+ if (!rb_state_gtid)
+ sprintf(errbuf,
+ "missing gtids from the '%u-%u' domain-server pair which is "
+ "referred to in the gtid list describing an earlier state. Ignore "
+ "if the domain ('%u') was already explicitly deleted",
+ glev->list[l].domain_id, glev->list[l].server_id,
+ glev->list[l].domain_id);
+ else if (rb_state_gtid->seq_no < glev->list[l].seq_no)
+ sprintf(errbuf,
+ "having a gtid '%u-%u-%llu' which is less than "
+ "the '%u-%u-%llu' of the gtid list describing an earlier state. "
+ "The state may have been affected by manually injecting "
+ "a lower sequence number gtid or via replication",
+ rb_state_gtid->domain_id, rb_state_gtid->server_id,
+ rb_state_gtid->seq_no, glev->list[l].domain_id,
+ glev->list[l].server_id, glev->list[l].seq_no);
+ if (strlen(errbuf)) // use strlen() as cheap flag
+ push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_BINLOG_CANT_DELETE_GTID_DOMAIN,
+ "The current gtid binlog state is incompatible with "
+ "a former one %s.", errbuf);
+ }
+
+ /*
+ For each domain_id from ids
+ when no such domain in binlog state
+ warn && continue
+ For each domain.server's last gtid
+ when not locate the last gtid in glev.list
+ error out binlog state can't change
+ otherwise continue
+ */
+ for (ulong i= 0; i < ids->elements; i++)
+ {
+ rpl_binlog_state::element *elem= NULL;
+ ulong *ptr_domain_id;
+ bool not_match;
+
+ ptr_domain_id= (ulong*) dynamic_array_ptr(ids, i);
+ elem= (rpl_binlog_state::element *)
+ my_hash_search(&hash, (const uchar *) ptr_domain_id, 0);
+ if (!elem)
+ {
+ push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_BINLOG_CANT_DELETE_GTID_DOMAIN,
+ "The gtid domain being deleted ('%lu') is not in "
+ "the current binlog state", *ptr_domain_id);
+ continue;
+ }
+
+ for (not_match= true, k= 0; k < elem->hash.records; k++)
+ {
+ rpl_gtid *d_gtid= (rpl_gtid *)my_hash_element(&elem->hash, k);
+ for (ulong l= 0; l < glev->count && not_match; l++)
+ not_match= !(*d_gtid == glev->list[l]);
+ }
+
+ if (not_match)
+ {
+ sprintf(errbuf, "binlog files may contain gtids from the domain ('%lu') "
+ "being deleted. Make sure to first purge those files",
+ *ptr_domain_id);
+ errmsg= errbuf;
+ goto end;
+ }
+ // compose a sequence of unique pointers to domain object
+ for (k= 0; k < domain_unique.elements; k++)
+ {
+ if ((rpl_binlog_state::element*) dynamic_array_ptr(&domain_unique, k)
+ == elem)
+ break; // domain_id's elem has been already in
+ }
+ if (k == domain_unique.elements) // proven not to have duplicates
+ insert_dynamic(&domain_unique, (uchar*) &elem);
+ }
+
+ // Domain removal from binlog state
+ for (k= 0; k < domain_unique.elements; k++)
+ {
+ rpl_binlog_state::element *elem= *(rpl_binlog_state::element**)
+ dynamic_array_ptr(&domain_unique, k);
+ my_hash_free(&elem->hash);
+ my_hash_delete(&hash, (uchar*) elem);
+ }
+
+ DBUG_ASSERT(strlen(errbuf) == 0);
+
+ if (domain_unique.elements == 0)
+ errmsg= "";
+
+end:
+ mysql_mutex_unlock(&LOCK_binlog_state);
+ delete_dynamic(&domain_unique);
+
+ DBUG_RETURN(errmsg);
+}
slave_connection_state::slave_connection_state()
{