summaryrefslogtreecommitdiff
path: root/sql/handler.cc
diff options
context:
space:
mode:
authorSujatha <sujatha.sivakumar@mariadb.com>2020-04-09 20:45:45 +0530
committerAndrei Elkin <andrei.elkin@mariadb.com>2020-10-23 18:04:31 +0300
commitbc00529d14aeda845d8129cb924efe4db2c11228 (patch)
tree3cfd84a4f79d8fa32887713b947493e48b0bbbc0 /sql/handler.cc
parent72cb20820b4f48fda0ae5e0ead4783d76dde3ec0 (diff)
downloadmariadb-git-bb-10.1-andrei.tar.gz
MDEV-21117: --tc-heuristic-recover=rollback is not replication safebb-10.1-andrei
Problem: ======= When run after a master server crash, --tc-heuristic-recover=rollback leaves the server in an inconsistent state: the binlog still contains transactions that were rolled back by the option. A server recovered this way cannot safely be used for replication. Fix: === A new value, --tc-heuristic-recover=BINLOG_TRUNCATE, is added to the server option. It works like ROLLBACK for the engines, but additionally does its best to keep the binlog in sync by truncating it to the last committed transaction. The BINLOG_TRUNCATE algorithm scans a sequence of one or more binlog files, starting from the binlog checkpoint file, to find a candidate truncate position. The candidate truncate position is the beginning of the first GTID event whose transaction is not found to have been committed. There must be no committed transactions beyond this position, *but* non-transactional events are possible; this is checked as well, and a warning is issued when it is the case. Rollback, and possibly completion of partial commits (in the multi-engine case), is done regardless, and the binlog GTID state is adjusted accordingly. Also, a STOP binlog event is written at the end of the truncated binary log.
Diffstat (limited to 'sql/handler.cc')
-rw-r--r--sql/handler.cc143
1 files changed, 138 insertions, 5 deletions
diff --git a/sql/handler.cc b/sql/handler.cc
index 76a187f4312..42cd048afc8 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1249,6 +1249,29 @@ int ha_prepare(THD *thd)
DBUG_RETURN(error);
}
+/*
+ Returns counted number of
+ read-write recoverable transaction participants optionally limited to two.
+ Also optionally returns the last found rw ha_info through the 2nd argument.
+
+ A participant is counted when it is on thd->transaction.all.ha_list,
+ its is_trx_read_write() is true and its handlerton has a recover method.
+
+ @param thd            connection whose transaction.all.ha_list is walked
+ @param ptr_ha_info    when not NULL, receives the last counted Ha_trx_info;
+                       it is left untouched when nothing is counted
+ @param count_through  when false the walk stops as soon as a second
+                       participant is counted, so the result is at most 2;
+                       when true every participant is counted
+
+ @return the number of counted participants
+*/
+uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info, bool count_through)
+{
+ unsigned rw_ha_count= 0;
+
+ for (Ha_trx_info * ha_info= thd->transaction.all.ha_list; ha_info;
+ ha_info= ha_info->next())
+ {
+ if (ha_info->is_trx_read_write() && ha_info->ht()->recover)
+ {
+ if (ptr_ha_info)
+ *ptr_ha_info= ha_info;
+ if (++rw_ha_count > 1 && !count_through) /* two found: enough unless counting all */
+ break;
+ }
+ }
+ return rw_ha_count;
+}
+
/**
Check if we can skip the two-phase commit.
@@ -1866,7 +1889,7 @@ static char* xid_to_str(char *buf, XID *xid)
recover() step of xa.
@note
- there are three modes of operation:
+ there are four modes of operation:
- automatic recover after a crash
in this case commit_list != 0, tc_heuristic_recover==0
all xids from commit_list are committed, others are rolled back
@@ -1877,6 +1900,9 @@ static char* xid_to_str(char *buf, XID *xid)
- no recovery (MySQL did not detect a crash)
in this case commit_list==0, tc_heuristic_recover == 0
there should be no prepared transactions in this case.
+ - recovery with binlog truncation: the binlog is truncated to the last
+ transaction committed in any engine. Prepared transactions that follow
+ it in binlog order are rolled back.
*/
struct xarecover_st
{
@@ -1884,8 +1910,95 @@ struct xarecover_st
XID *list;
HASH *commit_list;
bool dry_run;
+ MEM_ROOT *mem_root;
+ bool error;
};
+#ifdef HAVE_REPLICATION
+/*
+ Inserts a new hash member.
+
+ The member is allocated on ptr_mem_root with alloc_root(), gets
+ xid= xid_arg and in_engine_prepare= 1, and is then passed to
+ my_hash_insert(). No other field of xid_recovery_member is assigned here.
+ NOTE(review): confirm the struct has no further members that need an
+ initial value.
+
+ returns a successfully created and inserted @c xid_recovery_member
+ into hash @c hash_arg,
+ or NULL (when alloc_root() returns NULL or my_hash_insert() returns
+ non-zero).
+*/
+static xid_recovery_member*
+xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root)
+{
+ xid_recovery_member *member= (xid_recovery_member*)
+ alloc_root(ptr_mem_root, sizeof(xid_recovery_member));
+ if (!member)
+ return NULL;
+
+ member->xid= xid_arg;
+ member->in_engine_prepare= 1; /* first time this xid is recorded */
+ return my_hash_insert(hash_arg, (uchar*) member) ? NULL : member;
+}
+
+/*
+ Inserts a new or updates an existing hash member.
+
+ hash_arg is searched by xid_arg; ptr_mem_root is only used when a new
+ member has to be allocated.
+
+ returns false on success,
+ true otherwise.
+*/
+static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg,
+ MEM_ROOT *ptr_mem_root)
+{
+ /*
+ Search if XID is already present in hash_arg. If found, increment
+ its in_engine_prepare counter; otherwise insert a new member, which
+ xid_member_insert() creates with the counter set to 1.
+ */
+ xid_recovery_member* member;
+ if ((member= (xid_recovery_member *)
+ my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg))))
+ member->in_engine_prepare++;
+ else
+ member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root);
+
+ return member == NULL; /* NULL only when the insert path failed */
+}
+
+/*
+ Hash iterate function to complete with commit or rollback as
+ decided at binlog scanning.
+
+ member_arg is the xid_recovery_member being visited and hton_arg the
+ handlerton to act on. A member whose in_engine_prepare is above zero is
+ passed to hton->rollback_by_xid(), any other member to
+ hton->commit_by_xid().
+
+ returns the engine call's result converted to my_bool.
+ NOTE(review): presumably a non-zero return makes my_hash_iterate() stop
+ early - confirm. Neither function pointer is checked for NULL here.
+*/
+static my_bool xarecover_do_commit_or_rollback(void *member_arg,
+ void *hton_arg)
+{
+ xid_recovery_member *member= (xid_recovery_member*) member_arg;
+ handlerton *hton= (handlerton*) hton_arg;
+ xid_t x;
+ my_bool rc;
+
+ x.set(member->xid); /* build the xid_t the engine calls take */
+ rc= member->in_engine_prepare > 0 ?
+ hton->rollback_by_xid(hton, &x) : hton->commit_by_xid(hton, &x);
+
+ return rc;
+}
+
+static my_bool xarecover_binlog_truncate_handlerton(THD *unused,
+ plugin_ref plugin,
+ void *arg)
+{ /*
+ Callback handed to plugin_foreach() by
+ ha_recover_binlog_truncate_complete(). arg is the HASH of
+ xid_recovery_member entries. For a handlerton that is enabled
+ (state == SHOW_OPTION_YES) and has a recover method, every hash
+ member is passed to xarecover_do_commit_or_rollback().
+ Always returns FALSE.
+ */
+ handlerton *hton= plugin_hton(plugin);
+
+ if (hton->state == SHOW_OPTION_YES && hton->recover)
+ {
+ /* NOTE(review): the result of my_hash_iterate() is discarded here. */
+ my_hash_iterate((HASH*) arg, xarecover_do_commit_or_rollback, hton);
+ }
+
+ return FALSE;
+}
+
+void ha_recover_binlog_truncate_complete(HASH *commit_list)
+{ /*
+ Hands xarecover_binlog_truncate_handlerton to plugin_foreach() for
+ plugins of type MYSQL_STORAGE_ENGINE_PLUGIN, with commit_list as the
+ callback argument. The result of plugin_foreach() is not checked and
+ nothing is reported back to the caller.
+ */
+ plugin_foreach(NULL, xarecover_binlog_truncate_handlerton,
+ MYSQL_STORAGE_ENGINE_PLUGIN, commit_list);
+}
+#endif
+
static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
void *arg)
{
@@ -1893,13 +2006,16 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
struct xarecover_st *info= (struct xarecover_st *) arg;
int got;
+ if (info->error)
+ return TRUE;
+
if (hton->state == SHOW_OPTION_YES && hton->recover)
{
while ((got= hton->recover(hton, info->list, info->len)) > 0 )
{
sql_print_information("Found %d prepared transaction(s) in %s",
got, hton_name(hton)->str);
- for (int i=0; i < got; i ++)
+ for (int i=0; i < got && !info->error; i ++)
{
my_xid x= WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ?
wsrep_xid_seqno(info->list[i]) :
@@ -1936,7 +2052,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
}
#endif
}
- else
+ else if (tc_heuristic_recover != TC_RECOVER_BINLOG_TRUNCATE)
{
#ifndef DBUG_OFF
int rc=
@@ -1951,6 +2067,17 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
}
#endif
}
+#ifdef HAVE_REPLICATION
+ else
+ {
+ if (xid_member_replace(info->commit_list, x, info->mem_root))
+ {
+ info->error= true;
+ sql_print_error("Error in memory allocation at xarecover_handlerton");
+ break;
+ }
+ }
+#endif
}
if (got < info->len)
break;
@@ -1959,7 +2086,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
return FALSE;
}
-int ha_recover(HASH *commit_list)
+int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root)
{
struct xarecover_st info;
DBUG_ENTER("ha_recover");
@@ -1967,9 +2094,12 @@ int ha_recover(HASH *commit_list)
info.commit_list= commit_list;
info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
info.list= NULL;
+ info.mem_root= arg_mem_root;
+ info.error= false;
/* commit_list and tc_heuristic_recover cannot be set both */
- DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
+ DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0 ||
+ tc_heuristic_recover > 2);
/* if either is set, total_ha_2pc must be set too */
DBUG_ASSERT(info.dry_run ||
(failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);
@@ -2011,6 +2141,9 @@ int ha_recover(HASH *commit_list)
info.found_my_xids, opt_tc_log_file);
DBUG_RETURN(1);
}
+ if (info.error)
+ DBUG_RETURN(1);
+
if (info.commit_list)
sql_print_information("Crash recovery finished.");
DBUG_RETURN(0);