diff options
author | Brave Galera Crew <devel@codership.com> | 2019-01-23 15:30:00 +0400 |
---|---|---|
committer | Sergey Vojtovich <svoj@mariadb.org> | 2019-01-23 15:30:00 +0400 |
commit | 36a2a185fe18d31a644da46cfabd9757a379280c (patch) | |
tree | 00ca186ce2cfdc3ab7e4979336a384e2b51c5aa9 /sql | |
parent | 382115b99297ceaa4c3067f79efb5c2515013be5 (diff) | |
download | mariadb-git-36a2a185fe18d31a644da46cfabd9757a379280c.tar.gz |
Galera4
Diffstat (limited to 'sql')
77 files changed, 9026 insertions, 4744 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 697b794f39f..a22ce694805 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -16,21 +16,27 @@ IF(WITH_WSREP AND NOT EMBEDDED_LIBRARY) - SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep) SET(WSREP_SOURCES + wsrep_client_service.cc + wsrep_high_priority_service.cc + wsrep_server_service.cc + wsrep_storage_service.cc + wsrep_server_state.cc + wsrep_utils.cc + wsrep_xid.cc wsrep_check_opts.cc - wsrep_hton.cc - wsrep_mysqld.cc + wsrep_mysqld.cc wsrep_notify.cc wsrep_sst.cc - wsrep_utils.cc wsrep_var.cc wsrep_binlog.cc wsrep_applier.cc wsrep_thd.cc - wsrep_xid.cc + wsrep_schema.cc + wsrep_plugin.cc + service_wsrep.cc ) - SET(WSREP_LIB wsrep) + SET(WSREP_LIB wsrep-lib wsrep_api_v26) ELSE() SET(WSREP_SOURCES wsrep_dummy.cc) ENDIF() @@ -42,7 +48,6 @@ ${PCRE_INCLUDES} ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/sql -${WSREP_INCLUDES} ) diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc index db056a9f08e..6327cd138de 100644 --- a/sql/event_data_objects.cc +++ b/sql/event_data_objects.cc @@ -32,7 +32,9 @@ #include "event_db_repository.h" #include "sp_head.h" #include "sql_show.h" // append_definer, append_identifier - +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ /** @addtogroup Event_Scheduler @{ @@ -1353,6 +1355,10 @@ Event_job_data::execute(THD *thd, bool drop) thd->reset_for_next_command(); +#ifdef WITH_WSREP + wsrep_open(thd); + wsrep_before_command(thd); +#endif /* WITH_WSREP */ /* MySQL parser currently assumes that current database is either present in THD or all names in all statements are fully specified. 
@@ -1527,6 +1533,10 @@ end: if (save_sctx) event_sctx.restore_security_context(thd, save_sctx); #endif +#ifdef WITH_WSREP + wsrep_after_command_ignore_result(thd); + wsrep_close(thd); +#endif /* WITH_WSREP */ thd->lex->unit.cleanup(); thd->end_statement(); thd->cleanup_after_query(); diff --git a/sql/handler.cc b/sql/handler.cc index 001055cd475..1b5aaebe3cf 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -54,8 +54,12 @@ #include "semisync_master.h" #include "wsrep_mysqld.h" -#include "wsrep.h" +#ifdef WITH_WSREP +#include "wsrep_binlog.h" #include "wsrep_xid.h" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" /* wsrep transaction hooks */ +#endif /* WITH_WSREP */ /* While we have legacy_db_type, we have this array to @@ -251,6 +255,9 @@ handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute) if (no_substitute) return NULL; +#ifdef WITH_WSREP + (void)wsrep_after_rollback(thd, false); +#endif /* WITH_WSREP */ return ha_default_handlerton(thd); } /* ha_checktype */ @@ -1199,17 +1206,28 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg) static int prepare_or_error(handlerton *ht, THD *thd, bool all) { + #ifdef WITH_WSREP + if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION && + wsrep_before_prepare(thd, all)) + { + return(1); + } +#endif /* WITH_WSREP */ + int err= ht->prepare(ht, thd, all); status_var_increment(thd->status_var.ha_prepare_count); if (err) { - /* avoid sending error, if we're going to replay the transaction */ -#ifdef WITH_WSREP - if (ht != wsrep_hton || - err == EMSGSIZE || thd->wsrep_conflict_state != MUST_REPLAY) -#endif my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); } +#ifdef WITH_WSREP + if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION && + wsrep_after_prepare(thd, all)) + { + err= 1; + } +#endif /* WITH_WSREP */ + return err; } @@ -1394,7 +1412,7 @@ int ha_commit_trans(THD *thd, bool all) } #ifdef WITH_ARIA_STORAGE_ENGINE - ha_maria::implicit_commit(thd, TRUE); + ha_maria::implicit_commit(thd, 
TRUE); #endif if (!ha_info) @@ -1404,6 +1422,12 @@ int ha_commit_trans(THD *thd, bool all) */ if (is_real_trans) thd->transaction.cleanup(); +#ifdef WITH_WSREP + if (WSREP(thd) && all && !error) + { + wsrep_commit_empty(thd, all); + } +#endif /* WITH_WSREP */ DBUG_RETURN(0); } @@ -1489,7 +1513,28 @@ int ha_commit_trans(THD *thd, bool all) if (trans->no_2pc || (rw_ha_count <= 1)) { +#ifdef WITH_WSREP + /* + This commit will not go through log_and_order() where wsrep commit + ordering is normally done. Commit ordering must be done here. + */ + bool run_wsrep_commit= (WSREP(thd) && + rw_ha_count && + wsrep_thd_is_local(thd) && + wsrep_has_changes(thd, all)); + if (run_wsrep_commit) + error= wsrep_before_commit(thd, all); + if (error) + { + ha_rollback_trans(thd, FALSE); + goto wsrep_err; + } +#endif /* WITH_WSREP */ error= ha_commit_one_phase(thd, all); +#ifdef WITH_WSREP + if (run_wsrep_commit) + error= wsrep_after_commit(thd, all); +#endif /* WITH_WSREP */ goto done; } @@ -1521,10 +1566,14 @@ int ha_commit_trans(THD *thd, bool all) DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); #ifdef WITH_WSREP - if (!error && WSREP_ON && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid)) + if (!error && WSREP_ON) { - // xid was rewritten by wsrep - xid= wsrep_xid_seqno(thd->transaction.xid_state.xid); + wsrep::seqno const s= wsrep_xid_seqno(thd->wsrep_xid); + if (!s.is_undefined()) + { + // xid was rewritten by wsrep + xid= s.get(); + } } #endif /* WITH_WSREP */ @@ -1533,18 +1582,35 @@ int ha_commit_trans(THD *thd, bool all) error= commit_one_phase_2(thd, all, trans, is_real_trans); goto done; } - +#ifdef WITH_WSREP + if (wsrep_before_commit(thd, all)) + goto wsrep_err; +#endif /* WITH_WSREP */ DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order"); cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered, need_commit_ordered); if (!cookie) + { + WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie); goto err; - + } DEBUG_SYNC(thd, 
"ha_commit_trans_after_log_and_order"); DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE();); error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0; - +#ifdef WITH_WSREP + if (error || wsrep_after_commit(thd, all)) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + if (thd->wsrep_trx().state() == wsrep::transaction::s_must_abort) + { + mysql_mutex_unlock(&thd->LOCK_thd_data); + (void)tc_log->unlog(cookie, xid); + goto wsrep_err; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); + } +#endif /* WITH_WSREP */ DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE();); if (tc_log->unlog(cookie, xid)) { @@ -1566,6 +1632,19 @@ done: goto end; /* Come here if error and we need to rollback. */ +#ifdef WITH_WSREP +wsrep_err: + mysql_mutex_lock(&thd->LOCK_thd_data); + if (thd->wsrep_trx().state() == wsrep::transaction::s_must_abort) + { + WSREP_DEBUG("BF abort has happened after prepare & certify"); + mysql_mutex_unlock(&thd->LOCK_thd_data); + ha_rollback_trans(thd, TRUE); + } + else + mysql_mutex_unlock(&thd->LOCK_thd_data); + +#endif /* WITH_WSREP */ err: error= 1; /* Transaction was rolled back */ /* @@ -1575,7 +1654,11 @@ err: */ if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec)) ha_rollback_trans(thd, all); - + else + { + WSREP_DEBUG("rollback skipped %p %d",thd->rgi_slave, + thd->rgi_slave->is_parallel_exec); + } end: if (rw_trans && mdl_request.ticket) { @@ -1587,6 +1670,13 @@ end: */ thd->mdl_context.release_lock(mdl_request.ticket); } +#ifdef WITH_WSREP + if (WSREP(thd) && all && !error && (rw_ha_count == 0)) + { + wsrep_commit_empty(thd, all); + } +#endif /* WITH_WSREP */ + DBUG_RETURN(error); } @@ -1744,6 +1834,9 @@ int ha_rollback_trans(THD *thd, bool all) DBUG_RETURN(1); } +#ifdef WITH_WSREP + (void) wsrep_before_rollback(thd, all); +#endif /* WITH_WSREP */ if (ha_info) { /* Close all cursors that can not survive ROLLBACK */ @@ -1759,9 +1852,9 @@ int ha_rollback_trans(THD *thd, bool all) my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); 
error=1; #ifdef WITH_WSREP - WSREP_WARN("handlerton rollback failed, thd %llu %lld conf %d SQL %s", - thd->thread_id, thd->query_id, thd->wsrep_conflict_state, - thd->query()); + WSREP_WARN("handlerton rollback failed, thd %lld %lld conf %d SQL %s", + thd->thread_id, thd->query_id, thd->wsrep_trx().state(), + thd->query()); #endif /* WITH_WSREP */ } status_var_increment(thd->status_var.ha_rollback_count); @@ -1780,6 +1873,15 @@ int ha_rollback_trans(THD *thd, bool all) thd->transaction.xid_state.xa_state != XA_NOTR) thd->transaction.xid_state.rm_error= thd->get_stmt_da()->sql_errno(); +#ifdef WITH_WSREP + if (thd->is_error()) + { + WSREP_DEBUG("ha_rollback_trans(%lld, %s) rolled back: %s: %s; is_real %d", + thd->thread_id, all?"TRUE":"FALSE", WSREP_QUERY(thd), + thd->get_stmt_da()->message(), is_real_trans); + } + (void) wsrep_after_rollback(thd, all); +#endif /* WITH_WSREP */ /* Always cleanup. Even if nht==0. There may be savepoints. */ if (is_real_trans) { @@ -1913,6 +2015,28 @@ static char* xid_to_str(char *buf, XID *xid) } #endif +#ifdef WITH_WSREP +static my_xid wsrep_order_and_check_continuity(XID *list, int len) +{ + wsrep_sort_xid_array(list, len); + wsrep::gtid cur_position= wsrep_get_SE_checkpoint(); + long long cur_seqno= cur_position.seqno().get(); + for (int i= 0; i < len; ++i) + { + if (!wsrep_is_wsrep_xid(list + i) || + wsrep_xid_seqno(list + i) != cur_seqno + 1) + { + WSREP_WARN("Discovered discontinuity in recovered wsrep " + "transaction XIDs. Truncating the recovery list to " + "%d entries", i); + break; + } + ++cur_seqno; + } + WSREP_INFO("Last wsrep seqno to be recovered %lld", cur_seqno); + return (cur_seqno < 0 ? 0 : cur_seqno); +} +#endif /* WITH_WSREP */ /** recover() step of xa. 
@@ -1950,10 +2074,32 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, { sql_print_information("Found %d prepared transaction(s) in %s", got, hton_name(hton)->str); +#ifdef WITH_WSREP + /* If wsrep_on=ON, XIDs are first ordered and then the range of + recovered XIDs is checked for continuity. All the XIDs which + are in continuous range can be safely committed if binlog + is off since they have already ordered and certified in the + cluster. + + The discontinuity of wsrep XIDs may happen because the GTID + is assigned for transaction in wsrep_before_prepare(), but the + commit order is entered in wsrep_before_commit(). This means that + transactions may run prepare step out of order and may + result in gap in wsrep XIDs. This can be the case for example + if we have T1 with seqno 1 and T2 with seqno 2 and the server + crashes after T2 finishes prepare step but before T1 starts + the prepare. + */ + my_xid wsrep_limit= 0; + if (WSREP_ON) + { + wsrep_limit= wsrep_order_and_check_continuity(info->list, got); + } +#endif /* WITH_WSREP */ for (int i=0; i < got; i ++) { my_xid x= IF_WSREP(WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ? - wsrep_xid_seqno(info->list[i]) : + wsrep_xid_seqno(&info->list[i]) : info->list[i].get_my_xid(), info->list[i].get_my_xid()); if (!x) // not "mine" - that is generated by external TM @@ -1972,9 +2118,12 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, continue; } // recovery mode - if (info->commit_list ? - my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 : - tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT) + if (IF_WSREP((wsrep_emulate_bin_log && + wsrep_is_wsrep_xid(info->list + i) && + x <= wsrep_limit), false) || + (info->commit_list ? 
+ my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 : + tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)) { #ifndef DBUG_OFF int rc= @@ -2332,11 +2481,26 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) { int err; handlerton *ht= ha_info->ht(); +#ifdef WITH_WSREP + if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION) + { + WSREP_DEBUG("ha_rollback_to_savepoint: run before_rollbackha_rollback_trans hook"); + (void) wsrep_before_rollback(thd, !thd->in_sub_stmt); + + } +#endif // WITH_WSREP if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt))) { // cannot happen my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } +#ifdef WITH_WSREP + if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION) + { + WSREP_DEBUG("ha_rollback_to_savepoint: run after_rollback hook"); + (void) wsrep_after_rollback(thd, !thd->in_sub_stmt); + } +#endif // WITH_WSREP status_var_increment(thd->status_var.ha_rollback_count); ha_info_next= ha_info->next(); ha_info->reset(); /* keep it conveniently zero-filled */ @@ -2353,6 +2517,16 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) */ int ha_savepoint(THD *thd, SAVEPOINT *sv) { +#ifdef WITH_WSREP + /* + Register binlog hton for savepoint processing if wsrep binlog + emulation is on. + */ + if (WSREP_EMULATE_BINLOG(thd) && wsrep_thd_is_local(thd)) + { + wsrep_register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode()); + } +#endif /* WITH_WSREP */ int error=0; THD_TRANS *trans= (thd->in_sub_stmt ? 
&thd->transaction.stmt : &thd->transaction.all); @@ -5968,6 +6142,12 @@ bool handler::check_table_binlog_row_based(bool binlog_row) return false; if (unlikely((table->in_use->variables.sql_log_bin_off))) return 0; /* Called by partitioning engine */ +#ifdef WITH_WSREP + if (!table->in_use->variables.sql_log_bin && + wsrep_thd_is_applying(table->in_use)) + return 0; /* wsrep patch sets sql_log_bin to silence binlogging + from high priority threads */ +#endif /* WITH_WSREP */ if (unlikely((!check_table_binlog_row_based_done))) { check_table_binlog_row_based_done= 1; @@ -5998,12 +6178,12 @@ bool handler::check_table_binlog_row_based_internal(bool binlog_row) Otherwise, return 'true' if binary logging is on. */ IF_WSREP(((WSREP_EMULATE_BINLOG(thd) && - (thd->wsrep_exec_mode != REPL_RECV)) || + wsrep_thd_is_local(thd)) || ((WSREP(thd) || (thd->variables.option_bits & OPTION_BIN_LOG)) && mysql_bin_log.is_open())), - (thd->variables.option_bits & OPTION_BIN_LOG) && - mysql_bin_log.is_open())); + (thd->variables.option_bits & OPTION_BIN_LOG) && + mysql_bin_log.is_open())); } @@ -6128,23 +6308,9 @@ int binlog_log_row(TABLE* table, const uchar *before_record, /* only InnoDB tables will be replicated through binlog emulation */ if ((WSREP_EMULATE_BINLOG(thd) && - table->file->partition_ht()->db_type != DB_TYPE_INNODB) || - (thd->wsrep_ignore_table == true)) + !(table->file->partition_ht()->flags & HTON_WSREP_REPLICATION)) || + thd->wsrep_ignore_table == true) return 0; - - /* enforce wsrep_max_ws_rows */ - if (WSREP(thd) && table->s->tmp_table == NO_TMP_TABLE) - { - thd->wsrep_affected_rows++; - if (wsrep_max_ws_rows && - thd->wsrep_exec_mode != REPL_RECV && - thd->wsrep_affected_rows > wsrep_max_ws_rows) - { - trans_rollback_stmt(thd) || trans_rollback(thd); - my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0)); - return ER_ERROR_DURING_COMMIT; - } - } #endif if (!table->file->check_table_binlog_row_based(1)) @@ -6256,6 +6422,27 @@ int 
handler::ha_reset() DBUG_RETURN(reset()); } +#ifdef WITH_WSREP +static int wsrep_after_row(THD *thd) +{ + DBUG_ENTER("wsrep_after_row"); + /* enforce wsrep_max_ws_rows */ + thd->wsrep_affected_rows++; + if (wsrep_max_ws_rows && + wsrep_thd_is_local(thd) && + thd->wsrep_affected_rows > wsrep_max_ws_rows) + { + trans_rollback_stmt(thd) || trans_rollback(thd); + my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0)); + DBUG_RETURN(ER_ERROR_DURING_COMMIT); + } + else if (wsrep_after_row(thd, false)) + { + DBUG_RETURN(ER_LOCK_DEADLOCK); + } + DBUG_RETURN(0); +} +#endif /* WITH_WSREP */ int handler::ha_write_row(uchar *buf) { @@ -6278,7 +6465,15 @@ int handler::ha_write_row(uchar *buf) { rows_changed++; error= binlog_log_row(table, 0, buf, log_func); +#ifdef WITH_WSREP + if (table_share->tmp_table == NO_TMP_TABLE && + WSREP(ha_thd()) && (error= wsrep_after_row(ha_thd()))) + { + DBUG_RETURN(error); + } +#endif /* WITH_WSREP */ } + DEBUG_SYNC_C("ha_write_row_end"); DBUG_RETURN(error); } @@ -6310,6 +6505,13 @@ int handler::ha_update_row(const uchar *old_data, const uchar *new_data) { rows_changed++; error= binlog_log_row(table, old_data, new_data, log_func); +#ifdef WITH_WSREP + if (table_share->tmp_table == NO_TMP_TABLE && + WSREP(ha_thd()) && (error= wsrep_after_row(ha_thd()))) + { + return error; + } +#endif /* WITH_WSREP */ } return error; } @@ -6365,6 +6567,13 @@ int handler::ha_delete_row(const uchar *buf) { rows_changed++; error= binlog_log_row(table, buf, 0, log_func); +#ifdef WITH_WSREP + if (table_share->tmp_table == NO_TMP_TABLE && + WSREP(ha_thd()) && (error= wsrep_after_row(ha_thd()))) + { + return error; + } +#endif /* WITH_WSREP */ } return error; } @@ -6554,7 +6763,7 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) DBUG_ENTER("ha_abort_transaction"); if (!WSREP(bf_thd) && !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && - bf_thd->wsrep_exec_mode == TOTAL_ORDER)) { + wsrep_thd_is_toi(bf_thd))) { 
DBUG_RETURN(0); } @@ -6570,54 +6779,6 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) DBUG_RETURN(0); } - -void ha_fake_trx_id(THD *thd) -{ - DBUG_ENTER("ha_fake_trx_id"); - - bool no_fake_trx_id= true; - - if (!WSREP(thd)) - { - DBUG_VOID_RETURN; - } - - if (thd->wsrep_ws_handle.trx_id != WSREP_UNDEFINED_TRX_ID) - { - WSREP_DEBUG("fake trx id skipped: %" PRIu64, thd->wsrep_ws_handle.trx_id); - DBUG_VOID_RETURN; - } - - /* Try statement transaction if standard one is not set. */ - THD_TRANS *trans= (thd->transaction.all.ha_list) ? &thd->transaction.all : - &thd->transaction.stmt; - - Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; - - for (; ha_info; ha_info= ha_info_next) - { - handlerton *hton= ha_info->ht(); - if (hton->fake_trx_id) - { - hton->fake_trx_id(hton, thd); - - /* Got a fake trx id. */ - no_fake_trx_id= false; - - /* - We need transaction ID from just one storage engine providing - fake_trx_id (which will most likely be the case). - */ - break; - } - ha_info_next= ha_info->next(); - } - - if (unlikely(no_fake_trx_id)) - WSREP_WARN("Cannot get fake transaction ID from storage engine."); - - DBUG_VOID_RETURN; -} #endif /* WITH_WSREP */ diff --git a/sql/handler.h b/sql/handler.h index fc6246c38a1..f5a7051a4e2 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1485,7 +1485,6 @@ struct handlerton THD *victim_thd, my_bool signal); int (*set_checkpoint)(handlerton *hton, const XID* xid); int (*get_checkpoint)(handlerton *hton, XID* xid); - void (*fake_trx_id)(handlerton *hton, THD *thd); /* Optional clauses in the CREATE/ALTER TABLE */ @@ -1682,6 +1681,9 @@ handlerton *ha_default_tmp_handlerton(THD *thd); // Engine needs to access the main connect string in partitions #define HTON_CAN_READ_CONNECT_STRING_IN_PARTITION (1 <<12) +/* can be replicated by wsrep replication provider plugin */ +#define HTON_WSREP_REPLICATION (1 << 13) + class Ha_trx_info; struct THD_TRANS @@ -4837,9 +4839,6 @@ int ha_savepoint(THD *thd, SAVEPOINT 
*sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); #ifdef WITH_WSREP int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); -void ha_fake_trx_id(THD *thd); -#else -inline void ha_fake_trx_id(THD *thd) { } #endif /* these are called by storage engines */ diff --git a/sql/item_create.cc b/sql/item_create.cc index 87bf69f3c96..ba7a704e29b 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -3193,6 +3193,45 @@ protected: }; #endif +#ifdef WITH_WSREP +class Create_func_wsrep_last_written_gtid : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_wsrep_last_written_gtid s_singleton; + +protected: + Create_func_wsrep_last_written_gtid() {} + virtual ~Create_func_wsrep_last_written_gtid() {} +}; + + +class Create_func_wsrep_last_seen_gtid : public Create_func_arg0 +{ +public: + virtual Item *create_builder(THD *thd); + + static Create_func_wsrep_last_seen_gtid s_singleton; + +protected: + Create_func_wsrep_last_seen_gtid() {} + virtual ~Create_func_wsrep_last_seen_gtid() {} +}; + + +class Create_func_wsrep_sync_wait_upto : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_CSTRING *name, List<Item> *item_list); + + static Create_func_wsrep_sync_wait_upto s_singleton; + +protected: + Create_func_wsrep_sync_wait_upto() {} + virtual ~Create_func_wsrep_sync_wait_upto() {} +}; +#endif /* WITH_WSREP */ #ifdef HAVE_SPATIAL class Create_func_x : public Create_func_arg1 @@ -6905,6 +6944,63 @@ Create_func_within::create_2_arg(THD *thd, Item *arg1, Item *arg2) } #endif +#ifdef WITH_WSREP +Create_func_wsrep_last_written_gtid +Create_func_wsrep_last_written_gtid::s_singleton; + +Item* +Create_func_wsrep_last_written_gtid::create_builder(THD *thd) +{ + thd->lex->safe_to_cache_query= 0; + return new (thd->mem_root) Item_func_wsrep_last_written_gtid(thd); +} + + +Create_func_wsrep_last_seen_gtid +Create_func_wsrep_last_seen_gtid::s_singleton; + +Item* 
+Create_func_wsrep_last_seen_gtid::create_builder(THD *thd) +{ + thd->lex->safe_to_cache_query= 0; + return new (thd->mem_root) Item_func_wsrep_last_seen_gtid(thd); +} + + +Create_func_wsrep_sync_wait_upto +Create_func_wsrep_sync_wait_upto::s_singleton; + +Item* +Create_func_wsrep_sync_wait_upto::create_native(THD *thd, + LEX_CSTRING *name, + List<Item> *item_list) +{ + Item *func= NULL; + int arg_count= 0; + Item *param_1, *param_2; + + if (item_list != NULL) + arg_count= item_list->elements; + + switch (arg_count) + { + case 1: + param_1= item_list->pop(); + func= new (thd->mem_root) Item_func_wsrep_sync_wait_upto(thd, param_1); + break; + case 2: + param_1= item_list->pop(); + param_2= item_list->pop(); + func= new (thd->mem_root) Item_func_wsrep_sync_wait_upto(thd, param_1, param_2); + break; + default: + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + break; + } + thd->lex->safe_to_cache_query= 0; + return func; +} +#endif /* WITH_WSREP */ #ifdef HAVE_SPATIAL Create_func_x Create_func_x::s_singleton; @@ -7347,6 +7443,11 @@ static Native_func_registry func_array[] = { { STRING_WITH_LEN("WEEKDAY") }, BUILDER(Create_func_weekday)}, { { STRING_WITH_LEN("WEEKOFYEAR") }, BUILDER(Create_func_weekofyear)}, { { STRING_WITH_LEN("WITHIN") }, GEOM_BUILDER(Create_func_within)}, +#ifdef WITH_WSREP + { { STRING_WITH_LEN("WSREP_LAST_WRITTEN_GTID") }, BUILDER(Create_func_wsrep_last_written_gtid)}, + { { STRING_WITH_LEN("WSREP_LAST_SEEN_GTID") }, BUILDER(Create_func_wsrep_last_seen_gtid)}, + { { STRING_WITH_LEN("WSREP_SYNC_WAIT_UPTO_GTID") }, BUILDER(Create_func_wsrep_sync_wait_upto)}, +#endif /* WITH_WSREP */ { { STRING_WITH_LEN("X") }, GEOM_BUILDER(Create_func_x)}, { { STRING_WITH_LEN("Y") }, GEOM_BUILDER(Create_func_y)}, { { STRING_WITH_LEN("YEARWEEK") }, BUILDER(Create_func_year_week)}, diff --git a/sql/item_func.cc b/sql/item_func.cc index c73cfc7953a..a10f381b1dc 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2451,7 +2451,7 @@ void 
Item_func_rand::seed_random(Item *arg) THD *thd= current_thd; if (WSREP(thd)) { - if (thd->wsrep_exec_mode==REPL_RECV) + if (wsrep_thd_is_applying(thd)) tmp= thd->wsrep_rand; else tmp= thd->wsrep_rand= (uint32) arg->val_int(); diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 8cc539f3d12..77e870b297d 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -5264,3 +5264,102 @@ String *Item_temptable_rowid::val_str(String *str) str_value.set((char*)(table->file->ref), max_length, &my_charset_bin); return &str_value; } +#ifdef WITH_WSREP + +#include "wsrep_mysqld.h" + +String *Item_func_wsrep_last_written_gtid::val_str_ascii(String *str) +{ + wsrep::gtid gtid= current_thd->wsrep_cs().last_written_gtid(); + if (gtid_str.alloc(wsrep::gtid_c_str_len())) + { + my_error(ER_OUTOFMEMORY, wsrep::gtid_c_str_len()); + null_value= true; + return NULL; + } + + ssize_t gtid_len= gtid_print_to_c_str(gtid, (char*) gtid_str.ptr(), + wsrep::gtid_c_str_len()); + if (gtid_len < 0) + { + my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), func_name(), + "wsrep_gtid_print failed"); + null_value= true; + return NULL; + } + gtid_str.length(gtid_len); + return >id_str; +} + +String *Item_func_wsrep_last_seen_gtid::val_str_ascii(String *str) +{ + /* TODO: Should call Wsrep_server_state.instance().last_committed_gtid() + instead. 
*/ + wsrep::gtid gtid= Wsrep_server_state::instance().provider().last_committed_gtid(); + if (gtid_str.alloc(wsrep::gtid_c_str_len())) + { + my_error(ER_OUTOFMEMORY, wsrep::gtid_c_str_len()); + null_value= true; + return NULL; + } + ssize_t gtid_len= wsrep::gtid_print_to_c_str(gtid, (char*) gtid_str.ptr(), + wsrep::gtid_c_str_len()); + if (gtid_len < 0) + { + my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), func_name(), + "wsrep_gtid_print failed"); + null_value= true; + return NULL; + } + gtid_str.length(gtid_len); + return >id_str; +} + +longlong Item_func_wsrep_sync_wait_upto::val_int() +{ + int timeout= -1; + String* gtid_str= args[0]->val_str(&value); + if (gtid_str == NULL) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), func_name()); + return 0LL; + } + + if (arg_count == 2) + { + timeout= args[1]->val_int(); + } + + wsrep_gtid_t gtid; + int gtid_len= wsrep_gtid_scan(gtid_str->ptr(), gtid_str->length(), >id); + if (gtid_len < 0) + { + my_error(ER_WRONG_ARGUMENTS, MYF(0), func_name()); + return 0LL; + } + + if (gtid.seqno == WSREP_SEQNO_UNDEFINED && + wsrep_uuid_compare(>id.uuid, &WSREP_UUID_UNDEFINED) == 0) + { + return 1LL; + } + + enum wsrep::provider::status status= + wsrep_sync_wait_upto(current_thd, >id, timeout); + + if (status) + { + int err; + switch (status) { + case wsrep::provider::error_transaction_missing: + err= ER_WRONG_ARGUMENTS; + break; + default: + err= ER_LOCK_WAIT_TIMEOUT; + } + my_error(err, MYF(0), func_name()); + return 0LL; + } + return 1LL; +} +#endif /* WITH_WSREP */ diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 762a3c2559e..2ead0f44e49 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -1804,5 +1804,56 @@ public: Item *get_copy(THD *thd) { return get_item_copy<Item_temptable_rowid>(thd, this); } }; +#ifdef WITH_WSREP + +#include "wsrep_api.h" + +class Item_func_wsrep_last_written_gtid: public Item_str_ascii_func +{ + String gtid_str; +public: + Item_func_wsrep_last_written_gtid(THD *thd): Item_str_ascii_func(thd) 
{} + const char *func_name() const { return "wsrep_last_written_gtid"; } + String *val_str_ascii(String *); + bool fix_length_and_dec() + { + max_length= WSREP_GTID_STR_LEN; + maybe_null= true; + return FALSE; + } + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_wsrep_last_written_gtid>(thd, this); } +}; + +class Item_func_wsrep_last_seen_gtid: public Item_str_ascii_func +{ + String gtid_str; +public: + Item_func_wsrep_last_seen_gtid(THD *thd): Item_str_ascii_func(thd) {} + const char *func_name() const { return "wsrep_last_seen_gtid"; } + String *val_str_ascii(String *); + bool fix_length_and_dec() + { + max_length= WSREP_GTID_STR_LEN; + maybe_null= true; + return FALSE; + } + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_wsrep_last_seen_gtid>(thd, this); } +}; + +class Item_func_wsrep_sync_wait_upto: public Item_int_func +{ + String value; +public: + Item_func_wsrep_sync_wait_upto(THD *thd, Item *a): Item_int_func(thd, a) {} + Item_func_wsrep_sync_wait_upto(THD *thd, Item *a, Item* b): Item_int_func(thd, a, b) {} + const Type_handler *type_handler() const { return &type_handler_string; } + const char *func_name() const { return "wsrep_sync_wait_upto_gtid"; } + longlong val_int(); + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_wsrep_sync_wait_upto>(thd, this); } +}; +#endif /* WITH_WSREP */ #endif /* ITEM_STRFUNC_INCLUDED */ diff --git a/sql/lock.cc b/sql/lock.cc index 1564059bb20..c1140eddaae 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -1100,20 +1100,16 @@ void Global_read_lock::unlock_global_read_lock(THD *thd) #ifdef WITH_WSREP if (m_state == GRL_ACQUIRED_AND_BLOCKS_COMMIT) { - if (WSREP(thd) || wsrep_node_is_donor()) + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + if (server_state.state() == Wsrep_server_state::s_donor) { + /* TODO: maybe redundant here?: */ wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; - wsrep->resume(wsrep); - /* resync here only if we did implicit desync earlier */ - if 
(!wsrep_desync && wsrep_node_is_synced()) - { - int ret = wsrep->resync(wsrep); - if (ret != WSREP_OK) - { - WSREP_WARN("resync failed %d for FTWRL: db: %s, query: %s", - ret, thd->get_db(), thd->query()); - } - } + server_state.resume(); + } + else if (WSREP(thd)) + { + server_state.resume_and_resync(); } } #endif /* WITH_WSREP */ @@ -1159,62 +1155,30 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT; #ifdef WITH_WSREP + /* Native threads should bail out before wsrep oprations to follow. - Donor servicing thread is an exception, it should pause provider but not desync, - as it is already desynced in donor state + Donor servicing thread is an exception, it should pause provider + but not desync, as it is already desynced in donor state */ - if (!WSREP(thd) && !wsrep_node_is_donor()) + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + if (!WSREP(thd) && server_state.state() != Wsrep_server_state::s_donor) { DBUG_RETURN(FALSE); } - /* if already desynced or donor, avoid double desyncing - if not in PC and synced, desyncing is not possible either - */ - if (wsrep_desync || !wsrep_node_is_synced()) + wsrep::seqno paused_seqno; + if (server_state.state() == Wsrep_server_state::s_donor) { - WSREP_DEBUG("desync set upfont, skipping implicit desync for FTWRL: %d", - wsrep_desync); + paused_seqno= server_state.pause(); } else { - int rcode; - WSREP_DEBUG("running implicit desync for node"); - rcode = wsrep->desync(wsrep); - if (rcode != WSREP_OK) - { - WSREP_WARN("FTWRL desync failed %d for schema: %s, query: %s", - rcode, thd->get_db(), thd->query()); - my_message(ER_LOCK_DEADLOCK, "wsrep desync failed for FTWRL", MYF(0)); - DBUG_RETURN(TRUE); - } - } - - long long ret = wsrep->pause(wsrep); - if (ret >= 0) - { - wsrep_locked_seqno= ret; + paused_seqno= server_state.desync_and_pause(); } - else if (ret != -ENOSYS) /* -ENOSYS - no provider */ + WSREP_INFO("Server paused at: %lld", 
paused_seqno.get()); + if (paused_seqno.get() >= 0) { - long long ret = wsrep->pause(wsrep); - if (ret >= 0) - { - wsrep_locked_seqno= ret; - } - else if (ret != -ENOSYS) /* -ENOSYS - no provider */ - { - WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret)); - - /* - For some reason Galera wants to crash here in debug build. - It is equivalent of original assertion. - */ - DBUG_ASSERT(0); - wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; - my_error(ER_LOCK_DEADLOCK, MYF(0)); - DBUG_RETURN(TRUE); - } + wsrep_locked_seqno= paused_seqno.get(); } #endif /* WITH_WSREP */ DBUG_RETURN(FALSE); diff --git a/sql/log.cc b/sql/log.cc index a56117a4ac1..68e34513d40 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -55,10 +55,14 @@ #include "sql_show.h" #include "my_pthread.h" #include "semisync_master.h" -#include "wsrep_mysqld.h" #include "sp_rcontext.h" #include "sp_head.h" +#include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ + /* max size of the log message */ #define MAX_LOG_BUFFER_SIZE 1024 #define MAX_TIME_SIZE 32 @@ -1703,7 +1707,7 @@ static int binlog_close_connection(handlerton *hton, THD *thd) (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); #ifdef WITH_WSREP if (cache_mngr && !cache_mngr->trx_cache.empty()) { - IO_CACHE* cache= get_trans_log(thd); + IO_CACHE* cache= cache_mngr->get_binlog_cache_log(true); uchar *buf; size_t len=0; wsrep_write_cache_buf(cache, &buf, &len); @@ -2297,8 +2301,17 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) non-transactional table. Otherwise, truncate the binlog cache starting from the SAVEPOINT command. 
*/ +#ifdef WITH_WSREP + /* for streaming replication, we must replicate savepoint rollback so that + slaves can maintain SR transactions + */ + if (unlikely(thd->wsrep_trx().is_streaming() || + (trans_has_updated_non_trans_table(thd)) || + (thd->variables.option_bits & OPTION_KEEP_LOG))) +#else if (unlikely(trans_has_updated_non_trans_table(thd) || (thd->variables.option_bits & OPTION_KEEP_LOG))) +#endif /* WITH_WSREP */ { char buf[1024]; String log_query(buf, sizeof(buf), &my_charset_bin); @@ -5970,7 +5983,9 @@ MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, DBUG_PRINT("enter", ("standalone: %d", standalone)); #ifdef WITH_WSREP - if (WSREP(thd) && thd->wsrep_trx_meta.gtid.seqno != -1 && wsrep_gtid_mode && !thd->variables.gtid_seq_no) + if (WSREP(thd) && + (wsrep_thd_trx_seqno(thd) > 0) && + wsrep_gtid_mode && !thd->variables.gtid_seq_no) { domain_id= wsrep_gtid_domain_id; } else { @@ -6287,7 +6302,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) */ /* applier and replayer can skip writing binlog events */ if ((WSREP_EMULATE_BINLOG(thd) && - IF_WSREP(thd->wsrep_exec_mode != REPL_RECV, 0)) || is_open()) + IF_WSREP(thd->wsrep_cs().mode() == wsrep::client_state::m_local, 0)) || is_open()) { my_off_t UNINIT_VAR(my_org_b_tell); #ifdef HAVE_REPLICATION @@ -7670,7 +7685,11 @@ bool MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) { int is_leader= queue_for_group_commit(entry); - +#ifdef WITH_WSREP + if (is_leader >= 0 && + wsrep_ordered_commit(entry->thd, entry->all, wsrep_apply_error())) + return true; +#endif /* WITH_WSREP */ /* The first in the queue handles group commit for all; the others just wait to be signalled when group commit is done. 
@@ -10592,7 +10611,10 @@ maria_declare_plugin(binlog) maria_declare_plugin_end; #ifdef WITH_WSREP -IO_CACHE * get_trans_log(THD * thd) +#include "wsrep_trans_observer.h" +#include "wsrep_mysqld.h" + +IO_CACHE *wsrep_get_trans_cache(THD * thd) { DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF); binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) @@ -10605,17 +10627,10 @@ IO_CACHE * get_trans_log(THD * thd) return NULL; } - -bool wsrep_trans_cache_is_empty(THD *thd) -{ - binlog_cache_mngr *const cache_mngr= - (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); - return (!cache_mngr || cache_mngr->trx_cache.empty()); -} - - -void thd_binlog_trx_reset(THD * thd) +void wsrep_thd_binlog_trx_reset(THD * thd) { + DBUG_ENTER("wsrep_thd_binlog_trx_reset"); + WSREP_DEBUG("wsrep_thd_binlog_reset"); /* todo: fix autocommit select to not call the caller */ @@ -10634,6 +10649,7 @@ void thd_binlog_trx_reset(THD * thd) } } thd->clear_binlog_table_maps(); + DBUG_VOID_RETURN; } @@ -10646,4 +10662,78 @@ void thd_binlog_rollback_stmt(THD * thd) if (cache_mngr) cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); } + +bool wsrep_stmt_rollback_is_safe(THD* thd) +{ + bool ret(true); + + DBUG_ENTER("wsrep_binlog_stmt_rollback_is_safe"); + + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + + if (binlog_hton && cache_mngr) + { + binlog_cache_data * trx_cache = &cache_mngr->trx_cache; + if (thd->wsrep_sr().fragments_certified() > 0 && + (trx_cache->get_prev_position() == MY_OFF_T_UNDEF || + trx_cache->get_prev_position() < thd->wsrep_sr().bytes_certified())) + { + WSREP_DEBUG("statement rollback is not safe for streaming replication" + " pre-stmt_pos: %llu, frag repl pos: %lu\n" + "Thread: %llu, SQL: %s", + trx_cache->get_prev_position(), + thd->wsrep_sr().bytes_certified(), + thd->thread_id, thd->query()); + ret = false; + } + } + DBUG_RETURN(ret); +} + +void wsrep_register_binlog_handler(THD *thd, bool trx) +{ + 
DBUG_ENTER("register_binlog_handler"); + /* + If this is the first call to this function while processing a statement, + the transactional cache does not have a savepoint defined. So, in what + follows: + . an implicit savepoint is defined; + . callbacks are registered; + . binary log is set as read/write. + + The savepoint allows for truncating the trx-cache transactional changes + fail. Callbacks are necessary to flush caches upon committing or rolling + back a statement or a transaction. However, notifications do not happen + if the binary log is set as read/write. + */ + //binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd); + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + /* cache_mngr may be missing e.g. in mtr test ev51914.test */ + if (cache_mngr && cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF) + { + /* + Set an implicit savepoint in order to be able to truncate a trx-cache. + */ + my_off_t pos= 0; + binlog_trans_log_savepos(thd, &pos); + cache_mngr->trx_cache.set_prev_position(pos); + + /* + Set callbacks in order to be able to call commmit or rollback. + */ + if (trx) + trans_register_ha(thd, TRUE, binlog_hton); + trans_register_ha(thd, FALSE, binlog_hton); + + /* + Set the binary log as read/write otherwise callbacks are not called. 
+ */ + thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write(); + } + DBUG_VOID_RETURN; +} + #endif /* WITH_WSREP */ diff --git a/sql/log.h b/sql/log.h index 7dfdb36c442..42fcfcc3ab6 100644 --- a/sql/log.h +++ b/sql/log.h @@ -18,7 +18,6 @@ #define LOG_H #include "handler.h" /* my_xid */ -#include "wsrep.h" #include "wsrep_mysqld.h" #include "rpl_constants.h" @@ -1212,6 +1211,10 @@ static inline TC_LOG *get_tc_log_implementation() return &tc_log_mmap; } +#ifdef WITH_WSREP +IO_CACHE* wsrep_get_trans_cache(THD *); +void wsrep_thd_binlog_trx_reset(THD * thd); +#endif /* WITH_WSREP */ class Gtid_list_log_event; const char * diff --git a/sql/log_event.cc b/sql/log_event.cc index 7a0d0beb5ad..ed2b25929e4 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -5780,6 +5780,14 @@ compare_errors: "unexpected success or fatal error"), thd->get_db(), query_arg); thd->is_slave_error= 1; +#ifdef WITH_WSREP + if (thd->wsrep_apply_toi && wsrep_must_ignore_error(thd)) + { + thd->clear_error(1); + thd->killed= NOT_KILLED; + thd->wsrep_has_ignored_error= true; + } +#endif /* WITH_WSREP */ } /* @@ -11287,13 +11295,13 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) { WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d " "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)", - thd->get_stmt_da()->sql_errno(), - thd->is_fatal_error, - thd->wsrep_exec_mode, - thd->wsrep_conflict_state, - (long long)wsrep_thd_trx_seqno(thd)); + thd->get_stmt_da()->sql_errno(), + thd->is_fatal_error, + thd->wsrep_cs().mode(), + thd->wsrep_trx().state(), + (long long) wsrep_thd_trx_seqno(thd)); } -#endif +#endif /* WITH_WSREP */ if ((thd->is_slave_error || thd->is_fatal_error) && !is_parallel_retry_error(rgi, actual_error)) { @@ -11430,10 +11438,10 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) #ifdef HAVE_QUERY_CACHE #ifdef WITH_WSREP /* - Moved invalidation right before the call to rows_event_stmt_cleanup(), - to avoid query cache being polluted with 
stale entries. + Moved invalidation right before the call to rows_event_stmt_cleanup(), + to avoid query cache being polluted with stale entries, */ - if (! (WSREP(thd) && (thd->wsrep_exec_mode == REPL_RECV))) + if (! (WSREP(thd) && wsrep_thd_is_applying(thd))) { #endif /* WITH_WSREP */ query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); @@ -11546,6 +11554,13 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) bool ignored_error= (idempotent_error == 0 ? ignored_error_code(actual_error) : 0); +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_ignored_error_code(this, actual_error)) + { + idempotent_error= true; + thd->wsrep_has_ignored_error= true; + } +#endif /* WITH_WSREP */ if (idempotent_error || ignored_error) { if (global_system_variables.log_warnings) @@ -11633,7 +11648,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) restore_empty_query_table_list(thd->lex); #if defined(WITH_WSREP) && defined(HAVE_QUERY_CACHE) - if (WSREP(thd) && thd->wsrep_exec_mode == REPL_RECV) + if (WSREP(thd) && wsrep_thd_is_applying(thd)) { query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); } diff --git a/sql/mdl.cc b/sql/mdl.cc index 19468a124c6..ccd7a71e9f4 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -24,9 +24,6 @@ #include <mysql/plugin.h> #include <mysql/service_thd_wait.h> #include <mysql/psi/mysql_stage.h> -#include "wsrep_mysqld.h" -#include "wsrep_thd.h" - #ifdef HAVE_PSI_INTERFACE static PSI_mutex_key key_MDL_wait_LOCK_wait_status; @@ -1218,10 +1215,9 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) wsrep_thd_is_BF(ticket->get_ctx()->get_thd(), false)) { Ticket_iterator itw(ticket->get_lock()->m_waiting); - Ticket_iterator itg(ticket->get_lock()->m_granted); DBUG_ASSERT(WSREP_ON); - MDL_ticket *waiting, *granted; + MDL_ticket *waiting; MDL_ticket *prev=NULL; bool added= false; @@ -1240,20 +1236,8 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) } /* Otherwise, insert the ticket at the back of the waiting 
list. */ - if (!added) m_list.push_back(ticket); - - while ((granted= itg++)) - { - if (granted->get_ctx() != ticket->get_ctx() && - granted->is_incompatible_when_granted(ticket->get_type())) - { - if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted, - &ticket->get_lock()->key)) - { - WSREP_DEBUG("MDL victim killed at add_ticket"); - } - } - } + if (!added) + m_list.push_back(ticket); } else #endif /* WITH_WSREP */ @@ -1709,6 +1693,12 @@ MDL_lock::MDL_backup_lock::m_waiting_incompatible[MDL_BACKUP_END]= Check if request for the metadata lock can be satisfied given its current state. + New lock request can be satisfied iff: + - There are no incompatible types of satisfied requests + in other contexts + - There are no waiting requests which have higher priority + than this request when priority was not ignored. + @param type_arg The requested lock type. @param requestor_ctx The MDL context of the requestor. @param ignore_lock_priority Ignore lock priority. @@ -1726,78 +1716,72 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, MDL_context *requestor_ctx, bool ignore_lock_priority) const { - bool can_grant= FALSE; bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg]; bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg]; - bool wsrep_can_grant= TRUE; +#ifdef WITH_WSREP /* - New lock request can be satisfied iff: - - There are no incompatible types of satisfied requests - in other contexts - - There are no waiting requests which have higher priority - than this request when priority was not ignored. + Approve lock request in BACKUP namespace for BF threads. + We should get rid of this code and forbid FTWRL/BACKUP statements + when wsrep is active. */ - if (ignore_lock_priority || !(m_waiting.bitmap() & waiting_incompat_map)) + if ((wsrep_thd_is_toi(requestor_ctx->get_thd()) || + wsrep_thd_is_applying(requestor_ctx->get_thd())) && + key.mdl_namespace() == MDL_key::BACKUP) { - if (! 
(m_granted.bitmap() & granted_incompat_map)) - can_grant= TRUE; - else + bool waiting_incompatible= m_waiting.bitmap() & waiting_incompat_map; + bool granted_incompatible= m_granted.bitmap() & granted_incompat_map; + if (waiting_incompatible || granted_incompatible) { - Ticket_iterator it(m_granted); - MDL_ticket *ticket; + WSREP_DEBUG("global lock granted for BF%s: %lu %s", + waiting_incompatible ? " (waiting queue)" : "", + thd_get_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + } + return true; + } +#endif /* WITH_WSREP */ - /* Check that the incompatible lock belongs to some other context. */ - while ((ticket= it++)) + if (!ignore_lock_priority && (m_waiting.bitmap() & waiting_incompat_map)) + return false; + + if (m_granted.bitmap() & granted_incompat_map) + { + Ticket_iterator it(m_granted); + bool can_grant= true; + + /* Check that the incompatible lock belongs to some other context. */ + while (auto ticket= it++) + { + if (ticket->get_ctx() != requestor_ctx && + ticket->is_incompatible_when_granted(type_arg)) { - if (ticket->get_ctx() != requestor_ctx && - ticket->is_incompatible_when_granted(type_arg)) - { + can_grant= false; #ifdef WITH_WSREP - if (wsrep_thd_is_BF(requestor_ctx->get_thd(),false) && - key.mdl_namespace() == MDL_key::BACKUP) - { - WSREP_DEBUG("global lock granted for BF: %lu %s", - thd_get_thread_id(requestor_ctx->get_thd()), - wsrep_thd_query(requestor_ctx->get_thd())); - can_grant = true; - } - else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket, &key)) + /* + non WSREP threads must report conflict immediately + note: RSU processing wsrep threads, have wsrep_on==OFF + */ + if (WSREP(requestor_ctx->get_thd()) || + requestor_ctx->get_thd()->wsrep_cs().mode() == + wsrep::client_state::m_rsu) + { + wsrep_handle_mdl_conflict(requestor_ctx, ticket, &key); + if (wsrep_log_conflicts) { - wsrep_can_grant= FALSE; - if (wsrep_log_conflicts) - { - MDL_lock * lock = ticket->get_lock(); - WSREP_INFO( - "MDL 
conflict db=%s table=%s ticket=%d solved by %s", - lock->key.db_name(), lock->key.name(), ticket->get_type(), - "abort" ); - } + auto key= ticket->get_key(); + WSREP_INFO("MDL conflict db=%s table=%s ticket=%d solved by abort", + key->db_name(), key->name(), ticket->get_type()); } - else - can_grant= TRUE; - /* Continue loop */ -#else - break; -#endif /* WITH_WSREP */ + continue; } +#endif /* WITH_WSREP */ + break; } - if ((ticket == NULL) && wsrep_can_grant) - can_grant= TRUE; /* Incompatible locks are our own. */ - } - } - else - { - if (wsrep_thd_is_BF(requestor_ctx->get_thd(), false) && - key.mdl_namespace() == MDL_key::BACKUP) - { - WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", - thd_get_thread_id(requestor_ctx->get_thd()), - wsrep_thd_query(requestor_ctx->get_thd())); - can_grant = true; } + return can_grant; } - return can_grant; + return true; } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a7e29811dd1..216b5d1c622 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -72,8 +72,10 @@ #include "debug_sync.h" #include "wsrep_mysqld.h" #include "wsrep_var.h" +#ifdef WITH_WSREP #include "wsrep_thd.h" #include "wsrep_sst.h" +#endif /* WITH_WSREP */ #include "proxy_protocol.h" #include "sql_callback.h" @@ -1599,7 +1601,7 @@ static void close_connections(void) error= mysql_cond_timedwait(&COND_start_thread, &LOCK_start_thread, &abstime); if (error != EINTR) - break; + break; } #ifdef EXTRA_DEBUG if (error != 0 && error != ETIMEDOUT && !count++) @@ -1661,11 +1663,12 @@ static void close_connections(void) #ifdef WITH_WSREP /* skip wsrep system threads as well */ - if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier)) + if (WSREP(tmp) && (wsrep_thd_is_applying(tmp) || tmp->wsrep_applier)) continue; #endif tmp->set_killed(KILL_SERVER_HARD); MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); + if (WSREP(tmp)) mysql_mutex_lock(&tmp->LOCK_thd_data); mysql_mutex_lock(&tmp->LOCK_thd_kill); if (tmp->mysys_var) { @@ 
-1690,6 +1693,7 @@ static void close_connections(void) mysql_mutex_unlock(&tmp->mysys_var->mutex); } mysql_mutex_unlock(&tmp->LOCK_thd_kill); + if (WSREP(tmp)) mysql_mutex_unlock(&tmp->LOCK_thd_data); } mysql_mutex_unlock(&LOCK_thread_count); // For unlink from list @@ -1759,7 +1763,7 @@ static void close_connections(void) * The code here makes sure mysqld will not hang during shutdown * even if wsrep provider has problems in shutting down. */ - if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV) + if (WSREP(tmp) && wsrep_thd_is_applying(tmp)) { sql_print_information("closing wsrep system thread"); tmp->set_killed(KILL_CONNECTION); @@ -1782,6 +1786,12 @@ static void close_connections(void) mysql_mutex_unlock(&LOCK_thread_count); } end_slave(); +#ifdef WITH_WSREP + if (wsrep_inited == 1) + { + wsrep_deinit(true); + } +#endif /* All threads has now been aborted */ DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); mysql_mutex_lock(&LOCK_thread_count); @@ -1936,17 +1946,16 @@ static void kill_server(int sig) else sql_print_error(ER_DEFAULT(ER_GOT_SIGNAL),my_progname,sig); /* purecov: inspected */ +#ifdef WITH_WSREP /* Stop wsrep threads in case they are running. */ if (wsrep_running_threads > 0) { - wsrep_stop_replication(NULL); + wsrep_shutdown_replication(); } +#endif close_connections(); - if (wsrep_inited == 1) - wsrep_deinit(true); - if (sig != MYSQL_KILL_SIGNAL && sig != 0) unireg_abort(1); /* purecov: inspected */ @@ -2028,8 +2037,8 @@ extern "C" void unireg_abort(int exit_code) disable_log_notes= 1; #ifdef WITH_WSREP - /* Check if wsrep class is used. 
If yes, then cleanup wsrep */ - if (wsrep) + if (WSREP_ON && + Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected) { /* This is an abort situation, we cannot expect to gracefully close all @@ -2037,15 +2046,19 @@ extern "C" void unireg_abort(int exit_code) */ wsrep_close_client_connections(FALSE); shutdown_in_progress= 1; - wsrep->disconnect(wsrep); + Wsrep_server_state::instance().disconnect(); WSREP_INFO("Service disconnected."); wsrep_close_threads(NULL); /* this won't close all threads */ sleep(1); /* so give some time to exit for those which can */ WSREP_INFO("Some threads may fail to exit."); - + } + if (WSREP_ON) + { /* In bootstrap mode we deinitialize wsrep here. */ - if (opt_bootstrap && wsrep_inited) - wsrep_deinit(true); + if (opt_bootstrap || wsrep_recovery) + { + if (wsrep_inited) wsrep_deinit(true); + } } #endif // WITH_WSREP @@ -2073,6 +2086,9 @@ static void mysqld_exit(int exit_code) rpl_deinit_gtid_waiting(); rpl_deinit_gtid_slave_state(); wait_for_signal_thread_to_end(); +#ifdef WITH_WSREP + wsrep_deinit_server(); +#endif /* WITH_WSREP */ mysql_audit_finalize(); clean_up_mutexes(); clean_up_error_log_mutex(); @@ -2309,9 +2325,6 @@ static void clean_up_mutexes() ****************************************************************************/ #ifdef EMBEDDED_LIBRARY -static void set_ports() -{ -} void close_connection(THD *thd, uint sql_errno) { } @@ -2805,12 +2818,14 @@ void unlink_thd(THD *thd) thd->add_status_to_global(); unlink_not_visible_thd(thd); +#ifdef WITH_WSREP /* Do not decrement when its wsrep system thread. wsrep_applier is set for applier as well as rollbacker threads. 
*/ - if (IF_WSREP(!thd->wsrep_applier, 1)) - dec_connection_count(thd->scheduler); + if (!thd->wsrep_applier) +#endif /* WITH_WSREP */ + dec_connection_count(thd->scheduler); thd->free_connection(); @@ -5239,7 +5254,9 @@ static int init_server_components() wsrep_thr_init(); #endif - if (WSREP_ON && !wsrep_recovery && !opt_abort) /* WSREP BEFORE SE */ +#ifdef WITH_WSREP + if (wsrep_init_server()) unireg_abort(1); + if (WSREP_ON && !wsrep_recovery && !opt_abort) { if (opt_bootstrap) // bootsrap option given - disable wsrep functionality { @@ -5272,6 +5289,7 @@ static int init_server_components() } } } +#endif /* WITH_WSREP */ if (opt_bin_log) { @@ -5848,8 +5866,7 @@ int mysqld_main(int argc, char **argv) set_user(mysqld_user, user_info); } - if (WSREP_ON && wsrep_check_opts()) - global_system_variables.wsrep_on= 0; + if (WSREP_ON && wsrep_check_opts()) unireg_abort(1); /* The subsequent calls may take a long time : e.g. innodb log read. @@ -5949,24 +5966,11 @@ int mysqld_main(int argc, char **argv) } else { - wsrep_SE_initialized(); - - if (wsrep_before_SE()) - { - /*! in case of no SST wsrep waits in view handler callback */ - wsrep_SE_init_grab(); - wsrep_SE_init_done(); - /*! 
in case of SST wsrep waits for wsrep->sst_received */ - if (wsrep_sst_continue()) - { - WSREP_ERROR("Failed to signal the wsrep provider to continue."); - } - } - else + wsrep_init_globals(); + if (!wsrep_before_SE()) { wsrep_init_startup (false); } - wsrep_create_appliers(wsrep_slave_threads - 1); } } @@ -8096,6 +8100,20 @@ SHOW_VAR status_vars[]= { {"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_SIMPLE_FUNC}, #endif #ifdef WITH_WSREP + {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"wsrep_ready", (char*) &wsrep_show_ready, SHOW_FUNC}, + {"wsrep_cluster_state_uuid",(char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH}, + {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, + {"wsrep_local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_FUNC}, + {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"wsrep_provider_capabilities", (char*) &wsrep_provider_capabilities, SHOW_CHAR_PTR}, + {"wsrep_thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH}, + {"wsrep_cluster_capabilities", (char*) &wsrep_cluster_capabilities, SHOW_CHAR_PTR}, {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC}, #endif {NullS, NullS, SHOW_LONG} @@ -9558,7 +9576,9 @@ void refresh_status(THD *thd) reset_status_vars(); #ifdef WITH_WSREP if (WSREP_ON) - wsrep->stats_reset(wsrep); + { + Wsrep_server_state::instance().provider().reset_status(); + } #endif /* WITH_WSREP */ /* Reset the counters of all key caches (default and named). 
*/ diff --git a/sql/protocol.cc b/sql/protocol.cc index 7eee9283989..38eb8ac99f7 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -551,8 +551,26 @@ static uchar *net_store_length_fast(uchar *packet, size_t length) void Protocol::end_statement() { - /* sanity check*/ - DBUG_ASSERT_IF_WSREP(!(WSREP(thd) && thd->wsrep_conflict_state == REPLAYING)); +#ifdef WITH_WSREP + /* + Commented out: This sanity check does not hold in general. + Thd->LOCK_thd_data() must be unlocked before sending response + to client, so BF abort may sneak in here. + DBUG_ASSERT(!WSREP(thd) || thd->wsrep_conflict_state() == NO_CONFLICT); + */ + + /* + sanity check, don't send end statement while replaying + */ + DBUG_ASSERT(thd->wsrep_trx().state() != wsrep::transaction::s_replaying); + if (WSREP(thd) && thd->wsrep_trx().state() == + wsrep::transaction::s_replaying) + { + WSREP_ERROR("attempting net_end_statement while replaying"); + return; + } +#endif /* WITH_WSREP */ + DBUG_ENTER("Protocol::end_statement"); DBUG_ASSERT(! thd->get_stmt_da()->is_sent()); bool error= FALSE; diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index 94c1f08e4e3..84661fa513d 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -329,6 +329,7 @@ unpack_row(rpl_group_info *rgi, (int) (pack_ptr - old_pack_ptr))); if (!pack_ptr) { +#ifdef WITH_WSREP if (WSREP_ON) { /* @@ -344,7 +345,7 @@ unpack_row(rpl_group_info *rgi, (table_found) ? 
"found" : "not found", row_end ); } - +#endif /* WITH_WSREP */ rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, rgi->gtid_info(), "Could not read field '%s' of table '%s.%s'", diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc new file mode 100644 index 00000000000..f4cf49b9b84 --- /dev/null +++ b/sql/service_wsrep.cc @@ -0,0 +1,255 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "mariadb.h" + +#include "mysql/service_wsrep.h" +#include "wsrep/key.hpp" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#include "sql_class.h" +#include "debug_sync.h" + +extern "C" my_bool wsrep_on(const THD *thd) +{ + return my_bool(WSREP(thd)); +} + +extern "C" void wsrep_thd_LOCK(const THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_thd_data); +} + +extern "C" void wsrep_thd_UNLOCK(const THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_thd_data); +} + +extern "C" const char* wsrep_thd_client_state_str(const THD *thd) +{ + return wsrep::to_c_string(thd->wsrep_cs().state()); +} + +extern "C" const char* wsrep_thd_client_mode_str(const THD *thd) +{ + return wsrep::to_c_string(thd->wsrep_cs().mode()); +} + +extern "C" const char* wsrep_thd_transaction_state_str(const THD *thd) +{ + return wsrep::to_c_string(thd->wsrep_cs().transaction().state()); +} + + +extern "C" const char *wsrep_thd_query(const THD *thd) +{ + 
return thd ? thd->query() : NULL; +} + +extern "C" query_id_t wsrep_thd_transaction_id(const THD *thd) +{ + return thd->wsrep_cs().transaction().id().get(); +} + +extern "C" long long wsrep_thd_trx_seqno(const THD *thd) +{ + const wsrep::client_state& cs= thd->wsrep_cs(); + if (cs.mode() == wsrep::client_state::m_toi) + { + return cs.toi_meta().seqno().get(); + } + else + { + return cs.transaction().ws_meta().seqno().get(); + } +} + +extern "C" void wsrep_thd_self_abort(THD *thd) +{ + thd->wsrep_cs().bf_abort(wsrep::seqno(0)); +} + +extern "C" const char* wsrep_get_sr_table_name() +{ + return wsrep_sr_table_name_full; +} + +extern "C" my_bool wsrep_get_debug() +{ + return wsrep_debug; +} + +extern "C" my_bool wsrep_thd_is_local(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_local; +} + +extern "C" my_bool wsrep_thd_is_applying(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_high_priority; +} + +extern "C" my_bool wsrep_thd_is_toi(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_toi; +} + +extern "C" my_bool wsrep_thd_is_local_toi(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_toi && + thd->wsrep_cs().toi_mode() == wsrep::client_state::m_local; + +} + +extern "C" my_bool wsrep_thd_is_in_rsu(const THD *thd) +{ + return thd->wsrep_cs().mode() == wsrep::client_state::m_rsu; +} + +extern "C" my_bool wsrep_thd_is_BF(const THD *thd, my_bool sync) +{ + my_bool status = FALSE; + if (thd && WSREP(thd)) + { + if (sync) mysql_mutex_lock(&thd->LOCK_thd_data); + status = (wsrep_thd_is_applying(thd) || wsrep_thd_is_toi(thd)); + if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data); + } + return status; +} + +extern "C" my_bool wsrep_thd_is_SR(const THD *thd) +{ + return thd && thd->wsrep_cs().transaction().is_streaming(); +} + +extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, + THD *victim_thd) +{ + DBUG_ASSERT(victim_thd); + if (!victim_thd || !wsrep_on(bf_thd)) 
return; + + WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %lu frags %lu conf %s", + victim_thd->thread_id, + victim_thd->wsrep_trx_id(), + victim_thd->wsrep_sr().fragments_certified(), + wsrep_thd_transaction_state_str(victim_thd)); + if (bf_thd) victim_thd->store_globals(); + if (!bf_thd) + { + DEBUG_SYNC(victim_thd, "wsrep_before_SR_rollback"); + } + if (bf_thd) + { + wsrep_bf_abort(bf_thd, victim_thd); + } + else + { + wsrep_thd_self_abort(victim_thd); + } + if (bf_thd) bf_thd->store_globals(); +} + +extern "C" my_bool wsrep_thd_bf_abort(const THD *bf_thd, THD *victim_thd, + my_bool signal) +{ + if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) + { + WSREP_DEBUG("BF abort for non active transaction"); + wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id()); + } + my_bool ret= wsrep_bf_abort(bf_thd, victim_thd); + /* + Send awake signal if victim was BF aborted or does not + have wsrep on. Note that this should never interrupt RSU + as RSU has paused the provider. 
+ */ + if ((ret || !wsrep_on(victim_thd)) && signal) + victim_thd->awake(KILL_QUERY); + return ret; +} + +extern "C" my_bool wsrep_thd_skip_locking(const THD *thd) +{ + return thd && thd->wsrep_skip_locking; +} + +extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right) +{ + if (wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) { + WSREP_DEBUG("BF conflict, order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno(left), + (long long)wsrep_thd_trx_seqno(right)); + return TRUE; + } + WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno(left), + (long long)wsrep_thd_trx_seqno(right)); + return FALSE; +} + +extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) +{ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + if (thd != 0) + { + const wsrep::client_state& cs(thd->wsrep_cs()); + const enum wsrep::transaction::state tx_state(cs.transaction().state()); + switch (tx_state) + { + case wsrep::transaction::s_must_abort: + return (cs.state() == wsrep::client_state::s_exec || + cs.state() == wsrep::client_state::s_result); + case wsrep::transaction::s_aborting: + case wsrep::transaction::s_aborted: + return true; + default: + return false; + } + } + return false; +} + +static inline enum wsrep::key::type +map_key_type(enum Wsrep_service_key_type type) +{ + switch (type) + { + case WSREP_SERVICE_KEY_SHARED: return wsrep::key::shared; + case WSREP_SERVICE_KEY_REFERENCE: return wsrep::key::reference; + case WSREP_SERVICE_KEY_UPDATE: return wsrep::key::update; + case WSREP_SERVICE_KEY_EXCLUSIVE: return wsrep::key::exclusive; + } + return wsrep::key::exclusive; +} + +extern "C" int wsrep_thd_append_key(THD *thd, + const struct wsrep_key* key, + int n_keys, + enum Wsrep_service_key_type key_type) +{ + Wsrep_client_state& client_state(thd->wsrep_cs()); + DBUG_ASSERT(client_state.transaction().active()); + int ret= 0; + for (int i= 0; i < n_keys && ret == 0; ++i) + { + wsrep::key wsrep_key(map_key_type(key_type)); + 
for (size_t kp= 0; kp < key[i].key_parts_num; ++kp) + { + wsrep_key.append_key_part(key[i].key_parts[kp].ptr, key[i].key_parts[kp].len); + } + ret= client_state.append_key(wsrep_key); + } + return ret; +} diff --git a/sql/slave.cc b/sql/slave.cc index 5fbe0c77661..f31021bc71e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -53,6 +53,9 @@ // Create_file_log_event, // Format_description_log_event #include "wsrep_mysqld.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif #ifdef HAVE_REPLICATION @@ -1201,6 +1204,11 @@ terminate_slave_thread(THD *thd, int error __attribute__((unused)); DBUG_PRINT("loop", ("killing slave thread")); +#ifdef WITH_WSREP + /* awake_no_mutex() requires LOCK_thd_data to be locked if wsrep + is enabled */ + if (WSREP(thd)) mysql_mutex_lock(&thd->LOCK_thd_data); +#endif /* WITH_WSREP */ mysql_mutex_lock(&thd->LOCK_thd_kill); #ifndef DONT_USE_THR_ALARM /* @@ -1214,6 +1222,9 @@ terminate_slave_thread(THD *thd, thd->awake_no_mutex(NOT_KILLED); mysql_mutex_unlock(&thd->LOCK_thd_kill); +#ifdef WITH_WSREP + if (WSREP(thd)) mysql_mutex_unlock(&thd->LOCK_thd_data); +#endif /* WITH_WSREP */ /* There is a small chance that slave thread might miss the first @@ -3943,14 +3954,20 @@ apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi, exec_res= ev->apply_event(rgi); #ifdef WITH_WSREP - if (exec_res && thd->wsrep_conflict_state != NO_CONFLICT) + if (WSREP_ON) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + if (exec_res && + thd->wsrep_trx().state() != wsrep::transaction::s_executing) { - WSREP_DEBUG("SQL apply failed, res %d conflict state: %d", - exec_res, thd->wsrep_conflict_state); + WSREP_DEBUG("SQL apply failed, res %d conflict state: %s", + exec_res, wsrep_thd_transaction_state_str(thd)); rli->abort_slave= 1; rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(), "Node has dropped from cluster"); } + mysql_mutex_unlock(&thd->LOCK_thd_data); + } #endif #ifndef DBUG_OFF @@ -4243,6 +4260,13 @@ static int 
exec_relay_log_event(THD* thd, Relay_log_info* rli, } if (ev) { +#ifdef WITH_WSREP + if (wsrep_before_statement(thd)) + { + WSREP_INFO("Wsrep before statement error"); + DBUG_RETURN(1); + } +#endif /* WITH_WSREP */ int exec_res; Log_event_type typ= ev->get_type_code(); @@ -4274,9 +4298,9 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) && (ev->server_id != global_system_variables.server_id || rli->replicate_same_server_id) && - rli->is_until_satisfied((rli->get_flag(Relay_log_info::IN_TRANSACTION) || !ev->log_pos) - ? rli->group_master_log_pos - : ev->log_pos - ev->data_written)) + rli->is_until_satisfied((rli->get_flag(Relay_log_info::IN_TRANSACTION) || !ev->log_pos) + ? rli->group_master_log_pos + : ev->log_pos - ev->data_written)) { sql_print_information("Slave SQL thread stopped because it reached its" " UNTIL position %llu", rli->until_pos()); @@ -4287,6 +4311,9 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, rli->abort_slave= 1; rli->stop_for_until= true; mysql_mutex_unlock(&rli->data_lock); +#ifdef WITH_WSREP + wsrep_after_statement(thd); +#endif /* WITH_WSREP */ delete ev; DBUG_RETURN(1); } @@ -4324,7 +4351,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, if (res == 0) rli->event_relay_log_pos= rli->future_event_relay_log_pos; if (res >= 0) + { +#ifdef WITH_WSREP + wsrep_after_statement(thd); +#endif /* WITH_WSREP */ DBUG_RETURN(res); + } /* Else we proceed to execute the event non-parallel. 
This is the case for pre-10.0 events without GTID, and for handling @@ -4359,6 +4391,9 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, "aborted because of out-of-memory error"); mysql_mutex_unlock(&rli->data_lock); delete ev; +#ifdef WITH_WSREP + wsrep_after_statement(thd); +#endif /* WITH_WSREP */ DBUG_RETURN(1); } @@ -4373,6 +4408,9 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, "thread aborted because of out-of-memory error"); mysql_mutex_unlock(&rli->data_lock); delete ev; +#ifdef WITH_WSREP + wsrep_after_statement(thd); +#endif /* WITH_WSREP */ DBUG_RETURN(1); } /* @@ -4401,13 +4439,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, retry. */ if (unlikely(exec_res == 2)) + { +#ifdef WITH_WSREP + wsrep_after_statement(thd); +#endif /* WITH_WSREP */ DBUG_RETURN(1); - + } #ifdef WITH_WSREP mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state == NO_CONFLICT) - { - mysql_mutex_unlock(&thd->LOCK_thd_data); + enum wsrep::client_error wsrep_error= thd->wsrep_cs().current_error(); + mysql_mutex_unlock(&thd->LOCK_thd_data); + if (wsrep_error == wsrep::e_success) #endif /* WITH_WSREP */ if (slave_trans_retries) { @@ -4420,8 +4462,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, We were in a transaction which has been rolled back because of a temporary error; let's seek back to BEGIN log event and retry it all again. - Note, if lock wait timeout (innodb_lock_wait_timeout exceeded) - there is no rollback since 5.0.13 (ref: manual). + Note, if lock wait timeout (innodb_lock_wait_timeout exceeded) + there is no rollback since 5.0.13 (ref: manual). 
We have to not only seek but also a) init_master_info(), to seek back to hot relay log's start @@ -4482,13 +4524,11 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, serial_rgi->trans_retries)); } } -#ifdef WITH_WSREP - } - else - mysql_mutex_unlock(&thd->LOCK_thd_data); -#endif /* WITH_WSREP */ thread_safe_increment64(&rli->executed_entries); +#ifdef WITH_WSREP + wsrep_after_statement(thd); +#endif /* WITH_WSREP */ DBUG_RETURN(exec_res); } mysql_mutex_unlock(&rli->data_lock); @@ -5415,12 +5455,6 @@ pthread_handler_t handle_slave_sql(void *arg) } #endif -#ifdef WITH_WSREP - thd->wsrep_exec_mode= LOCAL_STATE; - /* synchronize with wsrep replication */ - if (WSREP_ON) - wsrep_ready_wait(); -#endif DBUG_PRINT("master_info",("log_file_name: %s position: %llu", rli->group_master_log_name, rli->group_master_log_pos)); @@ -5517,7 +5551,14 @@ pthread_handler_t handle_slave_sql(void *arg) goto err; } mysql_mutex_unlock(&rli->data_lock); - +#ifdef WITH_WSREP + wsrep_open(thd); + if (wsrep_before_command(thd)) + { + WSREP_WARN("Slave SQL wsrep_before_command() failed"); + goto err; + } +#endif /* WITH_WSREP */ /* Read queries from the IO/THREAD until this thread is killed */ thd->set_command(COM_SLAVE_SQL); @@ -5554,10 +5595,16 @@ pthread_handler_t handle_slave_sql(void *arg) if (exec_relay_log_event(thd, rli, serial_rgi)) { #ifdef WITH_WSREP - if (thd->wsrep_conflict_state != NO_CONFLICT) + if (WSREP_ON) { - wsrep_node_dropped= TRUE; - rli->abort_slave= TRUE; + mysql_mutex_lock(&thd->LOCK_thd_data); + + if (thd->wsrep_cs().current_error()) + { + wsrep_node_dropped = TRUE; + rli->abort_slave = TRUE; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); } #endif /* WITH_WSREP */ @@ -5590,6 +5637,10 @@ pthread_handler_t handle_slave_sql(void *arg) "log '%s' at position %llu%s", RPL_LOG_NAME, rli->group_master_log_pos, tmp.c_ptr_safe()); } +#ifdef WITH_WSREP + wsrep_after_command_before_result(thd); + wsrep_after_command_after_result(thd); +#endif /* WITH_WSREP */ 
err_before_start: @@ -5708,17 +5759,17 @@ err_during_init: "SQL slave will continue"); wsrep_node_dropped= FALSE; mysql_mutex_unlock(&rli->run_lock); - WSREP_DEBUG("wsrep_conflict_state now: %d", thd->wsrep_conflict_state); - WSREP_INFO("slave restart: %d", thd->wsrep_conflict_state); - thd->wsrep_conflict_state= NO_CONFLICT; goto wsrep_restart_point; - } else { + } + else + { WSREP_INFO("Slave error due to node going non-primary"); WSREP_INFO("wsrep_restart_slave was set and therefore slave will be " - "automatically restarted when node joins back to cluster."); + "automatically restarted when node joins back to cluster"); wsrep_restart_slave_activated= TRUE; } } + wsrep_close(thd); #endif /* WITH_WSREP */ /* diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 56b4fc8c948..8345a9efe61 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -44,6 +44,9 @@ #include "transaction.h" // trans_commit_stmt #include "sql_audit.h" #include "debug_sync.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ /* Sufficient max length of printed destinations and frame offsets (all uints). @@ -1324,6 +1327,13 @@ sp_head::execute(THD *thd, bool merge_da_on_success) sql_digest_state *parent_digest= thd->m_digest; thd->m_digest= NULL; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID) + { + thd->set_wsrep_next_trx_id(thd->query_id); + WSREP_DEBUG("assigned new next trx ID for SP, trx id: %lu", thd->wsrep_next_trx_id()); + } +#endif /* WITH_WSREP */ err_status= i->execute(thd, &ip); thd->m_digest= parent_digest; @@ -3566,6 +3576,24 @@ sp_instr_stmt::exec_core(THD *thd, uint *nextp) (char *)thd->security_ctx->host_or_ip, 3); int res= mysql_execute_command(thd); +#ifdef WITH_WSREP + if ((thd->is_fatal_error || thd->killed_errno()) && + (thd->wsrep_trx().state() == wsrep::transaction::s_executing)) + { + /* + SP was killed, and it is not due to a wsrep conflict. 
+ We skip after_command hook at this point because + otherwise it clears the error, and cleans up the + whole transaction. For now we just return and finish + our handling once we are back to mysql_parse. + */ + WSREP_DEBUG("Skipping after_command hook for killed SP"); + } + else + { + (void) wsrep_after_statement(thd); + } +#endif /* WITH_WSREP */ MYSQL_QUERY_EXEC_DONE(res); *nextp= m_ip+1; return res; @@ -4503,8 +4531,8 @@ int sp_instr_error::execute(THD *thd, uint *nextp) { DBUG_ENTER("sp_instr_error::execute"); - my_message(m_errcode, ER_THD(thd, m_errcode), MYF(0)); + WSREP_DEBUG("sp_instr_error: %s %d", ER_THD(thd, m_errcode), thd->is_error()); *nextp= m_ip+1; DBUG_RETURN(-1); } diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index fe6fc9148bd..2edae501ccc 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -3461,7 +3461,6 @@ wsrep_error_label: WSREP_TO_ISOLATION_END; thd->set_query(query_save); - thd->wsrep_exec_mode = LOCAL_STATE; } #endif /* WITH_WSREP */ thd->restore_stmt_binlog_format(save_binlog_format); @@ -3613,7 +3612,6 @@ wsrep_error_label: WSREP_TO_ISOLATION_END; thd->set_query(query_save); - thd->wsrep_exec_mode = LOCAL_STATE; } #endif /* WITH_WSREP */ diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index 05a71d7785d..4e5ac6e9381 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -476,6 +476,7 @@ bool Sql_cmd_alter_table::execute(THD *thd) thd->work_part_info= 0; #endif +#ifdef WITH_WSREP if (WSREP(thd) && (!thd->is_current_stmt_binlog_format_row() || !thd->find_temporary_table(first_table))) @@ -487,6 +488,7 @@ bool Sql_cmd_alter_table::execute(THD *thd) thd->variables.auto_increment_offset = 1; thd->variables.auto_increment_increment = 1; } +#endif result= mysql_alter_table(thd, &select_lex->db, &lex->name, &create_info, diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 1b4ffd0c61e..ad7ff34190a 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -62,7 +62,11 @@ #include <io.h> #endif #include "wsrep_mysqld.h" +#ifdef WITH_WSREP #include 
"wsrep_thd.h" +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ + bool No_such_table_error_handler::handle_condition(THD *, @@ -4410,13 +4414,14 @@ restart: } } +#ifdef WITH_WSREP if (WSREP_ON && wsrep_replicate_myisam && (*start) && (*start)->table && (*start)->table->file->ht == myisam_hton && - wsrep_thd_exec_mode(thd) == LOCAL_STATE && - !is_stat_table(&(*start)->db, &(*start)->alias) && + wsrep_thd_is_local(thd) && + !is_stat_table(&(*start)->db, &(*start)->alias) && thd->get_command() != COM_STMT_PREPARE && ((thd->lex->sql_command == SQLCOM_INSERT || thd->lex->sql_command == SQLCOM_INSERT_SELECT || @@ -4427,8 +4432,12 @@ restart: thd->lex->sql_command == SQLCOM_LOAD || thd->lex->sql_command == SQLCOM_DELETE))) { - WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start)); + wsrep_before_rollback(thd, true); + wsrep_after_rollback(thd, true); + wsrep_after_statement(thd); + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start)); } +#endif /* WITH_WSREP */ error: #ifdef WITH_WSREP diff --git a/sql/sql_class.cc b/sql/sql_class.cc index fa2f866a3f6..87e377c1819 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -66,8 +66,11 @@ #include "sql_callback.h" #include "lock.h" #include "wsrep_mysqld.h" -#include "wsrep_thd.h" #include "sql_connect.h" +#ifdef WITH_WSREP +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#endif /* WITH_WSREP */ #ifdef HAVE_SYS_SYSCALL_H #include <sys/syscall.h> @@ -639,16 +642,42 @@ THD::THD(my_thread_id id, bool is_wsrep_applier, bool skip_global_sys_var_lock) xid_hash_pins(0), m_tmp_tables_locked(false) #ifdef WITH_WSREP - , + , wsrep_applier(is_wsrep_applier), wsrep_applier_closing(false), wsrep_client_thread(false), - wsrep_apply_toi(false), + wsrep_retry_counter(0), + wsrep_PA_safe(true), + wsrep_retry_query(NULL), + wsrep_retry_query_len(0), + wsrep_retry_command(COM_CONNECT), + wsrep_consistency_check(NO_CONSISTENCY_CHECK), + wsrep_mysql_replicated(0), + wsrep_TOI_pre_query(NULL), + wsrep_TOI_pre_query_len(0), 
wsrep_po_handle(WSREP_PO_INITIALIZER), wsrep_po_cnt(0), wsrep_apply_format(0), - wsrep_ignore_table(false) -#endif + wsrep_apply_toi(false), + wsrep_rbr_buf(NULL), + wsrep_sync_wait_gtid(WSREP_GTID_UNDEFINED), + wsrep_affected_rows(0), + wsrep_has_ignored_error(false), + wsrep_replicate_GTID(false), + wsrep_ignore_table(false), + +/* wsrep-lib */ + m_wsrep_next_trx_id(WSREP_UNDEFINED_TRX_ID), + m_wsrep_mutex(LOCK_thd_data), + m_wsrep_cond(COND_wsrep_thd), + m_wsrep_client_service(this, m_wsrep_client_state), + m_wsrep_client_state(this, + m_wsrep_mutex, + m_wsrep_cond, + Wsrep_server_state::instance(), + m_wsrep_client_service, + wsrep::client_id(thread_id)) +#endif /*WITH_WSREP */ { ulong tmp; bzero(&variables, sizeof(variables)); @@ -771,22 +800,8 @@ THD::THD(my_thread_id id, bool is_wsrep_applier, bool skip_global_sys_var_lock) *scramble= '\0'; #ifdef WITH_WSREP - wsrep_ws_handle.trx_id = WSREP_UNDEFINED_TRX_ID; - wsrep_ws_handle.opaque = NULL; - wsrep_retry_counter = 0; - wsrep_PA_safe = true; - wsrep_retry_query = NULL; - wsrep_retry_query_len = 0; - wsrep_retry_command = COM_CONNECT; - wsrep_consistency_check = NO_CONSISTENCY_CHECK; - wsrep_mysql_replicated = 0; - wsrep_TOI_pre_query = NULL; - wsrep_TOI_pre_query_len = 0; + mysql_cond_init(key_COND_wsrep_thd, &COND_wsrep_thd, NULL); wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ - wsrep_sync_wait_gtid = WSREP_GTID_UNDEFINED; - wsrep_affected_rows = 0; - wsrep_replicate_GTID = false; - wsrep_skip_wsrep_GTID = false; #endif /* Call to init() below requires fully initialized Open_tables_state. */ reset_open_tables_state(this); @@ -1049,10 +1064,25 @@ Sql_condition* THD::raise_condition(uint sql_errno, is_slave_error= 1; // needed to catch query errors during replication - if (!da->is_error()) +#ifdef WITH_WSREP + /* + With wsrep we allow converting BF abort error to warning if + errors are ignored. 
+ */ + if (!is_fatal_error && + no_errors && + (wsrep_trx().bf_aborted() || wsrep_retry_counter)) { - set_row_count_func(-1); - da->set_error_status(sql_errno, msg, sqlstate, ucid, cond); + WSREP_DEBUG("BF abort error converted to warning"); + } + else +#endif /* WITH_WSREP */ + { + if (!da->is_error()) + { + set_row_count_func(-1); + da->set_error_status(sql_errno, msg, sqlstate, ucid, cond); + } } } @@ -1113,6 +1143,13 @@ void *thd_memdup(MYSQL_THD thd, const void* str, size_t size) extern "C" void thd_get_xid(const MYSQL_THD thd, MYSQL_XID *xid) { +#ifdef WITH_WSREP + if (!thd->wsrep_xid.is_null()) + { + *xid = *(MYSQL_XID *) &thd->wsrep_xid; + } + else +#endif /* WITH_WSREP */ *xid = *(MYSQL_XID *) &thd->transaction.xid_state.xid; } @@ -1221,12 +1258,9 @@ void THD::init(bool skip_lock) first_successful_insert_id_in_cur_stmt= 0; current_backup_stage= BACKUP_FINISHED; #ifdef WITH_WSREP - wsrep_exec_mode= wsrep_applier ? REPL_RECV : LOCAL_STATE; - wsrep_conflict_state= NO_CONFLICT; - wsrep_query_state= QUERY_IDLE; wsrep_last_query_id= 0; - wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; - wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + wsrep_xid.null(); + wsrep_skip_locking= FALSE; wsrep_converted_lock_session= false; wsrep_retry_counter= 0; wsrep_rgi= NULL; @@ -1235,10 +1269,10 @@ void THD::init(bool skip_lock) wsrep_mysql_replicated = 0; wsrep_TOI_pre_query = NULL; wsrep_TOI_pre_query_len = 0; - wsrep_sync_wait_gtid = WSREP_GTID_UNDEFINED; + wsrep_rbr_buf = NULL; wsrep_affected_rows = 0; + m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID; wsrep_replicate_GTID = false; - wsrep_skip_wsrep_GTID = false; #endif /* WITH_WSREP */ if (variables.sql_log_bin) @@ -1467,6 +1501,13 @@ void THD::cleanup(void) #error xid_state in the cache should be replaced by the allocated value } #endif +#ifdef WITH_WSREP + if (wsrep_cs().state() != wsrep::client_state::s_none) + { + wsrep_cs().cleanup(); + } + wsrep_client_thread= false; +#endif /* WITH_WSREP */ mysql_ha_cleanup(this); 
locked_tables_list.unlock_locked_tables(this); @@ -1587,6 +1628,9 @@ void THD::reset_for_reuse() #ifdef SIGNAL_WITH_VIO_CLOSE active_vio = 0; #endif +#ifdef WITH_WSREP + wsrep_free_status(this); +#endif /* WITH_WSREP */ } @@ -1613,15 +1657,21 @@ THD::~THD() THD is not deleted while they access it. The following mutex_lock ensures that no one else is using this THD and it's now safe to delete */ + if (WSREP(this)) mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_lock(&LOCK_thd_kill); mysql_mutex_unlock(&LOCK_thd_kill); + if (WSREP(this)) mysql_mutex_unlock(&LOCK_thd_data); -#ifdef WITH_WSREP - delete wsrep_rgi; -#endif if (!free_connection_done) free_connection(); +#ifdef WITH_WSREP + if (wsrep_rgi != NULL) { + delete wsrep_rgi; + wsrep_rgi = NULL; + } + mysql_cond_destroy(&COND_wsrep_thd); +#endif mdl_context.destroy(); free_root(&transaction.mem_root,MYF(0)); @@ -1803,6 +1853,7 @@ void THD::awake_no_mutex(killed_state state_to_set) DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d", this, current_thd, (int) state_to_set)); THD_CHECK_SENTRY(this); + if (WSREP(this)) mysql_mutex_assert_owner(&LOCK_thd_data); mysql_mutex_assert_owner(&LOCK_thd_kill); print_aborted_warning(3, "KILLED"); @@ -1835,7 +1886,8 @@ void THD::awake_no_mutex(killed_state state_to_set) } /* Interrupt target waiting inside a storage engine. */ - if (state_to_set != NOT_KILLED) + if (IF_WSREP(state_to_set != NOT_KILLED && !wsrep_is_bf_aborted(this), + state_to_set != NOT_KILLED)) ha_kill_query(this, thd_kill_level(this)); /* Broadcast a condition to kick the target if it is waiting on it. 
*/ @@ -1988,12 +2040,6 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, if (!thd_table->needs_reopen()) { signalled|= mysql_lock_abort_for_thread(this, thd_table); - if (WSREP(this) && wsrep_thd_is_BF(this, FALSE)) - { - WSREP_DEBUG("remove_table_from_cache: %llu", - (unsigned long long) this->real_id); - wsrep_abort_thd((void *)this, (void *)in_use, FALSE); - } } } } @@ -2225,12 +2271,6 @@ void THD::cleanup_after_query() /* reset table map for multi-table update */ table_map_for_update= 0; m_binlog_invoker= INVOKER_NONE; -#ifdef WITH_WSREP - if (TOTAL_ORDER == wsrep_exec_mode) - { - wsrep_exec_mode = LOCAL_STATE; - } -#endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY if (rgi_slave) @@ -2238,7 +2278,6 @@ void THD::cleanup_after_query() #endif #ifdef WITH_WSREP - wsrep_sync_wait_gtid= WSREP_GTID_UNDEFINED; if (!in_active_multi_stmt_transaction()) wsrep_affected_rows= 0; #endif /* WITH_WSREP */ @@ -5007,8 +5046,9 @@ extern "C" int thd_binlog_format(const MYSQL_THD thd) if (WSREP(thd)) { /* for wsrep binlog format is meaningful also when binlogging is off */ - return (int) thd->wsrep_binlog_format(); + return (int) WSREP_BINLOG_FORMAT(thd->variables.binlog_format); } + if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG)) return (int) thd->variables.binlog_format; return BINLOG_FORMAT_UNSPEC; @@ -5491,6 +5531,10 @@ void THD::set_query_and_id(char *query_arg, uint32 query_length_arg, set_query_inner(query_arg, query_length_arg, cs); mysql_mutex_unlock(&LOCK_thd_data); query_id= new_query_id; +#ifdef WITH_WSREP + set_wsrep_next_trx_id(query_id); + WSREP_DEBUG("assigned new next query and trx id: %lu", wsrep_next_trx_id()); +#endif /* WITH_WSREP */ } /** Assign a new value to thd->mysys_var. */ @@ -5936,9 +5980,27 @@ int THD::decide_logging_format(TABLE_LIST *tables) binlogging is off, or if the statement is filtered out from the binlog by filtering rules. 
*/ +#ifdef WITH_WSREP + if (WSREP_CLIENT_NNULL(this) && variables.wsrep_trx_fragment_size > 0) + { + if (!is_current_stmt_binlog_format_row()) + { + my_message(ER_NOT_SUPPORTED_YET, + "Streaming replication not supported with " + "binlog_format=STATEMENT", MYF(0)); + DBUG_RETURN(-1); + } + } + + if ((WSREP_EMULATE_BINLOG_NNULL(this) || + (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG))) && + !(wsrep_binlog_format() == BINLOG_FORMAT_STMT && + !binlog_filter->db_ok(db.str))) +#else if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) && !(wsrep_binlog_format() == BINLOG_FORMAT_STMT && !binlog_filter->db_ok(db.str))) +#endif /* WITH_WSREP */ { if (is_bulk_op()) @@ -6260,7 +6322,8 @@ int THD::decide_logging_format(TABLE_LIST *tables) 5. Error: Cannot modify table that uses a storage engine limited to row-logging when binlog_format = STATEMENT */ - if (IF_WSREP((!WSREP(this) || wsrep_exec_mode == LOCAL_STATE),1)) + if (IF_WSREP((!WSREP(this) || + wsrep_cs().mode() == wsrep::client_state::m_local),1)) { my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); } diff --git a/sql/sql_class.h b/sql/sql_class.h index 69fabee708c..55eceda6c9f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -60,8 +60,18 @@ void set_thd_stage_info(void *thd, #include "my_apc.h" #include "rpl_gtid.h" + #include "wsrep_mysqld.h" +#ifdef WITH_WSREP +/* wsrep-lib */ +#include "wsrep_client_service.h" +#include "wsrep_client_state.h" +#include "wsrep_mutex.h" +#include "wsrep_condition_variable.h" +class Wsrep_applier_service; + +#endif /* WITH_WSREP */ class Reprepare_observer; class Relay_log_info; struct rpl_group_info; @@ -712,10 +722,12 @@ typedef struct system_variables my_bool wsrep_on; my_bool wsrep_causal_reads; + uint wsrep_sync_wait; + ulong wsrep_retry_autocommit; + ulonglong wsrep_trx_fragment_size; + ulong wsrep_trx_fragment_unit; + ulong wsrep_OSU_method; my_bool wsrep_dirty_reads; - uint wsrep_sync_wait; - ulong 
wsrep_retry_autocommit; - ulong wsrep_OSU_method; double long_query_time_double, max_statement_time_double; my_bool pseudo_slave_mode; @@ -2235,7 +2247,7 @@ public: - thd->db (used in SHOW PROCESSLIST) Is locked when THD is deleted. */ - mysql_mutex_t LOCK_thd_data; + mutable mysql_mutex_t LOCK_thd_data; /* Protects: - kill information @@ -3196,7 +3208,6 @@ public: mysql_bin_log.start_union_events() call. */ bool unioned_events_trans; - /* 'queries' (actually SP statements) that run under inside this binlog union have thd->query_id >= first_query_id. @@ -3204,7 +3215,6 @@ public: query_id_t first_query_id; } binlog_evt_union; - mysql_cond_t COND_wsrep_thd; /** Internal parser state. Note that since the parser is not re-entrant, we keep only one parser @@ -3287,9 +3297,18 @@ public: void awake_no_mutex(killed_state state_to_set); void awake(killed_state state_to_set) { + bool wsrep_on_local= WSREP_ON; + /* + mutex locking order (LOCK_thd_data - LOCK_thd_kill)) requires + to grab LOCK_thd_data here + */ + if (wsrep_on_local) + mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_lock(&LOCK_thd_kill); awake_no_mutex(state_to_set); mysql_mutex_unlock(&LOCK_thd_kill); + if (wsrep_on_local) + mysql_mutex_unlock(&LOCK_thd_data); } /** Disconnect the associated communication endpoint. 
*/ @@ -4497,6 +4516,13 @@ public: void set_query_id(query_id_t new_query_id) { query_id= new_query_id; +#ifdef WITH_WSREP + if (WSREP(this)) + { + set_wsrep_next_trx_id(query_id); + WSREP_DEBUG("assigned new next trx id: %lu", wsrep_next_trx_id()); + } +#endif /* WITH_WSREP */ } void set_open_tables(TABLE *open_tables_arg) { @@ -4752,52 +4778,114 @@ private: public: inline ulong wsrep_binlog_format() const { - return WSREP_FORMAT(variables.binlog_format); + return WSREP_BINLOG_FORMAT(variables.binlog_format); } #ifdef WITH_WSREP - const bool wsrep_applier; /* dedicated slave applier thread */ + bool wsrep_applier; /* dedicated slave applier thread */ bool wsrep_applier_closing; /* applier marked to close */ bool wsrep_client_thread; /* to identify client threads*/ - bool wsrep_PA_safe; - bool wsrep_converted_lock_session; - bool wsrep_apply_toi; /* applier processing in TOI */ - enum wsrep_exec_mode wsrep_exec_mode; query_id_t wsrep_last_query_id; - enum wsrep_query_state wsrep_query_state; - enum wsrep_conflict_state wsrep_conflict_state; - wsrep_trx_meta_t wsrep_trx_meta; + XID wsrep_xid; + + /** This flag denotes that record locking should be skipped during INSERT + and gap locking during SELECT. Only used by the streaming replication thread + that only modifies the wsrep_schema.SR table. 
*/ + my_bool wsrep_skip_locking; + + mysql_cond_t COND_wsrep_thd; + + // changed from wsrep_seqno_t to wsrep_trx_meta_t in wsrep API rev 75 uint32 wsrep_rand; - Relay_log_info *wsrep_rli; rpl_group_info *wsrep_rgi; - wsrep_ws_handle_t wsrep_ws_handle; + bool wsrep_converted_lock_session; + char wsrep_info[128]; /* string for dynamic proc info */ ulong wsrep_retry_counter; // of autocommit - char *wsrep_retry_query; + bool wsrep_PA_safe; + char* wsrep_retry_query; size_t wsrep_retry_query_len; enum enum_server_command wsrep_retry_command; - enum wsrep_consistency_check_mode + enum wsrep_consistency_check_mode wsrep_consistency_check; + std::vector<wsrep::provider::status_variable> wsrep_status_vars; int wsrep_mysql_replicated; - const char *wsrep_TOI_pre_query; /* a query to apply before - the actual TOI query */ + const char* wsrep_TOI_pre_query; /* a query to apply before + the actual TOI query */ size_t wsrep_TOI_pre_query_len; wsrep_po_handle_t wsrep_po_handle; size_t wsrep_po_cnt; #ifdef GTID_SUPPORT + my_bool wsrep_po_in_trans; rpl_sid wsrep_po_sid; -#endif /* GTID_SUPPORT */ +#endif /* GTID_SUPPORT */ void *wsrep_apply_format; - char wsrep_info[128]; /* string for dynamic proc info */ + bool wsrep_apply_toi; /* applier processing in TOI */ + uchar* wsrep_rbr_buf; + wsrep_gtid_t wsrep_sync_wait_gtid; + // wsrep_gtid_t wsrep_last_written_gtid; + ulong wsrep_affected_rows; + bool wsrep_has_ignored_error; + bool wsrep_replicate_GTID; + /* When enabled, do not replicate/binlog updates from the current table that's being processed. At the moment, it is used to keep mysql.gtid_slave_pos table updates from being replicated to other nodes via galera replication. 
*/ bool wsrep_ignore_table; - wsrep_gtid_t wsrep_sync_wait_gtid; - ulong wsrep_affected_rows; - bool wsrep_replicate_GTID; - bool wsrep_skip_wsrep_GTID; + + + /* + Transaction id: + * m_wsrep_next_trx_id is assigned on the first query after + wsrep_next_trx_id() return WSREP_UNDEFINED_TRX_ID + * Each storage engine must assign value of wsrep_next_trx_id() + when the transaction starts. + * Effective transaction id is returned via wsrep_trx_id() + */ + /* + Return effective transaction id + */ + wsrep_trx_id_t wsrep_trx_id() const + { + return m_wsrep_client_state.transaction().id().get(); + } + + + /* + Set next trx id + */ + void set_wsrep_next_trx_id(query_id_t query_id) + { + m_wsrep_next_trx_id = (wsrep_trx_id_t) query_id; + } + /* + Return next trx id + */ + wsrep_trx_id_t wsrep_next_trx_id() const + { + return m_wsrep_next_trx_id; + } + +private: + wsrep_trx_id_t m_wsrep_next_trx_id; /* cast from query_id_t */ + /* wsrep-lib */ + Wsrep_mutex m_wsrep_mutex; + Wsrep_condition_variable m_wsrep_cond; + Wsrep_client_service m_wsrep_client_service; + Wsrep_client_state m_wsrep_client_state; + +public: + Wsrep_client_state& wsrep_cs() { return m_wsrep_client_state; } + const Wsrep_client_state& wsrep_cs() const { return m_wsrep_client_state; } + const wsrep::transaction& wsrep_trx() const + { return m_wsrep_client_state.transaction(); } + const wsrep::streaming_context& wsrep_sr() const + { return m_wsrep_client_state.transaction().streaming_context(); } + /* Pointer to applier service for streaming THDs. This is needed to + be able to delete applier service object in case of background + rollback. */ + Wsrep_applier_service* wsrep_applier_service; #endif /* WITH_WSREP */ /* Handling of timeouts for commands */ @@ -6304,7 +6392,7 @@ public: be rolled back or that do not expect any previously metadata locked tables. */ -#define CF_IMPLICT_COMMIT_BEGIN (1U << 6) +#define CF_IMPLICIT_COMMIT_BEGIN (1U << 6) /** Implicitly commit after the SQL statement. 
@@ -6322,7 +6410,7 @@ public: before and after every DDL statement and any statement that modifies our currently non-transactional system tables. */ -#define CF_AUTO_COMMIT_TRANS (CF_IMPLICT_COMMIT_BEGIN | CF_IMPLICIT_COMMIT_END) +#define CF_AUTO_COMMIT_TRANS (CF_IMPLICIT_COMMIT_BEGIN | CF_IMPLICIT_COMMIT_END) /** Diagnostic statement. @@ -6398,6 +6486,14 @@ public: */ #define CF_DB_CHANGE (1U << 22) +#ifdef WITH_WSREP +/** + DDL statement that may be subject to error filtering. +*/ +#define CF_WSREP_MAY_IGNORE_ERRORS (1U << 23) +#endif /* WITH_WSREP */ + + /* Bits in server_command_flags */ /** diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index b48070b9c8f..ec46d84c7ce 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -37,7 +37,11 @@ // reset_host_errors #include "sql_acl.h" // acl_getroot, NO_ACCESS, SUPER_ACL #include "sql_callback.h" + +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" /* wsrep open/close */ #include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ #include "proxy_protocol.h" HASH global_user_stats, global_client_stats, global_table_stats; @@ -1177,17 +1181,6 @@ exit: void end_connection(THD *thd) { NET *net= &thd->net; -#ifdef WITH_WSREP - if (WSREP(thd)) - { - wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id); - if (rcode) { - WSREP_WARN("wsrep failed to free connection context: %lld code: %d", - (longlong) thd->thread_id, rcode); - } - } - thd->wsrep_client_thread= 0; -#endif plugin_thdvar_cleanup(thd); if (thd->user_connect) @@ -1322,7 +1315,7 @@ bool thd_prepare_connection(THD *thd) prepare_new_connection_state(thd); #ifdef WITH_WSREP - thd->wsrep_client_thread= 1; + thd->wsrep_client_thread= true; #endif /* WITH_WSREP */ return FALSE; } @@ -1395,6 +1388,9 @@ void do_handle_one_connection(CONNECT *connect) create_user= FALSE; goto end_thread; } +#ifdef WITH_WSREP + wsrep_open(thd); +#endif /* WITH_WSREP */ while (thd_is_connection_alive(thd)) { @@ -1405,13 +1401,9 @@ void do_handle_one_connection(CONNECT 
*connect) end_connection(thd); #ifdef WITH_WSREP - if (WSREP(thd)) - { - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_query_state= QUERY_EXITING; - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif + wsrep_close(thd); +#endif /* WITH_WSREP */ + end_thread: close_connection(thd); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index f5e4185db92..785d7c12bd2 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -82,6 +82,10 @@ #include "debug_sync.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" /* wsrep_start_transction() */ +#endif /* WITH_WSREP */ + #ifndef EMBEDDED_LIBRARY static bool delayed_get_table(THD *thd, MDL_request *grl_protection_request, TABLE_LIST *table_list); @@ -3934,10 +3938,13 @@ bool select_insert::prepare_eof() DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'", trans_table, table->file->table_type())); - error= (IF_WSREP((thd->wsrep_conflict_state == MUST_ABORT || - thd->wsrep_conflict_state == CERT_FAILURE) ? -1 :, ) - (thd->locked_tables_mode <= LTM_LOCK_TABLES ? - table->file->ha_end_bulk_insert() : 0)); +#ifdef WITH_WSREP + error= (thd->wsrep_cs().current_error()) ? -1 : + (thd->locked_tables_mode <= LTM_LOCK_TABLES) ? +#else + error= (thd->locked_tables_mode <= LTM_LOCK_TABLES) ? 
+#endif /* WITH_WSREP */ + table->file->ha_end_bulk_insert() : 0; if (likely(!error) && unlikely(thd->is_error())) error= thd->get_stmt_da()->sql_errno(); @@ -4534,9 +4541,16 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) /* suppress_use */ FALSE, errcode); } - - ha_fake_trx_id(thd); - +#ifdef WITH_WSREP + if (thd->wsrep_trx().active()) + { + WSREP_DEBUG("transaction already started for CTAS"); + } + else + { + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } +#endif return result; } @@ -4594,10 +4608,18 @@ bool select_create::send_eof() if (!table->s->tmp_table) { #ifdef WITH_WSREP - if (WSREP_ON) + if (WSREP(thd)) { + if (thd->wsrep_trx_id() == WSREP_UNDEFINED_TRX_ID) + { + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + DBUG_ASSERT(thd->wsrep_trx_id() != WSREP_UNDEFINED_TRX_ID); + WSREP_DEBUG("CTAS key append for trx: %lu thd %llu query %lld ", + thd->wsrep_trx_id(), thd->thread_id, thd->query_id); + /* - append table level exclusive key for CTAS + append table level exclusive key for CTAS */ wsrep_key_arr_t key_arr= {0, 0}; wsrep_prepare_keys_for_isolation(thd, @@ -4605,38 +4627,34 @@ bool select_create::send_eof() create_table->table_name.str, table_list, &key_arr); - int rcode = wsrep->append_key( - wsrep, - &thd->wsrep_ws_handle, - key_arr.keys, //&wkey, - key_arr.keys_len, - WSREP_KEY_EXCLUSIVE, - false); + int rcode= wsrep_thd_append_key(thd, key_arr.keys, key_arr.keys_len, + WSREP_SERVICE_KEY_EXCLUSIVE); wsrep_keys_free(&key_arr); - if (rcode) { + if (rcode) + { DBUG_PRINT("wsrep", ("row key failed: %d", rcode)); WSREP_ERROR("Appending table key for CTAS failed: %s, %d", (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", rcode); - return true; + DBUG_RETURN(true); } /* If commit fails, we should be able to reset the OK status. 
*/ - thd->get_stmt_da()->set_overwrite_status(TRUE); + thd->get_stmt_da()->set_overwrite_status(true); } #endif /* WITH_WSREP */ trans_commit_stmt(thd); if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) trans_commit_implicit(thd); #ifdef WITH_WSREP - if (WSREP_ON) + if (WSREP(thd)) { thd->get_stmt_da()->set_overwrite_status(FALSE); mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state != NO_CONFLICT) + if (wsrep_current_error(thd)) { - WSREP_DEBUG("select_create commit failed, thd: %lld err: %d %s", - (longlong) thd->thread_id, thd->wsrep_conflict_state, - thd->query()); + WSREP_DEBUG("select_create commit failed, thd: %llu err: %s %s", + thd->thread_id, + wsrep_thd_transaction_state_str(thd), WSREP_QUERY(thd)); mysql_mutex_unlock(&thd->LOCK_thd_data); abort_result_set(); DBUG_RETURN(true); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 416180f0006..24de741cefd 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1860,7 +1860,7 @@ int Lex_input_stream::lex_one_token(YYSTYPE *yylval, THD *thd) else { #ifdef WITH_WSREP - if (WSREP(thd) && version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE) + if (WSREP(thd) && version == 99997 && wsrep_thd_is_local(thd)) { WSREP_DEBUG("consistency check: %s", thd->query()); thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index dd6e723c953..c95ef72a308 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -42,6 +42,8 @@ #include "sql_derived.h" #include "sql_show.h" +#include "wsrep_mysqld.h" + extern "C" int _my_b_net_read(IO_CACHE *info, uchar *Buffer, size_t Count); class XML_TAG { @@ -106,7 +108,7 @@ static bool wsrep_load_data_split(THD *thd, const TABLE *table, { DBUG_ENTER("wsrep_load_data_split"); - if (!wsrep_load_data_splitting || !wsrep_on(thd) + if (!wsrep_load_data_splitting || !WSREP(thd) || !info.records || (info.records % 10000) || !thd->transaction.stmt.ha_list || thd->transaction.stmt.ha_list->ht() != binlog_hton @@ -116,13 +118,10 
@@ static bool wsrep_load_data_split(THD *thd, const TABLE *table, if (handlerton* hton= thd->transaction.stmt.ha_list->next()->ht()) { - if (hton->db_type != DB_TYPE_INNODB) + if (!(hton->flags & HTON_WSREP_REPLICATION)) DBUG_RETURN(false); WSREP_DEBUG("intermediate transaction commit in LOAD DATA"); - if (wsrep_run_wsrep_commit(thd, true) != WSREP_TRX_OK) DBUG_RETURN(true); - if (binlog_hton->commit(binlog_hton, thd, true)) DBUG_RETURN(true); - wsrep_post_commit(thd, true); - hton->commit(hton, thd, true); + wsrep_tc_log_commit(thd); table->file->extra(HA_EXTRA_FAKE_START_STMT); } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 65b52b5b5da..34c690416ad 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -111,13 +111,16 @@ #include "wsrep.h" #include "wsrep_mysqld.h" +#ifdef WITH_WSREP #include "wsrep_thd.h" +#include "wsrep_trans_observer.h" /* wsrep transaction hooks */ -static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, +static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, Parser_state *parser_state, bool is_com_multi, bool is_next_command); +#endif /* WITH_WSREP */ /** @defgroup Runtime_Environment Runtime Environment @{ @@ -884,6 +887,16 @@ void init_update_queries(void) sql_command_flags[SQLCOM_REVOKE_ALL]|= CF_DISALLOW_IN_RO_TRANS; sql_command_flags[SQLCOM_INSTALL_PLUGIN]|= CF_DISALLOW_IN_RO_TRANS; sql_command_flags[SQLCOM_UNINSTALL_PLUGIN]|= CF_DISALLOW_IN_RO_TRANS; +#ifdef WITH_WSREP + /* + Statements for which some errors are ignored when + wsrep_ignore_apply_errors = WSREP_IGNORE_ERRORS_ON_RECONCILING_DDL + */ + sql_command_flags[SQLCOM_DROP_DB]|= CF_WSREP_MAY_IGNORE_ERRORS; + sql_command_flags[SQLCOM_DROP_TABLE]|= CF_WSREP_MAY_IGNORE_ERRORS; + sql_command_flags[SQLCOM_DROP_INDEX]|= CF_WSREP_MAY_IGNORE_ERRORS; + sql_command_flags[SQLCOM_ALTER_TABLE]|= CF_WSREP_MAY_IGNORE_ERRORS; +#endif /* WITH_WSREP */ } bool sqlcom_can_generate_row_events(const THD *thd) @@ -1208,28 +1221,11 @@ bool do_command(THD *thd) { bool 
return_value; char *packet= 0; -#ifdef WITH_WSREP - ulong packet_length= 0; // just to avoid (false positive) compiler warning -#else ulong packet_length; -#endif /* WITH_WSREP */ NET *net= &thd->net; enum enum_server_command command; DBUG_ENTER("do_command"); -#ifdef WITH_WSREP - if (WSREP(thd)) - { - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_query_state= QUERY_IDLE; - if (thd->wsrep_conflict_state==MUST_ABORT) - { - wsrep_client_rollback(thd); - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif /* WITH_WSREP */ - /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -1270,29 +1266,6 @@ bool do_command(THD *thd) DEBUG_SYNC(thd, "before_do_command_net_read"); packet_length= my_net_read_packet(net, 1); -#ifdef WITH_WSREP - if (WSREP(thd)) { - mysql_mutex_lock(&thd->LOCK_thd_data); - - /* these THD's are aborted or are aborting during being idle */ - if (thd->wsrep_conflict_state == ABORTING) - { - while (thd->wsrep_conflict_state == ABORTING) { - mysql_mutex_unlock(&thd->LOCK_thd_data); - my_sleep(1000); - mysql_mutex_lock(&thd->LOCK_thd_data); - } - thd->store_globals(); - } - else if (thd->wsrep_conflict_state == ABORTED) - { - thd->store_globals(); - } - - thd->wsrep_query_state= QUERY_EXEC; - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif /* WITH_WSREP */ if (unlikely(packet_length == packet_error)) { @@ -1300,20 +1273,6 @@ bool do_command(THD *thd) net->error, vio_description(net->vio))); -#ifdef WITH_WSREP - if (WSREP(thd)) - { - mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state == MUST_ABORT) - { - DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", - (ulong) thd->real_id)); - wsrep_client_rollback(thd); - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif /* WITH_WSREP */ - /* Instrument this broken statement as "statement/com/error" */ thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, com_statement_info[COM_END]. 
@@ -1364,13 +1323,52 @@ bool do_command(THD *thd) command= fetch_command(thd, packet); #ifdef WITH_WSREP + /* + Aborted by background rollbacker thread. + Handle error here and jump straight to out + */ + if (wsrep_before_command(thd)) + { + thd->store_globals(); + WSREP_LOG_THD(thd, "enter found BF aborted"); + DBUG_ASSERT(!thd->mdl_context.has_locks()); + DBUG_ASSERT(!thd->get_stmt_da()->is_set()); + /* We let COM_QUIT and COM_STMT_CLOSE to execute even if wsrep aborted. */ + if (command != COM_STMT_CLOSE && + command != COM_QUIT) + { + my_error(ER_LOCK_DEADLOCK, MYF(0)); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + thd->reset_killed(); + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; + + /* Instrument this broken statement as "statement/com/error" */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[COM_END]. + m_key); + + thd->protocol->end_statement(); + + /* Mark the statement completed. */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + return_value= FALSE; + + wsrep_after_command_before_result(thd); + goto out; + } + } + if (WSREP(thd)) { /* - Bail out if DB snapshot has not been installed. - */ - if (!thd->wsrep_applier && - (!wsrep_ready || wsrep_reject_queries != WSREP_REJECT_NONE) && + * bail out if DB snapshot has not been installed. 
We however, + * allow queries "SET" and "SHOW", they are trapped later in execute_command + */ + if (!(thd->wsrep_applier) && + (!wsrep_ready_get() || wsrep_reject_queries != WSREP_REJECT_NONE) && (server_command_flags[command] & CF_SKIP_WSREP_CHECK) == 0) { my_message(ER_UNKNOWN_COM_ERROR, @@ -1383,11 +1381,11 @@ bool do_command(THD *thd) thd->m_digest= NULL; return_value= FALSE; + wsrep_after_command_before_result(thd); goto out; } } -#endif - +#endif /* WITH_WSREP */ /* Restore read timeout value */ my_net_set_read_timeout(net, thd->variables.net_read_timeout); @@ -1395,37 +1393,6 @@ bool do_command(THD *thd) DBUG_ASSERT(!thd->apc_target.is_enabled()); return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1), FALSE, FALSE); -#ifdef WITH_WSREP - if (WSREP(thd)) - { - while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) - { - WSREP_DEBUG("Retry autocommit for: %s\n", thd->wsrep_retry_query); - CHARSET_INFO *current_charset = thd->variables.character_set_client; - if (!is_supported_parser_charset(current_charset)) - { - /* Do not use non-supported parser character sets */ - WSREP_WARN("Current client character set is non-supported parser " - "character set: %s", current_charset->csname); - thd->variables.character_set_client = &my_charset_latin1; - WSREP_WARN("For retry temporally setting character set to : %s", - my_charset_latin1.csname); - } - thd->clear_error(); - return_value= dispatch_command(command, thd, thd->wsrep_retry_query, - thd->wsrep_retry_query_len, FALSE, FALSE); - thd->variables.character_set_client = current_charset; - } - - if (thd->wsrep_retry_query && thd->wsrep_conflict_state != REPLAYING) - { - my_free(thd->wsrep_retry_query); - thd->wsrep_retry_query = NULL; - thd->wsrep_retry_query_len = 0; - thd->wsrep_retry_command = COM_CONNECT; - } - } -#endif /* WITH_WSREP */ DBUG_ASSERT(!thd->apc_target.is_enabled()); out: @@ -1433,6 +1400,13 @@ out: /* The statement instrumentation must be closed in all cases. 
*/ DBUG_ASSERT(thd->m_digest == NULL); DBUG_ASSERT(thd->m_statement_psi == NULL); +#ifdef WITH_WSREP + if (packet_length != packet_error) + { + /* there was a command to process, and before_command() has been called */ + wsrep_after_command_after_result(thd); + } +#endif /* WITH_WSREP */ DBUG_RETURN(return_value); } #endif /* EMBEDDED_LIBRARY */ @@ -1498,6 +1472,36 @@ static bool deny_updates_if_read_only_option(THD *thd, TABLE_LIST *all_tables) DBUG_RETURN(FALSE); } +#ifdef WITH_WSREP +static my_bool wsrep_read_only_option(THD *thd, TABLE_LIST *all_tables) +{ + int opt_readonly_saved = opt_readonly; + ulong flag_saved = (ulong)(thd->security_ctx->master_access & SUPER_ACL); + + opt_readonly = 0; + thd->security_ctx->master_access &= ~SUPER_ACL; + + my_bool ret = !deny_updates_if_read_only_option(thd, all_tables); + + opt_readonly = opt_readonly_saved; + thd->security_ctx->master_access |= flag_saved; + + return ret; +} + +static void wsrep_copy_query(THD *thd) +{ + thd->wsrep_retry_command = thd->get_command(); + thd->wsrep_retry_query_len = thd->query_length(); + if (thd->wsrep_retry_query) { + my_free(thd->wsrep_retry_query); + } + thd->wsrep_retry_query = (char *)my_malloc( + thd->wsrep_retry_query_len + 1, MYF(0)); + strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len); + thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; +} +#endif /* WITH_WSREP */ /** check COM_MULTI packet @@ -1580,41 +1584,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, /* keep it withing 1 byte */ compile_time_assert(COM_END == 255); -#ifdef WITH_WSREP - if (WSREP(thd)) - { - if (!thd->in_multi_stmt_transaction_mode()) - { - thd->wsrep_PA_safe= true; - } - - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_query_state= QUERY_EXEC; - if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) - { - thd->wsrep_conflict_state= NO_CONFLICT; - } - if (thd->wsrep_conflict_state== MUST_ABORT) - { - wsrep_client_rollback(thd); - } - /* We let COM_QUIT 
and COM_STMT_CLOSE to execute even if wsrep aborted. */ - if (thd->wsrep_conflict_state == ABORTED && - command != COM_STMT_CLOSE && command != COM_QUIT) - { - mysql_mutex_unlock(&thd->LOCK_thd_data); - my_message(ER_LOCK_DEADLOCK, "Deadlock: wsrep aborted transaction", - MYF(0)); - WSREP_DEBUG("Deadlock error for: %s", thd->query()); - thd->reset_killed(); - thd->mysys_var->abort = 0; - thd->wsrep_conflict_state = NO_CONFLICT; - thd->wsrep_retry_counter = 0; - goto dispatch_end; - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif /* WITH_WSREP */ #if defined(ENABLED_PROFILING) thd->profiling.start_new_query(); #endif @@ -1661,6 +1630,13 @@ bool dispatch_command(enum enum_server_command command, THD *thd, */ thd->set_query_id(get_query_id()); } +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID) + { + thd->set_wsrep_next_trx_id(thd->query_id); + WSREP_DEBUG("assigned new next trx id: %lu", thd->wsrep_next_trx_id()); + } +#endif /* WITH_WSREP */ if (!(server_command_flags[command] & CF_SKIP_QUESTIONS)) statistic_increment(thd->status_var.questions, &LOCK_status); @@ -1844,10 +1820,24 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (unlikely(parser_state.init(thd, thd->query(), thd->query_length()))) break; +#ifdef WITH_WSREP if (WSREP_ON) - wsrep_mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - is_com_multi, is_next_command); + { + if (wsrep_mysql_parse(thd, thd->query(), thd->query_length(), + &parser_state, + is_com_multi, is_next_command)) + { + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->killed = NOT_KILLED; + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; + mysql_mutex_unlock(&thd->LOCK_thd_data); + goto dispatch_end; + } + } else +#endif /* WITH_WSREP */ mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, is_com_multi, is_next_command); @@ -1929,17 +1919,32 @@ bool dispatch_command(enum 
enum_server_command command, THD *thd, */ statistic_increment(thd->status_var.questions, &LOCK_status); - if(!WSREP(thd)) - thd->set_time(); /* Reset the query start time. */ + if (!WSREP(thd)) + thd->set_time(); /* Reset the query start time. */ parser_state.reset(beginning_of_next_stmt, length); +#ifdef WITH_WSREP if (WSREP_ON) - wsrep_mysql_parse(thd, beginning_of_next_stmt, length, &parser_state, - is_com_multi, is_next_command); + { + if (wsrep_mysql_parse(thd, beginning_of_next_stmt, + length, &parser_state, + is_com_multi, is_next_command)) + { + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->killed = NOT_KILLED; + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; + mysql_mutex_unlock(&thd->LOCK_thd_data); + + goto dispatch_end; + } + } else - mysql_parse(thd, beginning_of_next_stmt, length, &parser_state, - is_com_multi, is_next_command); +#endif /* WITH_WSREP */ + mysql_parse(thd, beginning_of_next_stmt, length, &parser_state, + is_com_multi, is_next_command); } @@ -2376,7 +2381,26 @@ com_multi_end: #ifdef WITH_WSREP dispatch_end: - + /* + BF aborted before sending response back to client + */ + if (thd->killed == KILL_QUERY) + { + WSREP_DEBUG("THD is killed at dispatch_end"); + } + wsrep_after_command_before_result(thd); + if (wsrep_current_error(thd) && + !(command == COM_STMT_PREPARE || + command == COM_STMT_FETCH || + command == COM_STMT_SEND_LONG_DATA || + command == COM_STMT_CLOSE + )) + { + /* todo: Pass wsrep client state current error to override */ + wsrep_override_error(thd, wsrep_current_error(thd), + wsrep_current_error_status(thd)); + WSREP_LOG_THD(thd, "leave"); + } if (WSREP(thd)) { /* @@ -2387,9 +2411,10 @@ com_multi_end: || thd->get_stmt_da()->is_disabled()); /* wsrep BF abort in query exec phase */ mysql_mutex_lock(&thd->LOCK_thd_data); - do_end_of_statement= thd->wsrep_conflict_state != REPLAYING && - thd->wsrep_conflict_state != RETRY_AUTOCOMMIT && - !thd->killed; + 
do_end_of_statement= + thd->wsrep_trx().state() != wsrep::transaction::s_replaying + && !thd->killed; + mysql_mutex_unlock(&thd->LOCK_thd_data); } else @@ -3423,7 +3448,7 @@ mysql_execute_command(THD *thd) } /* endif unlikely slave */ #endif #ifdef WITH_WSREP - if (wsrep && WSREP(thd)) + if (WSREP(thd)) { /* change LOCK TABLE WRITE to transaction @@ -3453,8 +3478,8 @@ mysql_execute_command(THD *thd) * allow SET and SHOW queries and reads from information schema * and dirty reads (if configured) */ - if (!thd->wsrep_applier && - !(wsrep_ready && wsrep_reject_queries == WSREP_REJECT_NONE) && + if (!(thd->wsrep_applier) && + !(wsrep_ready_get() && wsrep_reject_queries == WSREP_REJECT_NONE) && !(thd->variables.wsrep_dirty_reads && (sql_command_flags[lex->sql_command] & CF_CHANGES_DATA) == 0) && !wsrep_tables_accessible_when_detached(all_tables) && @@ -3619,7 +3644,7 @@ mysql_execute_command(THD *thd) not run in it's own transaction it may simply never appear on the slave in case the outside transaction rolls back. */ - if (stmt_causes_implicit_commit(thd, CF_IMPLICT_COMMIT_BEGIN)) + if (stmt_causes_implicit_commit(thd, CF_IMPLICIT_COMMIT_BEGIN)) { /* Note that this should never happen inside of stored functions @@ -3642,6 +3667,13 @@ mysql_execute_command(THD *thd) } } thd->transaction.stmt.mark_trans_did_ddl(); +#ifdef WITH_WSREP + /* Clean up the previous transaction on implicit commit */ + if (wsrep_thd_is_local(thd) && wsrep_after_statement(thd)) + { + goto error; + } +#endif /* WITH_WSREP */ } #ifndef DBUG_OFF @@ -3690,6 +3722,33 @@ mysql_execute_command(THD *thd) /* Start timeouts */ thd->set_query_timer(); +#ifdef WITH_WSREP + /* + Always start a new transaction for a wsrep THD unless the + current command is DDL or explicit BEGIN. This will guarantee that + the THD is BF abortable even if it does not generate any + changes and takes only read locks. 
If the statement does not + start a multi STMT transaction, the wsrep_transaction is + committed as empty at the end of this function. + + Transaction is started for BEGIN in trans_begin(), for DDL the + implicit commit took care of committing previous transaction + above and a new transaction should not be started. + + Do not start transaction for stored procedures, it will be handled + internally in SP processing. + */ + if (WSREP(thd) && + wsrep_thd_is_local(thd) && + lex->sql_command != SQLCOM_BEGIN && + lex->sql_command != SQLCOM_CALL && + lex->sql_command != SQLCOM_EXECUTE && + !(sql_command_flags[lex->sql_command] & CF_AUTO_COMMIT_TRANS)) + { + wsrep_start_trx_if_not_started(thd); + } +#endif /* WITH_WSREP */ + switch (lex->sql_command) { case SQLCOM_SHOW_EVENTS: @@ -3749,12 +3808,16 @@ mysql_execute_command(THD *thd) case SQLCOM_SHOW_STORAGE_ENGINES: case SQLCOM_SHOW_PROFILE: case SQLCOM_SELECT: - { + { #ifdef WITH_WSREP - if (lex->sql_command == SQLCOM_SELECT) - WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_READ) - else - WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW) + if (lex->sql_command == SQLCOM_SELECT) + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_READ); + } + else + { + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); + } #endif /* WITH_WSREP */ thd->status_var.last_query_cost= 0.0; @@ -4555,9 +4618,7 @@ end_with_restore_list: WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); ha_rows found= 0, updated= 0; DBUG_ASSERT(first_table == all_tables && first_table != 0); - if (WSREP_CLIENT(thd) && - wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) - goto error; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); if (update_precheck(thd, all_tables)) break; @@ -4706,9 +4767,7 @@ end_with_restore_list: WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE); DBUG_ASSERT(first_table == all_tables && first_table != 0); - if (WSREP_CLIENT(thd) && - wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) - goto 
error; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE); /* Since INSERT DELAYED doesn't support temporary tables, we could @@ -4766,9 +4825,7 @@ end_with_restore_list: select_insert *sel_result; bool explain= MY_TEST(lex->describe); DBUG_ASSERT(first_table == all_tables && first_table != 0); - if (WSREP_CLIENT(thd) && - wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) - goto error; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); if ((res= insert_precheck(thd, all_tables))) break; @@ -4888,9 +4945,7 @@ end_with_restore_list: WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); select_result *sel_result=lex->result; DBUG_ASSERT(first_table == all_tables && first_table != 0); - if (WSREP_CLIENT(thd) && - wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) - goto error; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); if ((res= delete_precheck(thd, all_tables))) break; @@ -4950,9 +5005,7 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first; multi_delete *result; - if (WSREP_CLIENT(thd) && - wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) - goto error; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE); if ((res= multi_delete_precheck(thd, all_tables))) break; @@ -5793,6 +5846,7 @@ end_with_restore_list: thd->mdl_context.release_transactional_locks(); WSREP_DEBUG("BEGIN failed, MDL released: %lld", (longlong) thd->thread_id); + WSREP_DEBUG("stmt_da, sql_errno: %d", (thd->get_stmt_da()->is_error()) ? 
thd->get_stmt_da()->sql_errno() : 0); goto error; } my_ok(thd); @@ -5832,20 +5886,7 @@ end_with_restore_list: thd->set_killed(KILL_CONNECTION); thd->print_aborted_warning(3, "RELEASE"); } -#ifdef WITH_WSREP - if (WSREP(thd)) { - - if (thd->wsrep_conflict_state == NO_CONFLICT || - thd->wsrep_conflict_state == REPLAYING) - { - my_ok(thd); - } - } else { -#endif /* WITH_WSREP */ - my_ok(thd); -#ifdef WITH_WSREP - } -#endif /* WITH_WSREP */ + my_ok(thd); break; } case SQLCOM_ROLLBACK: @@ -5881,17 +5922,7 @@ end_with_restore_list: /* Disconnect the current client connection. */ if (tx_release) thd->set_killed(KILL_CONNECTION); -#ifdef WITH_WSREP - if (WSREP(thd)) { - if (thd->wsrep_conflict_state == NO_CONFLICT) { - my_ok(thd); - } - } else { -#endif /* WITH_WSREP */ - my_ok(thd); -#ifdef WITH_WSREP - } -#endif /* WITH_WSREP */ + my_ok(thd); break; } case SQLCOM_RELEASE_SAVEPOINT: @@ -6338,7 +6369,6 @@ finish: DBUG_ASSERT(!thd->in_active_multi_stmt_transaction() || thd->in_multi_stmt_transaction_mode()); - lex->unit.cleanup(); /* close/reopen tables that were marked to need reopen under LOCK TABLES */ @@ -6364,25 +6394,6 @@ finish: THD_STAGE_INFO(thd, stage_rollback); trans_rollback_stmt(thd); } -#ifdef WITH_WSREP - if (thd->spcont && - (thd->wsrep_conflict_state == MUST_ABORT || - thd->wsrep_conflict_state == ABORTED || - thd->wsrep_conflict_state == CERT_FAILURE)) - { - /* - The error was cleared, but THD was aborted by wsrep and - wsrep_conflict_state is still set accordingly. This - situation is expected if we are running a stored procedure - that declares a handler that catches ER_LOCK_DEADLOCK error. - In which case the error may have been cleared in method - sp_rcontext::handle_sql_condition(). - */ - trans_rollback_stmt(thd); - thd->wsrep_conflict_state= NO_CONFLICT; - thd->killed= NOT_KILLED; - } -#endif /* WITH_WSREP */ else { /* If commit fails, we should be able to reset the OK status. */ @@ -6398,9 +6409,6 @@ finish: /* Free tables. 
Set stage 'closing tables' */ close_thread_tables(thd); -#ifdef WITH_WSREP - thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; -#endif /* WITH_WSREP */ #ifndef DBUG_OFF @@ -6462,9 +6470,10 @@ finish: TRANSACT_TRACKER(add_trx_state_from_thd(thd)); - WSREP_TO_ISOLATION_END; - #ifdef WITH_WSREP + thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; + + WSREP_TO_ISOLATION_END; /* Force release of transactional locks if not in active MST and wsrep is on. */ @@ -6477,11 +6486,26 @@ finish: (longlong) thd->thread_id); thd->mdl_context.release_transactional_locks(); } + + /* + Current command did not start multi STMT transaction and the command + did not cause commit to happen (e.g. read only). Commit the wsrep + transaction as empty. + */ + if (!thd->in_active_multi_stmt_transaction() && + !thd->in_sub_stmt && + thd->wsrep_trx().active() && + thd->wsrep_trx().state() == wsrep::transaction::s_executing) + { + wsrep_commit_empty(thd, true); + } + + /* assume PA safety for next transaction */ + thd->wsrep_PA_safe= true; #endif /* WITH_WSREP */ DBUG_RETURN(res || thd->is_error()); -} - + } static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) { @@ -6607,6 +6631,7 @@ static bool execute_show_status(THD *thd, TABLE_LIST *all_tables) bool res; system_status_var old_status_var= thd->status_var; thd->initial_status_var= &old_status_var; + WSREP_SYNC_WAIT(thd, WSREP_SYNC_WAIT_BEFORE_SHOW); if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE))) res= execute_sqlcom_select(thd, all_tables); @@ -6625,6 +6650,10 @@ static bool execute_show_status(THD *thd, TABLE_LIST *all_tables) offsetof(STATUS_VAR, last_cleared_system_status_var)); mysql_mutex_unlock(&LOCK_status); return res; +#ifdef WITH_WSREP +wsrep_error_label: /* see WSREP_SYNC_WAIT() macro above */ + return true; +#endif /* WITH_WSREP */ } @@ -7654,7 +7683,7 @@ void THD::reset_for_next_command(bool do_clear_error) use autoinc values passed in binlog events, not the values forced by 
the cluster. */ - if (WSREP(this) && wsrep_exec_mode == LOCAL_STATE && + if (WSREP(this) && wsrep_thd_is_local(this) && !slave_thread && wsrep_auto_increment_control) { variables.auto_increment_offset= @@ -7872,144 +7901,155 @@ void mysql_init_multi_delete(LEX *lex) lex->query_tables_last= &lex->query_tables; } -static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, +#ifdef WITH_WSREP +static void wsrep_prepare_for_autocommit_retry(THD* thd, + char* rawbuf, + uint length, + Parser_state* parser_state) +{ + thd->clear_error(); + close_thread_tables(thd); + thd->wsrep_retry_counter++; // grow + wsrep_copy_query(thd); + thd->set_time(); + parser_state->reset(rawbuf, length); + + /* PSI end */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + + /* DTRACE end */ + if (MYSQL_QUERY_DONE_ENABLED()) + { + MYSQL_QUERY_DONE(thd->is_error()); + } + + /* SHOW PROFILE end */ +#if defined(ENABLED_PROFILING) + thd->profiling.finish_current_query(); +#endif + + /* SHOW PROFILE begin */ +#if defined(ENABLED_PROFILING) + thd->profiling.start_new_query("continuing"); + thd->profiling.set_query_source(rawbuf, length); +#endif + + /* DTRACE begin */ + MYSQL_QUERY_START(rawbuf, thd->thread_id, + thd->get_db(), + &thd->security_ctx->priv_user[0], + (char *) thd->security_ctx->host_or_ip); + + /* Performance Schema Interface instrumentation, begin */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[thd->get_command()].m_key); + MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), + thd->query_length()); + + DBUG_ASSERT(thd->wsrep_trx().active() == false); + thd->wsrep_cs().reset_error(); + thd->set_query_id(next_query_id()); +} + +static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, Parser_state *parser_state, bool is_com_multi, bool is_next_command) { -#ifdef WITH_WSREP bool is_autocommit= !thd->in_multi_stmt_transaction_mode() && - 
thd->wsrep_conflict_state == NO_CONFLICT && - !thd->wsrep_applier; - + wsrep_read_only_option(thd, thd->lex->query_tables); + bool retry_autocommit; do { - if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) - { - thd->wsrep_conflict_state= NO_CONFLICT; - /* Performance Schema Interface instrumentation, begin */ - thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, - com_statement_info[thd->get_command()].m_key); - MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), - thd->query_length()); - - DBUG_EXECUTE_IF("sync.wsrep_retry_autocommit", - { - const char act[]= - "now " - "SIGNAL wsrep_retry_autocommit_reached " - "WAIT_FOR wsrep_retry_autocommit_continue"; - DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); - }); - WSREP_DEBUG("Retry autocommit query: %s", thd->query()); - } - - mysql_parse(thd, rawbuf, length, parser_state, is_com_multi, - is_next_command); - - if (WSREP(thd)) { - /* wsrep BF abort in query exec phase */ - mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state == MUST_ABORT) { - wsrep_client_rollback(thd); - - WSREP_DEBUG("abort in exec query state, avoiding autocommit"); - } + retry_autocommit= false; + mysql_parse(thd, rawbuf, length, parser_state, is_com_multi, is_next_command); - if (thd->wsrep_conflict_state == MUST_REPLAY) - { - mysql_mutex_unlock(&thd->LOCK_thd_data); - if (thd->lex->explain) - delete_explain_query(thd->lex); - mysql_mutex_lock(&thd->LOCK_thd_data); + /* + Convert all ER_QUERY_INTERRUPTED errors to ER_LOCK_DEADLOCK + if the transaction was BF aborted. This can happen when the + transaction is being BF aborted via thd->awake() while it is + still executing. - wsrep_replay_transaction(thd); - } + Note that this must be done before wsrep_after_statement() call + since it clears the transaction for autocommit queries. 
+ */ + if (((thd->get_stmt_da()->is_error() && + thd->get_stmt_da()->sql_errno() == ER_QUERY_INTERRUPTED) || + !thd->get_stmt_da()->is_set()) && + thd->wsrep_trx().bf_aborted()) + { + WSREP_DEBUG("overriding error: %d with DEADLOCK", + (thd->get_stmt_da()->is_error()) ? + thd->get_stmt_da()->sql_errno() : 0); - /* setting error code for BF aborted trxs */ - if (thd->wsrep_conflict_state == ABORTED || - thd->wsrep_conflict_state == CERT_FAILURE) - { - thd->reset_for_next_command(); - if (is_autocommit && - thd->lex->sql_command != SQLCOM_SELECT && - (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)) - { - mysql_mutex_unlock(&thd->LOCK_thd_data); - WSREP_DEBUG("wsrep retrying AC query: %s", - (thd->query()) ? thd->query() : "void"); - - /* Performance Schema Interface instrumentation, end */ - MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); - thd->m_statement_psi= NULL; - thd->m_digest= NULL; - // Released thd->LOCK_thd_data above as below could end up - // close_thread_tables()/close_open_tables()/close_thread_table()/mysql_mutex_lock(&thd->LOCK_thd_data) - close_thread_tables(thd); - - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_conflict_state= RETRY_AUTOCOMMIT; - thd->wsrep_retry_counter++; // grow - wsrep_copy_query(thd); - thd->set_time(); - parser_state->reset(rawbuf, length); - mysql_mutex_unlock(&thd->LOCK_thd_data); - } - else - { - mysql_mutex_unlock(&thd->LOCK_thd_data); - // This does dirty read to wsrep variables but it is only a debug code - WSREP_DEBUG("%s, thd: %lld is_AC: %d, retry: %lu - %lu SQL: %s", - (thd->wsrep_conflict_state == ABORTED) ? 
- "BF Aborted" : "cert failure", - (longlong) thd->thread_id, is_autocommit, - thd->wsrep_retry_counter, - thd->variables.wsrep_retry_autocommit, thd->query()); - my_message(ER_LOCK_DEADLOCK, "Deadlock: wsrep aborted transaction", - MYF(0)); - - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_conflict_state= NO_CONFLICT; - if (thd->wsrep_conflict_state != REPLAYING) - thd->wsrep_retry_counter= 0; // reset - mysql_mutex_unlock(&thd->LOCK_thd_data); - } + thd->killed = NOT_KILLED; + wsrep_override_error(thd, ER_LOCK_DEADLOCK); + } - thd->reset_killed(); + if (wsrep_after_statement(thd) && is_autocommit) + { + thd->reset_for_next_command(); + thd->killed= NOT_KILLED; + if (is_autocommit && + thd->lex->sql_command != SQLCOM_SELECT && + thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit) + { + DBUG_EXECUTE_IF("sync.wsrep_retry_autocommit", + { + const char act[]= + "now " + "SIGNAL wsrep_retry_autocommit_reached " + "WAIT_FOR wsrep_retry_autocommit_continue"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + }); + WSREP_DEBUG("wsrep retrying AC query: %lu %s", + thd->wsrep_retry_counter, WSREP_QUERY(thd)); + wsrep_prepare_for_autocommit_retry(thd, rawbuf, length, parser_state); + if (thd->lex->explain) + delete_explain_query(thd->lex); + retry_autocommit= true; } else { - set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok - mysql_mutex_unlock(&thd->LOCK_thd_data); + WSREP_DEBUG("%s, thd: %llu is_AC: %d, retry: %lu - %lu SQL: %s", + wsrep_thd_transaction_state_str(thd), + thd->thread_id, + is_autocommit, + thd->wsrep_retry_counter, + thd->variables.wsrep_retry_autocommit, + WSREP_QUERY(thd)); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + thd->killed= NOT_KILLED; + thd->wsrep_retry_counter= 0; // reset } } - - /* If retry is requested clean up explain structure */ - if ((thd->wsrep_conflict_state == RETRY_AUTOCOMMIT || - thd->wsrep_conflict_state == MUST_REPLAY ) - && thd->lex->explain) + else { - 
delete_explain_query(thd->lex); + set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok } - - } while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT); + } while (retry_autocommit); if (thd->wsrep_retry_query) { - WSREP_DEBUG("releasing retry_query: conf %d sent %d kill %d errno %d SQL %s", - thd->wsrep_conflict_state, - thd->get_stmt_da()->is_sent(), + WSREP_DEBUG("releasing retry_query: " + "conf %s sent %d kill %d errno %d SQL %s", + wsrep_thd_transaction_state_str(thd), + thd->get_stmt_da()->is_sent(), thd->killed, - thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->sql_errno() : 0, + thd->get_stmt_da()->is_error() ? + thd->get_stmt_da()->sql_errno() : 0, thd->wsrep_retry_query); my_free(thd->wsrep_retry_query); thd->wsrep_retry_query = NULL; thd->wsrep_retry_query_len = 0; thd->wsrep_retry_command = COM_CONNECT; } -#endif /* WITH_WSREP */ + return false; } +#endif /* WITH_WSREP */ /* @@ -8961,6 +9001,7 @@ THD *find_thread_by_id(longlong id, bool query_id) continue; if (id == (query_id ? tmp->query_id : (longlong) tmp->thread_id)) { + if (WSREP(tmp)) mysql_mutex_lock(&tmp->LOCK_thd_data); mysql_mutex_lock(&tmp->LOCK_thd_kill); // Lock from delete break; } @@ -8989,7 +9030,7 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ uint error= (type == KILL_TYPE_QUERY ? 
ER_NO_SUCH_QUERY : ER_NO_SUCH_THREAD); DBUG_ENTER("kill_one_thread"); DBUG_PRINT("enter", ("id: %lld signal: %u", id, (uint) kill_signal)); - + WSREP_DEBUG("kill_one_thread %llu", thd->thread_id); if (id && (tmp= find_thread_by_id(id, type == KILL_TYPE_QUERY))) { /* @@ -9013,9 +9054,14 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ faster and do a harder kill than KILL_SYSTEM_THREAD; */ +#ifdef WITH_WSREP if (((thd->security_ctx->master_access & SUPER_ACL) || thd->security_ctx->user_matches(tmp->security_ctx)) && - !wsrep_thd_is_BF(tmp, false)) + !wsrep_thd_is_BF(tmp, false) && !tmp->wsrep_applier) +#else + if ((thd->security_ctx->master_access & SUPER_ACL) || + thd->security_ctx->user_matches(tmp->security_ctx)) +#endif /* WITH_WSREP */ { tmp->awake_no_mutex(kill_signal); error=0; @@ -9024,6 +9070,7 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR : ER_KILL_DENIED_ERROR); mysql_mutex_unlock(&tmp->LOCK_thd_kill); + if (WSREP(tmp)) mysql_mutex_unlock(&tmp->LOCK_thd_data); } DBUG_PRINT("exit", ("%d", error)); DBUG_RETURN(error); @@ -9081,7 +9128,10 @@ static uint kill_threads_for_user(THD *thd, LEX_USER *user, DBUG_RETURN(ER_KILL_DENIED_ERROR); } if (!threads_to_kill.push_back(tmp, thd->mem_root)) + { + if (WSREP(tmp)) mysql_mutex_lock(&tmp->LOCK_thd_data); mysql_mutex_lock(&tmp->LOCK_thd_kill); // Lock from delete + } } } mysql_mutex_unlock(&LOCK_thread_count); @@ -9103,6 +9153,7 @@ static uint kill_threads_for_user(THD *thd, LEX_USER *user, */ next_ptr= it2++; mysql_mutex_unlock(&ptr->LOCK_thd_kill); + if (WSREP(ptr)) mysql_mutex_unlock(&ptr->LOCK_thd_data); (*rows)++; } while ((ptr= next_ptr)); } diff --git a/sql/sql_plugin_services.ic b/sql/sql_plugin_services.ic index c730490a499..8893ea361e3 100644 --- a/sql/sql_plugin_services.ic +++ b/sql/sql_plugin_services.ic @@ -142,47 +142,35 @@ static struct thd_error_context_service_st 
thd_error_context_handler= { }; static struct wsrep_service_st wsrep_handler = { - get_wsrep, - get_wsrep_certify_nonPK, - get_wsrep_debug, - get_wsrep_drupal_282555_workaround, get_wsrep_recovery, - get_wsrep_load_data_splitting, - get_wsrep_log_conflicts, - get_wsrep_protocol_version, - wsrep_aborting_thd_contains, - wsrep_aborting_thd_enqueue, wsrep_consistency_check, wsrep_is_wsrep_xid, wsrep_xid_seqno, wsrep_xid_uuid, - wsrep_lock_rollback, wsrep_on, - wsrep_post_commit, - wsrep_prepare_key, - wsrep_run_wsrep_commit, + wsrep_prepare_key_for_innodb, wsrep_thd_LOCK, wsrep_thd_UNLOCK, - wsrep_thd_awake, - wsrep_thd_conflict_state, - wsrep_thd_conflict_state_str, - wsrep_thd_exec_mode, - wsrep_thd_exec_mode_str, - wsrep_thd_get_conflict_state, - wsrep_thd_is_BF, - wsrep_thd_is_wsrep, wsrep_thd_query, - wsrep_thd_query_state, - wsrep_thd_query_state_str, wsrep_thd_retry_counter, - wsrep_thd_set_conflict_state, wsrep_thd_ignore_table, wsrep_thd_trx_seqno, - wsrep_thd_ws_handle, - wsrep_trx_is_aborting, - wsrep_trx_order_before, - wsrep_unlock_rollback, - wsrep_set_data_home_dir + wsrep_thd_is_aborting, + wsrep_set_data_home_dir, + wsrep_thd_is_BF, + wsrep_thd_is_local, + wsrep_thd_self_abort, + wsrep_thd_append_key, + wsrep_thd_client_state_str, + wsrep_thd_client_mode_str, + wsrep_thd_transaction_state_str, + wsrep_thd_transaction_id, + wsrep_thd_bf_abort, + wsrep_thd_order_before, + wsrep_handle_SR_rollback, + wsrep_thd_skip_locking, + wsrep_get_sr_table_name, + wsrep_get_debug }; static struct thd_specifics_service_st thd_specifics_handler= diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index cb822fc2e98..c8cc64dba7e 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -4218,30 +4218,6 @@ reexecute: error= execute(expanded_query, open_cursor) || thd->is_error(); thd->m_reprepare_observer= NULL; -#ifdef WITH_WSREP - - if (WSREP_ON) - { - mysql_mutex_lock(&thd->LOCK_thd_data); - switch (thd->wsrep_conflict_state) - { - case CERT_FAILURE: - 
WSREP_DEBUG("PS execute fail for CERT_FAILURE: thd: %lld err: %d", - (longlong) thd->thread_id, - thd->get_stmt_da()->sql_errno() ); - thd->wsrep_conflict_state = NO_CONFLICT; - break; - - case MUST_REPLAY: - (void) wsrep_replay_transaction(thd); - break; - - default: - break; - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif /* WITH_WSREP */ if (unlikely(error) && (sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && @@ -4414,30 +4390,6 @@ reexecute: error= execute(expanded_query, open_cursor) || thd->is_error(); thd->m_reprepare_observer= NULL; -#ifdef WITH_WSREP - - if (WSREP_ON) - { - mysql_mutex_lock(&thd->LOCK_thd_data); - switch (thd->wsrep_conflict_state) - { - case CERT_FAILURE: - WSREP_DEBUG("PS execute fail for CERT_FAILURE: thd: %lld err: %d", - (longlong) thd->thread_id, - thd->get_stmt_da()->sql_errno() ); - thd->wsrep_conflict_state = NO_CONFLICT; - break; - - case MUST_REPLAY: - (void) wsrep_replay_transaction(thd); - break; - - default: - break; - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - } -#endif /* WITH_WSREP */ if (unlikely(error) && (sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 2ee175293de..1847416368a 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -3845,6 +3845,17 @@ int reset_master(THD* thd, rpl_gtid *init_state, uint32 init_state_len, return 1; } +#ifdef WITH_WSREP + if (WSREP_ON) + { + /* RESET MASTER will initialize GTID sequence, and that would happen locally + in this node, so better reject it + */ + my_message(ER_NOT_ALLOWED_COMMAND, + "RESET MASTER not allowed when node is in cluster", MYF(0)); + return 1; + } +#endif /* WITH_WSREP */ bool ret= 0; /* Temporarily disable master semisync before reseting master. 
*/ repl_semisync_master.before_reset_master(); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index f200f6f8c6d..11cfce172c5 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2611,9 +2611,6 @@ err: /* Chop of the last comma */ built_non_trans_tmp_query.chop(); built_non_trans_tmp_query.append(" /* generated by server */"); -#ifdef WITH_WSREP - thd->wsrep_skip_wsrep_GTID = true; -#endif /* WITH_WSREP */ error |= thd->binlog_query(THD::STMT_QUERY_TYPE, built_non_trans_tmp_query.ptr(), built_non_trans_tmp_query.length(), @@ -2626,9 +2623,6 @@ err: /* Chop of the last comma */ built_trans_tmp_query.chop(); built_trans_tmp_query.append(" /* generated by server */"); -#ifdef WITH_WSREP - thd->wsrep_skip_wsrep_GTID = true; -#endif /* WITH_WSREP */ error |= thd->binlog_query(THD::STMT_QUERY_TYPE, built_trans_tmp_query.ptr(), built_trans_tmp_query.length(), @@ -2643,9 +2637,6 @@ err: built_query.append(" /* generated by server */"); int error_code = non_tmp_error ? thd->get_stmt_da()->sql_errno() : 0; -#ifdef WITH_WSREP - thd->wsrep_skip_wsrep_GTID = false; -#endif /* WITH_WSREP */ error |= thd->binlog_query(THD::STMT_QUERY_TYPE, built_query.ptr(), built_query.length(), @@ -2694,9 +2685,6 @@ err: } end: -#ifdef WITH_WSREP - thd->wsrep_skip_wsrep_GTID = false; -#endif /* WITH_WSREP */ DBUG_RETURN(error); } diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index b79c1a1adb1..33af220ae67 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -507,8 +507,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) } #ifdef WITH_WSREP - if (thd->wsrep_exec_mode == LOCAL_STATE) - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); #endif /* We should have only one table in table list. 
*/ diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 798e929170c..389276d0bcf 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -416,9 +416,11 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) { bool hton_can_recreate; +#ifdef WITH_WSREP if (WSREP(thd) && wsrep_to_isolation_begin(thd, table_ref->db.str, table_ref->table_name.str, 0)) DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ if (lock_table(thd, table_ref, &hton_can_recreate)) DBUG_RETURN(TRUE); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 37b3d65e20a..df681e31d19 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5519,7 +5519,7 @@ static Sys_var_ulong Sys_wsrep_mysql_replication_bundle( static Sys_var_mybool Sys_wsrep_load_data_splitting( "wsrep_load_data_splitting", "To commit LOAD DATA " - "transaction after every 10K rows inserted", + "transaction after every 10K rows inserted (deprecating)", GLOBAL_VAR(wsrep_load_data_splitting), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); @@ -5539,12 +5539,48 @@ static Sys_var_mybool Sys_wsrep_restart_slave( "wsrep_restart_slave", "Should MariaDB slave be restarted automatically, when node joins back to cluster", GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); +static Sys_var_ulonglong Sys_wsrep_trx_fragment_size( + "wsrep_trx_fragment_size", + "Size of transaction fragments for streaming replication (measured in " + "units of 'wsrep_trx_fragment_unit')", + SESSION_VAR(wsrep_trx_fragment_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, WSREP_MAX_WS_SIZE), DEFAULT(0), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_trx_fragment_size_check), + ON_UPDATE(wsrep_trx_fragment_size_update)); + +extern const char *wsrep_fragment_units[]; + +static Sys_var_enum Sys_wsrep_trx_fragment_unit( + "wsrep_trx_fragment_unit", + "Unit for streaming replication transaction fragments' size: bytes, " + "rows, statements", + SESSION_VAR(wsrep_trx_fragment_unit), CMD_LINE(REQUIRED_ARG), + 
wsrep_fragment_units, + DEFAULT(WSREP_FRAG_BYTES), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), + ON_UPDATE(wsrep_trx_fragment_unit_update)); + +extern const char *wsrep_SR_store_types[]; +static Sys_var_enum Sys_wsrep_SR_store( + "wsrep_SR_store", "Storage for streaming replication fragments", + READ_ONLY GLOBAL_VAR(wsrep_SR_store_type), CMD_LINE(REQUIRED_ARG), + wsrep_SR_store_types, DEFAULT(WSREP_SR_STORE_TABLE), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + static Sys_var_mybool Sys_wsrep_dirty_reads( "wsrep_dirty_reads", "Allow reads even when the node is not in the primary component.", SESSION_VAR(wsrep_dirty_reads), CMD_LINE(OPT_ARG), DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG); +static Sys_var_uint Sys_wsrep_ignore_apply_errors ( + "wsrep_ignore_apply_errors", "Ignore replication errors", + GLOBAL_VAR(wsrep_ignore_apply_errors), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(WSREP_IGNORE_ERRORS_NONE, WSREP_IGNORE_ERRORS_MAX), + DEFAULT(7), BLOCK_SIZE(1)); + static Sys_var_uint Sys_wsrep_gtid_domain_id( "wsrep_gtid_domain_id", "When wsrep_gtid_mode is set, this value is " "used as gtid_domain_id for galera transactions and also copied to the " diff --git a/sql/table.cc b/sql/table.cc index 488d05b1a22..01f96b5f93b 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -247,6 +247,13 @@ TABLE_CATEGORY get_table_category(const LEX_CSTRING *db, DBUG_ASSERT(db != NULL); DBUG_ASSERT(name != NULL); +#ifdef WITH_WSREP + if (my_strcasecmp(system_charset_info, db->str, "mysql") == 0 && + my_strcasecmp(system_charset_info, name->str, "wsrep_streaming_log") == 0) + { + return TABLE_CATEGORY_INFORMATION; + } +#endif /* WITH_WSREP */ if (is_infoschema_db(db)) return TABLE_CATEGORY_INFORMATION; diff --git a/sql/transaction.cc b/sql/transaction.cc index 13614d36a73..4d61d2a120d 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -24,6 +24,9 @@ #include "debug_sync.h" // DEBUG_SYNC #include "sql_acl.h" #include "semisync_master.h" +#ifdef WITH_WSREP +#include "wsrep_trans_observer.h" 
+#endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY /** @@ -135,8 +138,6 @@ static bool xa_trans_force_rollback(THD *thd) by ha_rollback()/THD::transaction::cleanup(). */ thd->transaction.xid_state.rm_error= 0; - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); if (ha_rollback_trans(thd, true)) { my_error(ER_XAER_RMERR, MYF(0)); @@ -184,14 +185,16 @@ bool trans_begin(THD *thd, uint flags) (thd->variables.option_bits & OPTION_TABLE_LOCK)) { thd->variables.option_bits&= ~OPTION_TABLE_LOCK; - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); - if (WSREP_ON) - wsrep_post_commit(thd, TRUE); +#ifdef WITH_WSREP + if (wsrep_thd_is_local(thd)) + { + res= res || wsrep_after_statement(thd); + } +#endif /* WITH_WSREP */ } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -252,9 +255,14 @@ bool trans_begin(THD *thd, uint flags) } #ifdef WITH_WSREP - thd->wsrep_PA_safe= true; - if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) - DBUG_RETURN(TRUE); + if (wsrep_thd_is_local(thd)) + { + if (wsrep_sync_wait(thd)) + DBUG_RETURN(TRUE); + if (!thd->tx_read_only && + wsrep_start_transaction(thd, thd->wsrep_next_trx_id())) + DBUG_RETURN(TRUE); + } #endif /* WITH_WSREP */ thd->variables.option_bits|= OPTION_BEGIN; @@ -299,8 +307,6 @@ bool trans_commit(THD *thd) if (trans_check(thd)) DBUG_RETURN(TRUE); - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); @@ -311,8 +317,6 @@ bool trans_commit(THD *thd) mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); mysql_mutex_assert_not_owner(&LOCK_commit_ordered); - if (WSREP_ON) - wsrep_post_commit(thd, TRUE); /* if res is non-zero, then ha_commit_trans has rolled back the transaction, so the hooks for rollback will be called. 
@@ -368,14 +372,10 @@ bool trans_commit_implicit(THD *thd) /* Safety if one did "drop table" on locked tables */ if (!thd->locked_tables_mode) thd->variables.option_bits&= ~OPTION_TABLE_LOCK; - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); - if (WSREP_ON) - wsrep_post_commit(thd, TRUE); } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -409,14 +409,9 @@ bool trans_rollback(THD *thd) int res; DBUG_ENTER("trans_rollback"); -#ifdef WITH_WSREP - thd->wsrep_PA_safe= true; -#endif /* WITH_WSREP */ if (trans_check(thd)) DBUG_RETURN(TRUE); - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); @@ -515,14 +510,10 @@ bool trans_commit_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { - if (WSREP_ON) - wsrep_register_hton(thd, FALSE); res= ha_commit_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { trans_reset_one_shot_chistics(thd); - if (WSREP_ON) - wsrep_post_commit(thd, FALSE); } } @@ -578,8 +569,6 @@ bool trans_rollback_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { - if (WSREP_ON) - wsrep_register_hton(thd, FALSE); ha_rollback_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) trans_reset_one_shot_chistics(thd); @@ -733,7 +722,8 @@ bool trans_rollback_to_savepoint(THD *thd, LEX_CSTRING name) logging is off. 
*/ bool mdl_can_safely_rollback_to_savepoint= - (!(mysql_bin_log.is_open() && thd->variables.sql_log_bin) || + (!((WSREP_EMULATE_BINLOG_NNULL(thd) || mysql_bin_log.is_open()) + && thd->variables.sql_log_bin) || ha_rollback_to_savepoint_can_release_mdl(thd)); if (ha_rollback_to_savepoint(thd, sv)) @@ -944,13 +934,9 @@ bool trans_xa_commit(THD *thd) } else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE) { - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); int r= ha_commit_trans(thd, TRUE); if ((res= MY_TEST(r))) my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); - if (WSREP_ON) - wsrep_post_commit(thd, TRUE); } else if (xa_state == XA_PREPARED && thd->lex->xa_opt == XA_NONE) { @@ -969,8 +955,6 @@ bool trans_xa_commit(THD *thd) if (thd->mdl_context.acquire_lock(&mdl_request, thd->variables.lock_wait_timeout)) { - if (WSREP_ON) - wsrep_register_hton(thd, TRUE); ha_rollback_trans(thd, TRUE); my_error(ER_XAER_RMERR, MYF(0)); } diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc index 1f50ee55711..2c4dab3bd20 100644 --- a/sql/wsrep_applier.cc +++ b/sql/wsrep_applier.cc @@ -14,12 +14,17 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "mariadb.h" +#include "mysql/service_wsrep.h" +#include "wsrep_applier.h" + #include "wsrep_priv.h" #include "wsrep_binlog.h" // wsrep_dump_rbr_buf() #include "wsrep_xid.h" +#include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#include "slave.h" // opt_log_slave_updates #include "log_event.h" // class THD, EVENT_LEN_OFFSET, etc. -#include "wsrep_applier.h" #include "debug_sync.h" /* @@ -27,7 +32,6 @@ At the end (*buf) is shitfed to point to the following event or NULL and (*buf_len) will be changed to account just being read bytes of the 1st event. 
*/ - static Log_event* wsrep_read_log_event( char **arg_buf, size_t *arg_buf_len, const Format_description_log_event *description_event) @@ -35,7 +39,7 @@ static Log_event* wsrep_read_log_event( DBUG_ENTER("wsrep_read_log_event"); char *head= (*arg_buf); - uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + uint data_len= uint4korr(head + EVENT_LEN_OFFSET); char *buf= (*arg_buf); const char *error= 0; Log_event *res= 0; @@ -62,12 +66,13 @@ void wsrep_set_apply_format(THD* thd, Format_description_log_event* ev) { if (thd->wsrep_apply_format) { - delete (Format_description_log_event*)thd->wsrep_apply_format; + delete (Format_description_log_event*)thd->wsrep_apply_format; } thd->wsrep_apply_format= ev; } -Format_description_log_event* wsrep_get_apply_format(THD* thd) +Format_description_log_event* +wsrep_get_apply_format(THD* thd) { if (thd->wsrep_apply_format) { @@ -79,45 +84,77 @@ Format_description_log_event* wsrep_get_apply_format(THD* thd) return thd->wsrep_rgi->rli->relay_log.description_event_for_exec; } -static wsrep_cb_status_t wsrep_apply_events(THD* thd, - const void* events_buf, - size_t buf_len) +void wsrep_apply_error::store(const THD* const thd) { - char *buf= (char *)events_buf; - int rcode= 0; - int event= 1; - Log_event_type typ; + Diagnostics_area::Sql_condition_iterator it= + thd->get_stmt_da()->sql_conditions(); + const Sql_condition* cond; - DBUG_ENTER("wsrep_apply_events"); + static size_t const max_len= 2*MAX_SLAVE_ERRMSG; // 2x so that we have enough + + if (NULL == str_) + { + // this must be freeable by standard free() + str_= static_cast<char*>(malloc(max_len)); + if (NULL == str_) + { + WSREP_ERROR("Failed to allocate %zu bytes for error buffer.", max_len); + len_= 0; + return; + } + } + else + { + /* This is possible when we invoke rollback after failed applying. 
+ * In this situation DA should not be reset yet and should contain + * all previous errors from applying and new ones from rollbacking, + * so we just overwrite is from scratch */ + } - if (thd->killed == KILL_CONNECTION && - thd->wsrep_conflict_state != REPLAYING) + char* slider= str_; + const char* const buf_end= str_ + max_len - 1; // -1: leave space for \0 + + for (cond= it++; cond && slider < buf_end; cond= it++) { - WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld", - (long long) wsrep_thd_trx_seqno(thd)); - DBUG_RETURN(WSREP_CB_FAILURE); + uint const err_code= cond->get_sql_errno(); + const char* const err_str= cond->get_message_text(); + + slider+= my_snprintf(slider, buf_end - slider, " %s, Error_code: %d;", + err_str, err_code); } - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_query_state= QUERY_EXEC; - if (thd->wsrep_conflict_state!= REPLAYING) - thd->wsrep_conflict_state= NO_CONFLICT; - mysql_mutex_unlock(&thd->LOCK_thd_data); + *slider= '\0'; + len_= slider - str_ + 1; // +1: add \0 + + WSREP_DEBUG("Error buffer for thd %llu seqno %lld, %zu bytes: %s", + thd->thread_id, (long long)wsrep_thd_trx_seqno(thd), + len_, str_ ? 
str_ : "(null)"); +} + +int wsrep_apply_events(THD* thd, + Relay_log_info* rli, + const void* events_buf, + size_t buf_len) +{ + char *buf= (char *)events_buf; + int rcode= 0; + int event= 1; + Log_event_type typ; + DBUG_ENTER("wsrep_apply_events"); if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld", (long long) wsrep_thd_trx_seqno(thd)); - while(buf_len) + while (buf_len) { int exec_res; Log_event* ev= wsrep_read_log_event(&buf, &buf_len, - wsrep_get_apply_format(thd)); - + wsrep_get_apply_format(thd)); if (!ev) { WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %zu", (long long)wsrep_thd_trx_seqno(thd), buf_len); - rcode= 1; + rcode= WSREP_ERR_BAD_EVENT; goto error; } @@ -147,9 +184,12 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd, thd->set_server_id(ev->server_id); thd->set_time(); // time the query thd->transaction.start_time.reset(thd); + //#define mariadb_10_4_0 +#ifdef mariadb_10_4_0 wsrep_xid_init(&thd->transaction.xid_state.xid, thd->wsrep_trx_meta.gtid.uuid, thd->wsrep_trx_meta.gtid.seqno); +#endif thd->lex->current_select= 0; if (!ev->when) { @@ -162,13 +202,13 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd, (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? 
OPTION_SKIP_REPLICATION : 0); - ev->thd = thd; - exec_res = ev->apply_event(thd->wsrep_rgi); + ev->thd= thd; + exec_res= ev->apply_event(thd->wsrep_rgi); DBUG_PRINT("info", ("exec_event result: %d", exec_res)); if (exec_res) { - WSREP_WARN("RBR event %d %s apply warning: %d, %lld", + WSREP_WARN("Event %d %s apply failed: %d, seqno %lld", event, ev->get_type_str(), exec_res, (long long) wsrep_thd_trx_seqno(thd)); rcode= exec_res; @@ -178,230 +218,14 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd, } event++; - if (thd->wsrep_conflict_state!= NO_CONFLICT && - thd->wsrep_conflict_state!= REPLAYING) - WSREP_WARN("conflict state after RBR event applying: %d, %lld", - thd->wsrep_query_state, (long long)wsrep_thd_trx_seqno(thd)); - - if (thd->wsrep_conflict_state == MUST_ABORT) { - WSREP_WARN("RBR event apply failed, rolling back: %lld", - (long long) wsrep_thd_trx_seqno(thd)); - trans_rollback(thd); - thd->locked_tables_list.unlock_locked_tables(thd); - /* Release transactional metadata locks. */ - thd->mdl_context.release_transactional_locks(); - thd->wsrep_conflict_state= NO_CONFLICT; - DBUG_RETURN(WSREP_CB_FAILURE); - } - delete_or_keep_event_post_apply(thd->wsrep_rgi, typ, ev); } - error: - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_query_state= QUERY_IDLE; - mysql_mutex_unlock(&thd->LOCK_thd_data); - - assert(thd->wsrep_exec_mode== REPL_RECV); - +error: if (thd->killed == KILL_CONNECTION) WSREP_INFO("applier aborted: %lld", (long long)wsrep_thd_trx_seqno(thd)); - if (rcode) DBUG_RETURN(WSREP_CB_FAILURE); - DBUG_RETURN(WSREP_CB_SUCCESS); -} - -wsrep_cb_status_t wsrep_apply_cb(void* const ctx, - const void* const buf, - size_t const buf_len, - uint32_t const flags, - const wsrep_trx_meta_t* meta) -{ - THD* const thd((THD*)ctx); - - assert(thd->wsrep_apply_toi == false); - - // Allow tests to block the applier thread using the DBUG facilities. 
- DBUG_EXECUTE_IF("sync.wsrep_apply_cb", - { - const char act[]= - "now " - "SIGNAL sync.wsrep_apply_cb_reached " - "WAIT_FOR signal.wsrep_apply_cb"; - DBUG_ASSERT(!debug_sync_set_action(thd, - STRING_WITH_LEN(act))); - };); - - thd->wsrep_trx_meta = *meta; - -#ifdef WSREP_PROC_INFO - snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Applying write set %lld: %p, %zu", - (long long)wsrep_thd_trx_seqno(thd), buf, buf_len); - thd_proc_info(thd, thd->wsrep_info); -#else - thd_proc_info(thd, "Applying write set"); -#endif /* WSREP_PROC_INFO */ - - /* tune FK and UK checking policy */ - if (wsrep_slave_UK_checks == FALSE) - thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; - else - thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; - - if (wsrep_slave_FK_checks == FALSE) - thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; - else - thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; - - /* With galera we assume that the master has done the constraint checks */ - thd->variables.option_bits|= OPTION_NO_CHECK_CONSTRAINT_CHECKS; - - if (flags & WSREP_FLAG_ISOLATION) - { - thd->wsrep_apply_toi= true; - /* - Don't run in transaction mode with TOI actions. - */ - thd->variables.option_bits&= ~OPTION_BEGIN; - thd->server_status&= ~SERVER_STATUS_IN_TRANS; - } - wsrep_cb_status_t rcode(wsrep_apply_events(thd, buf, buf_len)); - -#ifdef WSREP_PROC_INFO - snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Applied write set %lld", (long long)wsrep_thd_trx_seqno(thd)); - thd_proc_info(thd, thd->wsrep_info); -#else - thd_proc_info(thd, "Applied write set"); -#endif /* WSREP_PROC_INFO */ - - if (WSREP_CB_SUCCESS != rcode) - { - wsrep_dump_rbr_buf_with_header(thd, buf, buf_len); - } - - if (thd->has_thd_temporary_tables()) - { - WSREP_DEBUG("Applier %lld has temporary tables. 
Closing them now..", - thd->thread_id); - thd->close_temporary_tables(); - } - - return rcode; -} - -static wsrep_cb_status_t wsrep_commit(THD* const thd) -{ -#ifdef WSREP_PROC_INFO - snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Committing %lld", (long long)wsrep_thd_trx_seqno(thd)); - thd_proc_info(thd, thd->wsrep_info); -#else - thd_proc_info(thd, "Committing"); -#endif /* WSREP_PROC_INFO */ - - wsrep_cb_status_t const rcode(trans_commit(thd) ? - WSREP_CB_FAILURE : WSREP_CB_SUCCESS); - - if (WSREP_CB_SUCCESS == rcode) - { - thd->wsrep_rgi->cleanup_context(thd, false); -#ifdef GTID_SUPPORT - thd->variables.gtid_next.set_automatic(); -#endif /* GTID_SUPPORT */ - if (thd->wsrep_apply_toi) - { - wsrep_set_SE_checkpoint(thd->wsrep_trx_meta.gtid.uuid, - thd->wsrep_trx_meta.gtid.seqno); - } - } - -#ifdef WSREP_PROC_INFO - snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Committed %lld", (long long) wsrep_thd_trx_seqno(thd)); - thd_proc_info(thd, thd->wsrep_info); -#else - thd_proc_info(thd, "Committed"); -#endif /* WSREP_PROC_INFO */ - - return rcode; -} - -static wsrep_cb_status_t wsrep_rollback(THD* const thd) -{ -#ifdef WSREP_PROC_INFO - snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Rolling back %lld", (long long)wsrep_thd_trx_seqno(thd)); - thd_proc_info(thd, thd->wsrep_info); -#else - thd_proc_info(thd, "Rolling back"); -#endif /* WSREP_PROC_INFO */ - - wsrep_cb_status_t const rcode(trans_rollback(thd) ? 
- WSREP_CB_FAILURE : WSREP_CB_SUCCESS); - -#ifdef WSREP_PROC_INFO - snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Rolled back %lld", (long long)wsrep_thd_trx_seqno(thd)); - thd_proc_info(thd, thd->wsrep_info); -#else - thd_proc_info(thd, "Rolled back"); -#endif /* WSREP_PROC_INFO */ - - return rcode; -} - -wsrep_cb_status_t wsrep_commit_cb(void* const ctx, - uint32_t const flags, - const wsrep_trx_meta_t* meta, - wsrep_bool_t* const exit, - bool const commit) -{ - THD* const thd((THD*)ctx); - - assert(meta->gtid.seqno == wsrep_thd_trx_seqno(thd)); - - wsrep_cb_status_t rcode; - - if (commit) - rcode = wsrep_commit(thd); - else - rcode = wsrep_rollback(thd); - - /* Cleanup */ wsrep_set_apply_format(thd, NULL); - thd->mdl_context.release_transactional_locks(); - thd->reset_query(); /* Mutex protected */ - free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); - thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; - if (wsrep_slave_count_change < 0 && commit && WSREP_CB_SUCCESS == rcode) - { - mysql_mutex_lock(&LOCK_wsrep_slave_threads); - if (wsrep_slave_count_change < 0) - { - wsrep_slave_count_change++; - *exit = true; - } - mysql_mutex_unlock(&LOCK_wsrep_slave_threads); - } - - if (thd->wsrep_applier) - { - /* From trans_begin() */ - thd->variables.option_bits|= OPTION_BEGIN; - thd->server_status|= SERVER_STATUS_IN_TRANS; - thd->wsrep_apply_toi= false; - } - - return rcode; -} - - -wsrep_cb_status_t wsrep_unordered_cb(void* const ctx, - const void* const data, - size_t const size) -{ - return WSREP_CB_SUCCESS; + DBUG_RETURN(rcode); } diff --git a/sql/wsrep_applier.h b/sql/wsrep_applier.h index f19d2d46d0c..a8da2acbb9a 100644 --- a/sql/wsrep_applier.h +++ b/sql/wsrep_applier.h @@ -1,4 +1,4 @@ -/* Copyright 2013 Codership Oy <http://www.codership.com> +/* Copyright 2013-2015 Codership Oy <http://www.codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published 
by @@ -17,28 +17,57 @@ #define WSREP_APPLIER_H #include <my_config.h> -#include "../wsrep/wsrep_api.h" -void wsrep_set_apply_format(THD* thd, Format_description_log_event* ev); -Format_description_log_event* wsrep_get_apply_format(THD* thd); +#include "sql_class.h" // THD class + +int wsrep_apply_events(THD* thd, + Relay_log_info* rli, + const void* events_buf, + size_t buf_len); -/* wsrep callback prototypes */ -extern "C" { -wsrep_cb_status_t wsrep_apply_cb(void *ctx, - const void* buf, size_t buf_len, - uint32_t flags, - const wsrep_trx_meta_t* meta); +/* Applier error codes, when nothing better is available. */ +#define WSREP_RET_SUCCESS 0 // Success +#define WSREP_ERR_GENERIC 1 // When in doubt (MySQL default error code) +#define WSREP_ERR_BAD_EVENT 2 // Can't parse event +#define WSREP_ERR_NOT_FOUND 3 // Key. table, schema not found +#define WSREP_ERR_EXISTS 4 // Key, table, schema already exists +#define WSREP_ERR_WRONG_TYPE 5 // Incompatible data type +#define WSREP_ERR_FAILED 6 // Operation failed for some internal reason +#define WSREP_ERR_ABORTED 7 // Operation was aborted externally -wsrep_cb_status_t wsrep_commit_cb(void *ctx, - uint32_t flags, - const wsrep_trx_meta_t* meta, - wsrep_bool_t* exit, - bool commit); +class wsrep_apply_error +{ +public: + wsrep_apply_error() : str_(NULL), len_(0) {}; + ~wsrep_apply_error() { ::free(str_); } + /* stores the current THD error info from the diagnostic area. Works only + * once, subsequent invocations are ignored in order to preserve the original + * condition. 
*/ + void store(const THD* thd); + const char* c_str() const { return str_; } + size_t length() const { return len_; } + bool is_null() const { return (c_str() == NULL && length() == 0); } + wsrep_buf_t get_buf() const + { + wsrep_buf_t ret= { c_str(), length() }; + return ret; + } +private: + char* str_; + size_t len_; +}; + +class Format_description_log_event; +void wsrep_set_apply_format(THD*, Format_description_log_event*); +Format_description_log_event* wsrep_get_apply_format(THD* thd); +int wsrep_apply(void* ctx, + uint32_t flags, + const wsrep_buf_t* buf, + const wsrep_trx_meta_t* meta, + wsrep_apply_error& err); -wsrep_cb_status_t wsrep_unordered_cb(void* ctx, - const void* data, - size_t size); +wsrep_cb_status_t wsrep_unordered_cb(void* ctx, + const wsrep_buf_t* data); -} /* extern "C" */ #endif /* WSREP_APPLIER_H */ diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc index 0cbcdcd64aa..b02692d14fe 100644 --- a/sql/wsrep_binlog.cc +++ b/sql/wsrep_binlog.cc @@ -14,12 +14,15 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "mariadb.h" +#include "mysql/service_wsrep.h" #include "wsrep_binlog.h" #include "wsrep_priv.h" #include "log.h" #include "log_event.h" #include "wsrep_applier.h" +#include "transaction.h" + extern handlerton *binlog_hton; /* Write the contents of a cache to a memory buffer. 
@@ -40,10 +43,10 @@ int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) DBUG_RETURN(ER_ERROR_ON_WRITE); } - uint length = my_b_bytes_in_cache(cache); - if (unlikely(0 == length)) length = my_b_fill(cache); + uint length= my_b_bytes_in_cache(cache); + if (unlikely(0 == length)) length= my_b_fill(cache); - size_t total_length = 0; + size_t total_length= 0; if (likely(length > 0)) do { @@ -60,7 +63,7 @@ int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) wsrep_max_ws_size, total_length); goto error; } - uchar* tmp = (uchar *)my_realloc(*buf, total_length, + uchar* tmp= (uchar *)my_realloc(*buf, total_length, MYF(MY_ALLOW_ZERO_PTR)); if (!tmp) { @@ -68,17 +71,17 @@ int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) *buf_len, length); goto error; } - *buf = tmp; + *buf= tmp; memcpy(*buf + *buf_len, cache->read_pos, length); - *buf_len = total_length; + *buf_len= total_length; if (cache->file < 0) { cache->read_pos= cache->read_end; break; } - } while ((length = my_b_fill(cache))); + } while ((length= my_b_fill(cache))); if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) { @@ -111,130 +114,6 @@ heap_size(size_t length) return (length + HEAP_PAGE_SIZE - 1)/HEAP_PAGE_SIZE*HEAP_PAGE_SIZE; } -/* append data to writeset */ -static inline wsrep_status_t -wsrep_append_data(wsrep_t* const wsrep, - wsrep_ws_handle_t* const ws, - const void* const data, - size_t const len) -{ - struct wsrep_buf const buff = { data, len }; - wsrep_status_t const rc(wsrep->append_data(wsrep, ws, &buff, 1, - WSREP_DATA_ORDERED, true)); - DBUG_DUMP("buff", (uchar*) data, len); - if (rc != WSREP_OK) - { - WSREP_WARN("append_data() returned %d", rc); - } - - return rc; -} - -/* - Write the contents of a cache to wsrep provider. - - This function quite the same as MYSQL_BIN_LOG::write_cache(), - with the exception that here we write in buffer instead of log file. 
- - This version reads all of cache into single buffer and then appends to a - writeset at once. - */ -static int wsrep_write_cache_once(wsrep_t* const wsrep, - THD* const thd, - IO_CACHE* const cache, - size_t* const len) -{ - my_off_t const saved_pos(my_b_tell(cache)); - DBUG_ENTER("wsrep_write_cache_once"); - - if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) - { - WSREP_ERROR("failed to initialize io-cache"); - DBUG_RETURN(ER_ERROR_ON_WRITE); - } - - int err(WSREP_OK); - - size_t total_length(0); - uchar stack_buf[STACK_SIZE]; /* to avoid dynamic allocations for few data*/ - uchar* heap_buf(NULL); - uchar* buf(stack_buf); - size_t allocated(sizeof(stack_buf)); - size_t used(0); - - uint length(my_b_bytes_in_cache(cache)); - if (unlikely(0 == length)) length = my_b_fill(cache); - - if (likely(length > 0)) do - { - total_length += length; - /* - Bail out if buffer grows too large. - A temporary fix to avoid allocating indefinitely large buffer, - not a real limit on a writeset size which includes other things - like header and keys. 
- */ - if (unlikely(total_length > wsrep_max_ws_size)) - { - WSREP_WARN("transaction size limit (%lu) exceeded: %zu", - wsrep_max_ws_size, total_length); - err = WSREP_TRX_SIZE_EXCEEDED; - goto cleanup; - } - - if (total_length > allocated) - { - size_t const new_size(heap_size(total_length)); - uchar* tmp = (uchar *)my_realloc(heap_buf, new_size, - MYF(MY_ALLOW_ZERO_PTR)); - if (!tmp) - { - WSREP_ERROR("could not (re)allocate buffer: %zu + %u", - allocated, length); - err = WSREP_TRX_SIZE_EXCEEDED; - goto cleanup; - } - - heap_buf = tmp; - buf = heap_buf; - allocated = new_size; - - if (used <= STACK_SIZE && used > 0) // there's data in stack_buf - { - DBUG_ASSERT(buf == stack_buf); - memcpy(heap_buf, stack_buf, used); - } - } - - memcpy(buf + used, cache->read_pos, length); - used = total_length; - if (cache->file < 0) - { - cache->read_pos= cache->read_end; - break; - } - } while ((length = my_b_fill(cache))); - - if (used > 0) - err = wsrep_append_data(wsrep, &thd->wsrep_ws_handle, buf, used); - - if (WSREP_OK == err) *len = total_length; - -cleanup: - if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) - { - WSREP_ERROR("failed to reinitialize io-cache"); - } - - if (unlikely(WSREP_OK != err)) - { - wsrep_dump_rbr_buf_with_header(thd, buf, used); - } - - my_free(heap_buf); - DBUG_RETURN(err); -} - /* Write the contents of a cache to wsrep provider. @@ -243,62 +122,58 @@ cleanup: This version uses incremental data appending as it reads it from cache. 
*/ -static int wsrep_write_cache_inc(wsrep_t* const wsrep, - THD* const thd, +static int wsrep_write_cache_inc(THD* const thd, IO_CACHE* const cache, size_t* const len) { - my_off_t const saved_pos(my_b_tell(cache)); - DBUG_ENTER("wsrep_write_cache_inc"); - - if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) - { - WSREP_ERROR("failed to initialize io-cache"); - DBUG_RETURN(WSREP_TRX_ERROR); - } + DBUG_ENTER("wsrep_write_cache_inc"); + my_off_t const saved_pos(my_b_tell(cache)); - int err(WSREP_OK); + if (reinit_io_cache(cache, READ_CACHE, thd->wsrep_sr().bytes_certified(), 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + DBUG_RETURN(1);; + } - size_t total_length(0); + int ret= 0; + size_t total_length(0); - uint length(my_b_bytes_in_cache(cache)); - if (unlikely(0 == length)) length = my_b_fill(cache); + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length= my_b_fill(cache); - if (likely(length > 0)) do + if (likely(length > 0)) + { + do { - total_length += length; - /* bail out if buffer grows too large - not a real limit on a writeset size which includes other things - like header and keys. - */ - if (unlikely(total_length > wsrep_max_ws_size)) - { - WSREP_WARN("transaction size limit (%lu) exceeded: %zu", - wsrep_max_ws_size, total_length); - err = WSREP_TRX_SIZE_EXCEEDED; - goto cleanup; - } - - if(WSREP_OK != (err=wsrep_append_data(wsrep, &thd->wsrep_ws_handle, - cache->read_pos, length))) - goto cleanup; - - if (cache->file < 0) - { - cache->read_pos= cache->read_end; - break; - } - } while ((length = my_b_fill(cache))); - - if (WSREP_OK == err) *len = total_length; + total_length += length; + /* bail out if buffer grows too large + not a real limit on a writeset size which includes other things + like header and keys. 
+ */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + ret= 1; + goto cleanup; + } + if (thd->wsrep_cs().append_data(wsrep::const_buffer(cache->read_pos, length))) + goto cleanup; + cache->read_pos= cache->read_end; + } while ((cache->file >= 0) && (length= my_b_fill(cache))); + } + if (ret == 0) + { + assert(total_length + thd->wsrep_sr().bytes_certified() == saved_pos); + } cleanup: - if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) - { - WSREP_ERROR("failed to reinitialize io-cache"); - } - - DBUG_RETURN(err); + *len= total_length; + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + DBUG_RETURN(ret); } /* @@ -307,17 +182,11 @@ cleanup: This function quite the same as MYSQL_BIN_LOG::write_cache(), with the exception that here we write in buffer instead of log file. */ -int wsrep_write_cache(wsrep_t* const wsrep, - THD* const thd, +int wsrep_write_cache(THD* const thd, IO_CACHE* const cache, size_t* const len) { - if (wsrep_incremental_data_collection) { - return wsrep_write_cache_inc(wsrep, thd, cache, len); - } - else { - return wsrep_write_cache_once(wsrep, thd, cache, len); - } + return wsrep_write_cache_inc(thd, cache, len); } void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len) @@ -383,80 +252,17 @@ int wsrep_binlog_close_connection(THD* thd) int wsrep_binlog_savepoint_set(THD *thd, void *sv) { if (!wsrep_emulate_bin_log) return 0; - int rcode = binlog_hton->savepoint_set(binlog_hton, thd, sv); + int rcode= binlog_hton->savepoint_set(binlog_hton, thd, sv); return rcode; } int wsrep_binlog_savepoint_rollback(THD *thd, void *sv) { if (!wsrep_emulate_bin_log) return 0; - int rcode = binlog_hton->savepoint_rollback(binlog_hton, thd, sv); + int rcode= binlog_hton->savepoint_rollback(binlog_hton, thd, sv); return rcode; } -#if 0 -void wsrep_dump_rbr_direct(THD* thd, 
IO_CACHE* cache) -{ - char filename[PATH_MAX]= {0}; - int len= snprintf(filename, PATH_MAX, "%s/GRA_%lld_%lld.log", - wsrep_data_home_dir, (longlong) thd->thread_id, - (longlong) wsrep_thd_trx_seqno(thd)); - size_t bytes_in_cache = 0; - // check path - if (len >= PATH_MAX) - { - WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); - return ; - } - // init cache - my_off_t const saved_pos(my_b_tell(cache)); - if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) - { - WSREP_ERROR("failed to initialize io-cache"); - return ; - } - // open file - FILE* of = fopen(filename, "wb"); - if (!of) - { - WSREP_ERROR("Failed to open file '%s': %d (%s)", - filename, errno, strerror(errno)); - goto cleanup; - } - // ready to write - bytes_in_cache= my_b_bytes_in_cache(cache); - if (unlikely(bytes_in_cache == 0)) bytes_in_cache = my_b_fill(cache); - if (likely(bytes_in_cache > 0)) do - { - if (my_fwrite(of, cache->read_pos, bytes_in_cache, - MYF(MY_WME | MY_NABP)) == (size_t) -1) - { - WSREP_ERROR("Failed to write file '%s'", filename); - goto cleanup; - } - - if (cache->file < 0) - { - cache->read_pos= cache->read_end; - break; - } - } while ((bytes_in_cache= my_b_fill(cache))); - if (cache->error == -1) - { - WSREP_ERROR("RBR inconsistent"); - goto cleanup; - } -cleanup: - // init back - if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) - { - WSREP_ERROR("failed to reinitialize io-cache"); - } - // close file - if (of) fclose(of); -} -#endif - void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end) { thd->binlog_flush_pending_rows_event(stmt_end); @@ -544,3 +350,31 @@ cleanup1: DBUG_VOID_RETURN; } +#include "log_event.h" + +int wsrep_write_skip_event(THD* thd) +{ + DBUG_ENTER("wsrep_write_skip_event"); + Ignorable_log_event skip_event(thd); + int ret= mysql_bin_log.write_event(&skip_event); + if (ret) + { + WSREP_WARN("wsrep_write_skip_event: write to binlog failed: %d", ret); + } + if (!ret && (ret= trans_commit_stmt(thd))) + { + 
WSREP_WARN("wsrep_write_skip_event: statt commit failed"); + } + DBUG_RETURN(ret); +} + +int wsrep_write_dummy_event_low(THD *thd, const char *msg) +{ + ::abort(); + return 0; +} + +int wsrep_write_dummy_event(THD *orig_thd, const char *msg) +{ + return 0; +} diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h index 864813d5c98..4cef38c85d3 100644 --- a/sql/wsrep_binlog.h +++ b/sql/wsrep_binlog.h @@ -16,6 +16,7 @@ #ifndef WSREP_BINLOG_H #define WSREP_BINLOG_H +#include "my_global.h" #include "sql_class.h" // THD, IO_CACHE #define HEAP_PAGE_SIZE 65536 /* 64K */ @@ -38,23 +39,39 @@ int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len); @param len total amount of data written @return wsrep error status */ -int wsrep_write_cache (wsrep_t* const wsrep, - THD* const thd, - IO_CACHE* const cache, - size_t* const len); +int wsrep_write_cache(THD* thd, + IO_CACHE* cache, + size_t* len); /* Dump replication buffer to disk */ void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len); -/* Dump replication buffer to disk without intermediate buffer */ -void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); - /* Dump replication buffer along with header to a file */ void wsrep_dump_rbr_buf_with_header(THD *thd, const void *rbr_buf, size_t buf_len); int wsrep_binlog_close_connection(THD* thd); -int wsrep_binlog_savepoint_set(THD *thd, void *sv); -int wsrep_binlog_savepoint_rollback(THD *thd, void *sv); + +/** + Write a skip event into binlog. + + @param thd Thread object pointer + @return Zero in case of success, non-zero on failure. +*/ +int wsrep_write_skip_event(THD* thd); + +/* + Write dummy event into binlog in place of unused GTID. + The binlog write is done in thd context. +*/ +int wsrep_write_dummy_event_low(THD *thd, const char *msg); +/* + Write dummy event to binlog in place of unused GTID and + commit. The binlog write and commit are done in temporary + thd context, the original thd state is not altered. 
+*/ +int wsrep_write_dummy_event(THD* thd, const char *msg); + +void wsrep_register_binlog_handler(THD *thd, bool trx); #endif /* WSREP_BINLOG_H */ diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc index 0b7a9ca6252..7b8067ef238 100644 --- a/sql/wsrep_check_opts.cc +++ b/sql/wsrep_check_opts.cc @@ -33,7 +33,7 @@ int wsrep_check_opts() autoinc_lock_mode->val_int(&is_null, 0, OPT_GLOBAL, 0) != 2) { WSREP_ERROR("Parallel applying (wsrep_slave_threads > 1) requires" - " innodb_autoinc_lock_mode = 2."); + " innodb_autoinc_lock_mode= 2."); return 1; } } @@ -88,7 +88,7 @@ int wsrep_check_opts() { if (global_system_variables.binlog_format != BINLOG_FORMAT_ROW) { - WSREP_ERROR("Only binlog_format = 'ROW' is currently supported. " + WSREP_ERROR("Only binlog_format= 'ROW' is currently supported. " "Configured value: '%s'. Please adjust your " "configuration.", binlog_format_names[global_system_variables.binlog_format]); diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc new file mode 100644 index 00000000000..994fa97db60 --- /dev/null +++ b/sql/wsrep_client_service.cc @@ -0,0 +1,307 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "wsrep_client_service.h" +#include "wsrep_high_priority_service.h" +#include "wsrep_applier.h" /* wsrep_apply_events() */ +#include "wsrep_binlog.h" /* wsrep_dump_rbr_buf() */ +#include "wsrep_schema.h" /* remove_fragments() */ +#include "wsrep_thd.h" +#include "wsrep_xid.h" +#include "wsrep_trans_observer.h" + +#include "sql_base.h" /* close_temporary_table() */ +#include "sql_class.h" /* THD */ +#include "sql_parse.h" /* stmt_causes_implicit_commit() */ +#include "rpl_filter.h" /* binlog_filter */ +#include "rpl_rli.h" /* Relay_log_info */ +#include "slave.h" /* opt_log_slave_updates */ +#include "transaction.h" /* trans_commit()... */ +#include "log.h" /* stmt_has_updated_trans_table() */ +//#include "debug_sync.h" +#include "mysql/service_debug_sync.h" +namespace +{ + +void debug_sync_caller(THD* thd, const char* sync_point) +{ +#ifdef ENABLED_DEBUG_SYNC_OUT + debug_sync_set_action(thd, sync_point, strlen(sync_point)); +#endif +#ifdef ENABLED_DEBUG_SYNC + if (debug_sync_service) debug_sync_service(thd,sync_point,strlen(sync_point)); +#endif + +} +} + +Wsrep_client_service::Wsrep_client_service(THD* thd, + Wsrep_client_state& client_state) + : wsrep::client_service() + , m_thd(thd) + , m_client_state(client_state) +{ } + +void Wsrep_client_service::store_globals() +{ + DBUG_ENTER("Wsrep_client_service::store_globals"); + m_thd->store_globals(); + DBUG_VOID_RETURN; +} + +void Wsrep_client_service::reset_globals() +{ + DBUG_ENTER("Wsrep_client_service::reset_globals"); + m_thd->reset_globals(); + DBUG_VOID_RETURN; +} + +bool Wsrep_client_service::interrupted() const +{ + DBUG_ASSERT(m_thd == current_thd); + mysql_mutex_lock(&m_thd->LOCK_thd_data); + + /* wsrep state can be interrupted only if THD was explicitly killed, + for wsrep conflicts, we 
use deadlock error only + */ + bool ret= (m_thd->killed != NOT_KILLED && + m_thd->wsrep_trx().state() != wsrep::transaction::s_must_abort && + m_thd->wsrep_trx().state() != wsrep::transaction::s_aborting && + m_thd->wsrep_trx().state() != wsrep::transaction::s_aborted); + mysql_mutex_unlock(&m_thd->LOCK_thd_data); + if (ret) + { + WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d", + m_thd->killed, m_thd->wsrep_trx().state()); + } + return ret; +} + +int Wsrep_client_service::prepare_data_for_replication() +{ + DBUG_ASSERT(m_thd == current_thd); + DBUG_ENTER("Wsrep_client_service::prepare_data_for_replication"); + size_t data_len= 0; + IO_CACHE* cache= wsrep_get_trans_cache(m_thd); + + if (cache) + { + m_thd->binlog_flush_pending_rows_event(true); + if (wsrep_write_cache(m_thd, cache, &data_len)) + { + WSREP_ERROR("rbr write fail, data_len: %zu", + data_len); + // wsrep_override_error(m_thd, ER_ERROR_DURING_COMMIT); + DBUG_RETURN(1); + } + } + + if (data_len == 0) + { + if (m_thd->get_stmt_da()->is_ok() && + m_thd->get_stmt_da()->affected_rows() > 0 && + !binlog_filter->is_on() && + !m_thd->wsrep_trx().is_streaming()) + { + WSREP_DEBUG("empty rbr buffer, query: %s, " + "affected rows: %llu, " + "changed tables: %d, " + "sql_log_bin: %d", + WSREP_QUERY(m_thd), + m_thd->get_stmt_da()->affected_rows(), + stmt_has_updated_trans_table(m_thd), + m_thd->variables.sql_log_bin); + } + else + { + WSREP_DEBUG("empty rbr buffer, query: %s", WSREP_QUERY(m_thd)); + } + } + DBUG_RETURN(0); +} + + +void Wsrep_client_service::cleanup_transaction() +{ + DBUG_ASSERT(m_thd == current_thd); + if (WSREP_EMULATE_BINLOG(m_thd)) wsrep_thd_binlog_trx_reset(m_thd); + m_thd->wsrep_affected_rows= 0; +} + + +int Wsrep_client_service::prepare_fragment_for_replication(wsrep::mutable_buffer& buffer) +{ + DBUG_ASSERT(m_thd == current_thd); + THD* thd= m_thd; + DBUG_ENTER("Wsrep_client_service::prepare_fragment_for_replication"); + IO_CACHE* cache= wsrep_get_trans_cache(thd); + 
thd->binlog_flush_pending_rows_event(true); + + if (!cache) + { + DBUG_RETURN(0); + } + + const my_off_t saved_pos(my_b_tell(cache)); + if (reinit_io_cache(cache, READ_CACHE, thd->wsrep_sr().bytes_certified(), 0, 0)) + { + DBUG_RETURN(1); + } + + int ret= 0; + size_t total_length= 0; + size_t length= my_b_bytes_in_cache(cache); + + if (!length) + { + length= my_b_fill(cache); + } + + if (length > 0) + { + do + { + total_length+= length; + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + ret= 1; + goto cleanup; + } + + buffer.push_back(reinterpret_cast<const char*>(cache->read_pos), + reinterpret_cast<const char*>(cache->read_pos + length)); + cache->read_pos= cache->read_end; + } + while (cache->file >= 0 && (length= my_b_fill(cache))); + } + DBUG_ASSERT(total_length == buffer.size()); +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("Failed to reinitialize IO cache"); + ret= 1; + } + DBUG_RETURN(ret); +} + +int Wsrep_client_service::remove_fragments() +{ + DBUG_ENTER("Wsrep_client_service::remove_fragments"); + if (wsrep_schema->remove_fragments(m_thd, + Wsrep_server_state::instance().id(), + m_thd->wsrep_trx().id(), + m_thd->wsrep_sr().fragments())) + { + WSREP_DEBUG("Failed to remove fragments from SR storage for transaction " + "%llu, %llu", + m_thd->thread_id, m_thd->wsrep_trx().id().get()); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +bool Wsrep_client_service::statement_allowed_for_streaming() const +{ + /* + Todo: Decide if implicit commit is allowed with streaming + replication. 
+ !stmt_causes_implicit_commit(m_thd, CF_IMPLICIT_COMMIT_BEGIN); + */ + return true; +} + +size_t Wsrep_client_service::bytes_generated() const +{ + IO_CACHE* cache= wsrep_get_trans_cache(m_thd); + if (cache) + { + m_thd->binlog_flush_pending_rows_event(true); + return my_b_tell(cache); + } + return 0; +} + +void Wsrep_client_service::will_replay() +{ + DBUG_ASSERT(m_thd == current_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + ++wsrep_replaying; + mysql_mutex_unlock(&LOCK_wsrep_replaying); +} + +enum wsrep::provider::status Wsrep_client_service::replay() +{ + DBUG_ASSERT(m_thd == current_thd); + Wsrep_replayer_service replayer_service(m_thd); + wsrep::provider& provider(m_thd->wsrep_cs().provider()); + mysql_mutex_lock(&m_thd->LOCK_thd_data); + m_thd->killed= NOT_KILLED; + mysql_mutex_unlock(&m_thd->LOCK_thd_data); + enum wsrep::provider::status ret= + provider.replay(m_thd->wsrep_trx().ws_handle(), &replayer_service); + replayer_service.replay_status(ret); + mysql_mutex_lock(&LOCK_wsrep_replaying); + --wsrep_replaying; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + return ret; +} + +void Wsrep_client_service::wait_for_replayers(wsrep::unique_lock<wsrep::mutex>& lock) +{ + DBUG_ASSERT(m_thd == current_thd); + lock.unlock(); + mysql_mutex_lock(&LOCK_wsrep_replaying); + while (wsrep_replaying > 0) + { + mysql_cond_wait(&COND_wsrep_replaying, &LOCK_wsrep_replaying); + } + mysql_mutex_unlock(&LOCK_wsrep_replaying); + lock.lock(); +} + +void Wsrep_client_service::debug_sync(const char* sync_point) +{ + DBUG_ASSERT(m_thd == current_thd); + debug_sync_caller(m_thd, sync_point); +} + +void Wsrep_client_service::debug_crash(const char* crash_point) +{ + // DBUG_ASSERT(m_thd == current_thd); + DBUG_EXECUTE_IF(crash_point, DBUG_SUICIDE(); ); +} + +int Wsrep_client_service::bf_rollback() +{ + DBUG_ASSERT(m_thd == current_thd); + DBUG_ENTER("Wsrep_client_service::rollback"); + + int ret= (trans_rollback_stmt(m_thd) || trans_rollback(m_thd)); + if (m_thd->locked_tables_mode 
&& m_thd->lock) + { + m_thd->locked_tables_list.unlock_locked_tables(m_thd); + m_thd->variables.option_bits&= ~OPTION_TABLE_LOCK; + } + if (m_thd->global_read_lock.is_acquired()) + { + m_thd->global_read_lock.unlock_global_read_lock(m_thd); + } + m_thd->mdl_context.release_transactional_locks(); + m_thd->mdl_context.release_explicit_locks(); + + DBUG_RETURN(ret); +} diff --git a/sql/wsrep_client_service.h b/sql/wsrep_client_service.h new file mode 100644 index 00000000000..43edae3441d --- /dev/null +++ b/sql/wsrep_client_service.h @@ -0,0 +1,63 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/** @file wsrep_client_service.h + + This file provides declaratios for client service implementation. + See wsrep/client_service.hpp for interface documentation. 
+*/ + +#ifndef WSREP_CLIENT_SERVICE_H +#define WSREP_CLIENT_SERVICE_H + +/* wsrep-lib */ +#include "wsrep/client_service.hpp" +#include "wsrep/client_state.hpp" +#include "wsrep/exception.hpp" /* not_implemented_error, remove when finished */ + +class THD; +class Wsrep_client_state; +class Wsrep_high_priority_context; + +class Wsrep_client_service : public wsrep::client_service +{ +public: + Wsrep_client_service(THD*, Wsrep_client_state&); + + bool interrupted() const; + void reset_globals(); + void store_globals(); + int prepare_data_for_replication(); + void cleanup_transaction(); + bool statement_allowed_for_streaming() const; + size_t bytes_generated() const; + int prepare_fragment_for_replication(wsrep::mutable_buffer&); + int remove_fragments(); + void emergency_shutdown() + { throw wsrep::not_implemented_error(); } + void will_replay(); + enum wsrep::provider::status replay(); + void wait_for_replayers(wsrep::unique_lock<wsrep::mutex>&); + void debug_sync(const char*); + void debug_crash(const char*); + int bf_rollback(); +private: + friend class Wsrep_server_service; + THD* m_thd; + Wsrep_client_state& m_client_state; +}; + + +#endif /* WSREP_CLIENT_SERVICE_H */ diff --git a/sql/wsrep_client_state.h b/sql/wsrep_client_state.h new file mode 100644 index 00000000000..403bfa81365 --- /dev/null +++ b/sql/wsrep_client_state.h @@ -0,0 +1,47 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_CLIENT_STATE_H +#define WSREP_CLIENT_STATE_H + +/* wsrep-lib */ +#include "wsrep/client_state.hpp" +#include "my_global.h" + +class THD; + +class Wsrep_client_state : public wsrep::client_state +{ +public: + Wsrep_client_state(THD* thd, + wsrep::mutex& mutex, + wsrep::condition_variable& cond, + wsrep::server_state& server_state, + wsrep::client_service& client_service, + const wsrep::client_id& id) + : wsrep::client_state(mutex, + cond, + server_state, + client_service, + id, + wsrep::client_state::m_local) + , m_thd(thd) + { } + THD* thd() { return m_thd; } +private: + THD* m_thd; +}; + +#endif /* WSREP_CLIENT_STATE_H */ diff --git a/sql/wsrep_condition_variable.h b/sql/wsrep_condition_variable.h new file mode 100644 index 00000000000..4412154e67b --- /dev/null +++ b/sql/wsrep_condition_variable.h @@ -0,0 +1,54 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_CONDITION_VARIABLE_H +#define WSREP_CONDITION_VARIABLE_H + +/* wsrep-lib */ +#include "wsrep/condition_variable.hpp" + +/* implementation */ +#include "my_pthread.h" + +class Wsrep_condition_variable : public wsrep::condition_variable +{ +public: + + Wsrep_condition_variable(mysql_cond_t& cond) + : m_cond(cond) + { } + ~Wsrep_condition_variable() + { } + + void notify_one() + { + mysql_cond_signal(&m_cond); + } + + void notify_all() + { + mysql_cond_broadcast(&m_cond); + } + + void wait(wsrep::unique_lock<wsrep::mutex>& lock) + { + mysql_mutex_t* mutex= static_cast<mysql_mutex_t*>(lock.mutex().native()); + mysql_cond_wait(&m_cond, mutex); + } +private: + mysql_cond_t& m_cond; +}; + +#endif /* WSREP_CONDITION_VARIABLE_H */ diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index 9a4bbd01bcd..916788483ab 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -17,16 +17,10 @@ #include <sql_class.h> #include <mysql/service_wsrep.h> -my_bool wsrep_thd_is_BF(THD *, my_bool) +my_bool wsrep_thd_is_BF(const THD *, my_bool) { return 0; } -int wsrep_trx_order_before(THD *, THD *) -{ return 0; } - -enum wsrep_conflict_state wsrep_thd_conflict_state(THD *, my_bool) -{ return NO_CONFLICT; } - -int wsrep_is_wsrep_xid(const XID*) +int wsrep_is_wsrep_xid(const void* xid) { return 0; } long long wsrep_xid_seqno(const XID* x) @@ -34,111 +28,99 @@ long long wsrep_xid_seqno(const XID* x) const unsigned char* wsrep_xid_uuid(const XID*) { - static const unsigned char uuid[16] = {0}; + static const unsigned char uuid[16]= {0}; return uuid; } +bool wsrep_prepare_key_for_innodb(THD* thd, const uchar*, size_t, const uchar*, size_t, struct wsrep_buf*, size_t*) +{ return 1; } + bool wsrep_prepare_key(const uchar*, size_t, const uchar*, size_t, struct wsrep_buf*, 
size_t*) { return 0; } struct wsrep *get_wsrep() { return 0; } -my_bool get_wsrep_certify_nonPK() -{ return 0; } - -my_bool get_wsrep_debug() -{ return 0; } - -my_bool get_wsrep_drupal_282555_workaround() -{ return 0; } - -my_bool get_wsrep_load_data_splitting() -{ return 0; } - my_bool get_wsrep_recovery() { return 0; } -my_bool get_wsrep_log_conflicts() -{ return 0; } - -long get_wsrep_protocol_version() -{ return 0; } - -my_bool wsrep_aborting_thd_contains(THD *) -{ return 0; } - -void wsrep_aborting_thd_enqueue(THD *) -{ } - bool wsrep_consistency_check(THD *) { return 0; } void wsrep_lock_rollback() { } -int wsrep_on(THD *thd) +my_bool wsrep_on(const THD *) { return 0; } -void wsrep_post_commit(THD*, bool) -{ } - -enum wsrep_trx_status wsrep_run_wsrep_commit(THD *, bool) -{ return WSREP_TRX_ERROR; } - -void wsrep_thd_LOCK(THD *) -{ } - -void wsrep_thd_UNLOCK(THD *) +void wsrep_thd_LOCK(const THD *) { } -void wsrep_thd_awake(THD *, my_bool) +void wsrep_thd_UNLOCK(const THD *) { } const char *wsrep_thd_conflict_state_str(THD *) { return 0; } -enum wsrep_exec_mode wsrep_thd_exec_mode(THD *) -{ return LOCAL_STATE; } - const char *wsrep_thd_exec_mode_str(THD *) { return NULL; } -enum wsrep_conflict_state wsrep_thd_get_conflict_state(THD *) -{ return NO_CONFLICT; } +const char *wsrep_thd_query(const THD *) +{ return 0; } -my_bool wsrep_thd_is_wsrep(THD *) +const char *wsrep_thd_query_state_str(THD *) { return 0; } -char *wsrep_thd_query(THD *) +int wsrep_thd_retry_counter(const THD *) { return 0; } -enum wsrep_query_state wsrep_thd_query_state(THD *) -{ return QUERY_IDLE; } +bool wsrep_thd_ignore_table(THD *) +{ return 0; } -const char *wsrep_thd_query_state_str(THD *) +long long wsrep_thd_trx_seqno(const THD *) +{ return -1; } + +my_bool wsrep_thd_is_aborting(const THD *) { return 0; } -int wsrep_thd_retry_counter(THD *) +void wsrep_set_data_home_dir(const char *) +{ } + +my_bool wsrep_thd_is_local(const THD *) { return 0; } -void wsrep_thd_set_conflict_state(THD 
*, enum wsrep_conflict_state) +void wsrep_thd_self_abort(THD *) { } -bool wsrep_thd_ignore_table(THD *) +int wsrep_thd_append_key(THD *, const struct wsrep_key*, int, enum Wsrep_service_key_type) { return 0; } -longlong wsrep_thd_trx_seqno(THD *) -{ return -1; } +const char* wsrep_thd_client_state_str(const THD*) +{ return 0; } -struct wsrep_ws_handle* wsrep_thd_ws_handle(THD *) +const char* wsrep_thd_client_mode_str(const THD*) { return 0; } -int wsrep_trx_is_aborting(THD *) +const char* wsrep_thd_transaction_state_str(const THD*) { return 0; } -void wsrep_unlock_rollback() -{ } +query_id_t wsrep_thd_transaction_id(const THD *) +{ return 0; } -void wsrep_set_data_home_dir(const char *) +my_bool wsrep_thd_bf_abort(const THD *, THD *, my_bool) +{ return 0; } + +my_bool wsrep_thd_order_before(const THD*, const THD *) +{ return 0; } + +void wsrep_handle_SR_rollback(THD*, THD*) { } + +my_bool wsrep_thd_skip_locking(const THD*) +{ return 0;} + +const char* wsrep_get_sr_table_name() +{ return 0; } + +my_bool wsrep_get_debug() +{ return 0;} diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc new file mode 100644 index 00000000000..f1637e2ece0 --- /dev/null +++ b/sql/wsrep_high_priority_service.cc @@ -0,0 +1,649 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "wsrep_high_priority_service.h" +#include "wsrep_applier.h" +#include "wsrep_binlog.h" +#include "wsrep_schema.h" +#include "wsrep_xid.h" +#include "wsrep_trans_observer.h" + +#include "sql_class.h" /* THD */ +#include "transaction.h" +#include "debug_sync.h" +/* RLI */ +#include "rpl_rli.h" +#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1 +#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2 +#include "slave.h" +#include "rpl_mi.h" + +namespace +{ +/* + Scoped mode for applying non-transactional write sets (TOI) + */ +class Wsrep_non_trans_mode +{ +public: + Wsrep_non_trans_mode(THD* thd, const wsrep::ws_meta& ws_meta) + : m_thd(thd) + , m_option_bits(thd->variables.option_bits) + , m_server_status(thd->server_status) + { + m_thd->variables.option_bits&= ~OPTION_BEGIN; + m_thd->server_status&= ~SERVER_STATUS_IN_TRANS; + m_thd->wsrep_cs().enter_toi(ws_meta); + } + ~Wsrep_non_trans_mode() + { + m_thd->variables.option_bits= m_option_bits; + m_thd->server_status= m_server_status; + m_thd->wsrep_cs().leave_toi(); + } +private: + Wsrep_non_trans_mode(const Wsrep_non_trans_mode&); + Wsrep_non_trans_mode& operator=(const Wsrep_non_trans_mode&); + THD* m_thd; + ulonglong m_option_bits; + uint m_server_status; +}; +} + +static rpl_group_info* wsrep_relay_group_init(THD* thd, const char* log_fname) +{ + Relay_log_info* rli= new Relay_log_info(false); + + if (!rli->relay_log.description_event_for_exec) + { + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + } + + static LEX_CSTRING connection_name= { STRING_WITH_LEN("wsrep") }; + + /* + Master_info's constructor initializes rpl_filter by either an already + constructed Rpl_filter object from global 'rpl_filters' list if the + specified connection name is same, or it constructs 
a new Rpl_filter + object and adds it to rpl_filters. This object is later destructed by + Master_info's destructor by looking it up based on connection name in + rpl_filters list. + + However, since all Master_info objects created here would share same + connection name ("wsrep"), destruction of any of the existing Master_info + objects (in wsrep_return_from_bf_mode()) would free rpl_filter referenced + by any/all existing Master_info objects. + + In order to avoid that, we have added a check in Master_info's destructor + to not free the "wsrep" rpl_filter. It will eventually be freed by + free_all_rpl_filters() when server terminates. + */ + rli->mi= new Master_info(&connection_name, false); + + struct rpl_group_info *rgi= new rpl_group_info(rli); + rgi->thd= rli->sql_driver_thd= thd; + + if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) + { + rgi->deferred_events= new Deferred_log_events(rli); + } + + return rgi; +} + +static void wsrep_setup_uk_and_fk_checks(THD* thd) +{ + /* Tune FK and UK checking policy. These are reset back to original + in Wsrep_high_priority_service destructor. 
*/ + if (wsrep_slave_UK_checks == FALSE) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + if (wsrep_slave_FK_checks == FALSE) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; +} + +/**************************************************************************** + High priority service +*****************************************************************************/ + +Wsrep_high_priority_service::Wsrep_high_priority_service(THD* thd) + : wsrep::high_priority_service(Wsrep_server_state::instance()) + , wsrep::high_priority_context(thd->wsrep_cs()) + , m_thd(thd) + , m_rli() +{ + LEX_CSTRING db_str= { NULL, 0 }; + m_shadow.option_bits = thd->variables.option_bits; + m_shadow.server_status= thd->server_status; + m_shadow.vio = thd->net.vio; + m_shadow.tx_isolation = thd->variables.tx_isolation; + m_shadow.db = (char *)thd->db.str; + m_shadow.db_length = thd->db.length; + m_shadow.user_time = thd->user_time; + m_shadow.row_count_func= thd->get_row_count_func(); + m_shadow.wsrep_applier= thd->wsrep_applier; + + /* Disable general logging on applier threads */ + thd->variables.option_bits |= OPTION_LOG_OFF; + /* Enable binlogging if opt_log_slave_updates is set */ + if (opt_log_slave_updates) + thd->variables.option_bits|= OPTION_BIN_LOG; + else + thd->variables.option_bits&= ~(OPTION_BIN_LOG); + + thd->net.vio= 0; + thd->reset_db(&db_str); + thd->clear_error(); + thd->variables.tx_isolation= ISO_READ_COMMITTED; + thd->tx_isolation = ISO_READ_COMMITTED; + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + /* Make THD wsrep_applier so that it cannot be killed */ + thd->wsrep_applier= true; + + if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init(thd, "wsrep_relay"); + + m_rgi= thd->wsrep_rgi; + m_rgi->thd= thd; + m_rli= m_rgi->rli; + 
thd_proc_info(thd, "wsrep applier idle"); +} + +Wsrep_high_priority_service::~Wsrep_high_priority_service() +{ + THD* thd= m_thd; + thd->variables.option_bits = m_shadow.option_bits; + thd->server_status = m_shadow.server_status; + thd->net.vio = m_shadow.vio; + thd->variables.tx_isolation= m_shadow.tx_isolation; + LEX_CSTRING db_str= { m_shadow.db, m_shadow.db_length }; + thd->reset_db(&db_str); + thd->user_time = m_shadow.user_time; + + if (thd->wsrep_rgi && thd->wsrep_rgi->rli) + delete thd->wsrep_rgi->rli->mi; + if (thd->wsrep_rgi) + delete thd->wsrep_rgi->rli; + delete thd->wsrep_rgi; + thd->wsrep_rgi= NULL; + + thd->set_row_count_func(m_shadow.row_count_func); + thd->wsrep_applier = m_shadow.wsrep_applier; +} + +int Wsrep_high_priority_service::start_transaction( + const wsrep::ws_handle& ws_handle, const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER(" Wsrep_high_priority_service::start_transaction"); + DBUG_RETURN(m_thd->wsrep_cs().start_transaction(ws_handle, ws_meta)); +} + +const wsrep::transaction& Wsrep_high_priority_service::transaction() const +{ + DBUG_ENTER(" Wsrep_high_priority_service::transaction"); + DBUG_RETURN(m_thd->wsrep_trx()); +} + +void Wsrep_high_priority_service::adopt_transaction(const wsrep::transaction& transaction) +{ + DBUG_ENTER(" Wsrep_high_priority_service::adopt_transaction"); + m_thd->wsrep_cs().adopt_transaction(transaction); + DBUG_VOID_RETURN; +} + + +int Wsrep_high_priority_service::append_fragment_and_commit( + const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_high_priority_service::append_fragment_and_commit"); + int ret= start_transaction(ws_handle, ws_meta); + /* + Start transaction explicitly to avoid early commit via + trans_commit_stmt() in append_fragment() + */ + ret= ret || trans_begin(m_thd); + ret= ret || wsrep_schema->append_fragment(m_thd, + ws_meta.server_id(), + ws_meta.transaction_id(), + ws_meta.seqno(), + ws_meta.flags(), + data); + + 
/* + Note: The commit code below seems to be identical to + Wsrep_storage_service::commit(). Consider implementing + common utility function to deal with commit. + */ + const bool do_binlog_commit= (opt_log_slave_updates && + wsrep_gtid_mode && + m_thd->variables.gtid_seq_no); + /* + Write skip event into binlog if gtid_mode is on. This is to + maintain gtid continuity. + */ + if (do_binlog_commit) + { + ret= wsrep_write_skip_event(m_thd); + } + + if (!ret) + { + ret= m_thd->wsrep_cs().prepare_for_ordering(ws_handle, + ws_meta, true); + } + + if (!ret) + { + DBUG_ASSERT(wsrep_thd_trx_seqno(m_thd) > 0); + if (!do_binlog_commit) + { + ret= wsrep_before_commit(m_thd, true); + } + ret= ret || trans_commit(m_thd); + if (!do_binlog_commit) + { + if (opt_log_slave_updates) + { + ret= ret || wsrep_ordered_commit(m_thd, true, wsrep_apply_error()); + } + ret= ret || wsrep_after_commit(m_thd, true); + } + } + m_thd->wsrep_cs().after_applying(); + m_thd->mdl_context.release_transactional_locks(); + + thd_proc_info(m_thd, "wsrep applier committed"); + + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::remove_fragments(const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::remove_fragments"); + int ret= wsrep_schema->remove_fragments(m_thd, + ws_meta.server_id(), + ws_meta.transaction_id(), + m_thd->wsrep_sr().fragments()); + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::commit(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::commit"); + THD* thd= m_thd; + DBUG_ASSERT(thd->wsrep_trx().active()); + thd->wsrep_cs().prepare_for_ordering(ws_handle, ws_meta, true); + thd_proc_info(thd, "committing"); + + int ret= 0; + const bool is_ordered= !ws_meta.seqno().is_undefined(); + /* If opt_log_slave_updates is not on, applier does not write + anything to binlog cache and neither wsrep_before_commit() + nor wsrep_after_commit() will be reached from binlog code + path for applier. 
Therefore run wsrep_before_commit() + and wsrep_after_commit() here. wsrep_ordered_commit() + will be called from wsrep_ordered_commit_if_no_binlog(). */ + if (!opt_log_slave_updates && !opt_bin_log && is_ordered) + { + if (m_thd->transaction.all.no_2pc == false) + { + ret= wsrep_before_prepare(thd, true); + ret= ret || wsrep_after_prepare(thd, true); + } + ret= ret || wsrep_before_commit(thd, true); + } + ret= ret || trans_commit(thd); + + if (ret == 0) + { + m_rgi->cleanup_context(thd, 0); + } + + if (ret == 0 && !opt_log_slave_updates && !opt_bin_log && is_ordered) + { + ret= wsrep_after_commit(thd, true); + } + + m_thd->mdl_context.release_transactional_locks(); + + thd_proc_info(thd, "wsrep applier committed"); + + if (!is_ordered) + { + /* Wsrep commit was not ordered so it does not go through commit time + hooks and remains active. Roll it back to make cleanup happen + in after_applying() call. */ + m_thd->wsrep_cs().before_rollback(); + m_thd->wsrep_cs().after_rollback(); + } + + must_exit_= check_exit_status(); + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::rollback"); + m_thd->wsrep_cs().prepare_for_ordering(ws_handle, ws_meta, false); + int ret= (trans_rollback_stmt(m_thd) || trans_rollback(m_thd)); + m_thd->mdl_context.release_transactional_locks(); + m_thd->mdl_context.release_explicit_locks(); + DBUG_RETURN(ret); +} + +int Wsrep_high_priority_service::apply_toi(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_high_priority_service::apply_toi"); + THD* thd= m_thd; + Wsrep_non_trans_mode non_trans_mode(thd, ws_meta); + + wsrep::client_state& client_state(thd->wsrep_cs()); + DBUG_ASSERT(client_state.in_toi()); + + thd_proc_info(thd, "wsrep applier toi"); + + WSREP_DEBUG("Wsrep_high_priority_service::apply_toi: %lld", + client_state.toi_meta().seqno().get()); + + int ret= 
wsrep_apply_events(thd, m_rli, data.data(), data.size()); + if (ret != 0 || thd->wsrep_has_ignored_error) + { + wsrep_dump_rbr_buf_with_header(thd, data.data(), data.size()); + thd->wsrep_has_ignored_error= false; + /* todo: error voting */ + } + trans_commit(thd); + + thd->close_temporary_tables(); + wsrep_set_SE_checkpoint(client_state.toi_meta().gtid()); + + must_exit_= check_exit_status(); + + DBUG_RETURN(ret); +} + +void Wsrep_high_priority_service::store_globals() +{ + DBUG_ENTER("Wsrep_high_priority_service::store_globals"); + /* In addition to calling THD::store_globals(), call + wsrep::client_state::store_globals() to gain ownership of + the client state */ + m_thd->store_globals(); + m_thd->wsrep_cs().store_globals(); + DBUG_VOID_RETURN; +} + +void Wsrep_high_priority_service::reset_globals() +{ + DBUG_ENTER("Wsrep_high_priority_service::reset_globals"); + m_thd->reset_globals(); + DBUG_VOID_RETURN; +} + +void Wsrep_high_priority_service::switch_execution_context(wsrep::high_priority_service& orig_high_priority_service) +{ + DBUG_ENTER("Wsrep_high_priority_service::switch_execution_context"); + Wsrep_high_priority_service& + orig_hps= static_cast<Wsrep_high_priority_service&>(orig_high_priority_service); + m_thd->thread_stack= orig_hps.m_thd->thread_stack; + DBUG_VOID_RETURN; +} + +int Wsrep_high_priority_service::log_dummy_write_set(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_high_priority_service::log_dummy_write_set"); + int ret= 0; + DBUG_PRINT("info", + ("Wsrep_high_priority_service::log_dummy_write_set: seqno=%lld", + ws_meta.seqno().get())); + m_thd->wsrep_cs().start_transaction(ws_handle, ws_meta); + WSREP_DEBUG("Log dummy write set %lld", ws_meta.seqno().get()); + if (!(opt_log_slave_updates && wsrep_gtid_mode && m_thd->variables.gtid_seq_no)) + { + m_thd->wsrep_cs().before_rollback(); + m_thd->wsrep_cs().after_rollback(); + } + m_thd->wsrep_cs().after_applying(); + DBUG_RETURN(ret); +} + +void 
Wsrep_high_priority_service::debug_crash(const char* crash_point) +{ + DBUG_ASSERT(m_thd == current_thd); + DBUG_EXECUTE_IF(crash_point, DBUG_SUICIDE();); +} + +/**************************************************************************** + Applier service +*****************************************************************************/ + +Wsrep_applier_service::Wsrep_applier_service(THD* thd) + : Wsrep_high_priority_service(thd) +{ + thd->wsrep_applier_service= this; + thd->wsrep_cs().open(wsrep::client_id(thd->thread_id)); + thd->wsrep_cs().before_command(); + thd->wsrep_cs().debug_log_level(wsrep_debug); + +} + +Wsrep_applier_service::~Wsrep_applier_service() +{ + m_thd->wsrep_cs().after_command_before_result(); + m_thd->wsrep_cs().after_command_after_result(); + m_thd->wsrep_cs().close(); + m_thd->wsrep_cs().cleanup(); +} + +int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_applier_service::apply_write_set"); + THD* thd= m_thd; + + thd->variables.option_bits |= OPTION_BEGIN; + thd->variables.option_bits |= OPTION_NOT_AUTOCOMMIT; + DBUG_ASSERT(thd->wsrep_trx().active()); + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_executing); + + thd_proc_info(thd, "applying write set"); + /* moved dbug sync point here, after possible THD switch for SR transactions + has been done + */ + /* Allow tests to block the applier thread using the DBUG facilities */ + DBUG_EXECUTE_IF("sync.wsrep_apply_cb", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_apply_cb_reached " + "WAIT_FOR signal.wsrep_apply_cb"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); + + wsrep_setup_uk_and_fk_checks(thd); + + int ret= wsrep_apply_events(thd, m_rli, data.data(), data.size()); + + if (ret || thd->wsrep_has_ignored_error) + { + wsrep_dump_rbr_buf_with_header(thd, data.data(), data.size()); + } + + thd->close_temporary_tables(); + if (!ret && !(ws_meta.flags() & 
wsrep::provider::flag::commit)) + { + thd->wsrep_cs().fragment_applied(ws_meta.seqno()); + } + thd_proc_info(thd, "wsrep applied write set"); + DBUG_RETURN(ret); +} + +void Wsrep_applier_service::after_apply() +{ + DBUG_ENTER("Wsrep_applier_service::after_apply"); + wsrep_after_apply(m_thd); + DBUG_VOID_RETURN; +} + +bool Wsrep_applier_service::check_exit_status() const +{ + bool ret= false; + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + if (wsrep_slave_count_change < 0) + { + ++wsrep_slave_count_change; + ret= true; + } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + return ret; +} + +/**************************************************************************** + Replayer service +*****************************************************************************/ + +Wsrep_replayer_service::Wsrep_replayer_service(THD* thd) + : Wsrep_high_priority_service(thd) + , m_da_shadow() + , m_replay_status() +{ + /* Response must not have been sent to client */ + DBUG_ASSERT(!thd->get_stmt_da()->is_sent()); + /* PS reprepare observer should have been removed already + open_table() will fail if we have dangling observer here */ + DBUG_ASSERT(!thd->m_reprepare_observer); + /* Replaying should happen always from after_statement() hook + after rollback, which should guarantee that there are no + transactional locks */ + DBUG_ASSERT(!thd->mdl_context.has_transactional_locks()); + + /* Make a shadow copy of diagnostics area and reset */ + m_da_shadow.status= thd->get_stmt_da()->status(); + if (m_da_shadow.status == Diagnostics_area::DA_OK) + { + m_da_shadow.affected_rows= thd->get_stmt_da()->affected_rows(); + m_da_shadow.last_insert_id= thd->get_stmt_da()->last_insert_id(); + strmake(m_da_shadow.message, thd->get_stmt_da()->message(), + sizeof(m_da_shadow.message) - 1); + } + thd->get_stmt_da()->reset_diagnostics_area(); + + /* Release explicit locks */ + if (thd->locked_tables_mode && thd->lock) + { + WSREP_WARN("releasing table lock for replaying (%llu)", + thd->thread_id); + 
thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + + /* + Replaying will call MYSQL_START_STATEMENT when handling + BEGIN Query_log_event so end statement must be called before + replaying. + */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + thd_proc_info(thd, "wsrep replaying trx"); +} + +Wsrep_replayer_service::~Wsrep_replayer_service() +{ + THD* thd= m_thd; + DBUG_ASSERT(!thd->get_stmt_da()->is_sent()); + DBUG_ASSERT(!thd->get_stmt_da()->is_set()); + if (m_replay_status == wsrep::provider::success) + { + DBUG_ASSERT(thd->wsrep_cs().current_error() == wsrep::e_success); + thd->killed= NOT_KILLED; + if (m_da_shadow.status == Diagnostics_area::DA_OK) + { + my_ok(thd, + m_da_shadow.affected_rows, + m_da_shadow.last_insert_id, + m_da_shadow.message); + } + else + { + my_ok(thd); + } + } + else if (m_replay_status == wsrep::provider::error_certification_failed) + { + DBUG_ASSERT(thd->wsrep_cs().current_error() == wsrep::e_deadlock_error); + } + else + { + DBUG_ASSERT(0); + WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s", + m_replay_status, + thd->db.str, WSREP_QUERY(thd)); + unireg_abort(1); + } +} + +int Wsrep_replayer_service::apply_write_set(const wsrep::ws_meta& ws_meta, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_replayer_service::apply_write_set"); + THD* thd= m_thd; + + DBUG_ASSERT(thd->wsrep_trx().active()); + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_replaying); + + wsrep_setup_uk_and_fk_checks(thd); + + int ret= 0; + if (!wsrep::starts_transaction(ws_meta.flags())) + { + DBUG_ASSERT(thd->wsrep_trx().is_streaming()); + ret= wsrep_schema->replay_transaction(thd, + m_rli, + ws_meta, + thd->wsrep_sr().fragments()); + } + + ret= ret || wsrep_apply_events(thd, m_rli, data.data(), data.size()); + + if (ret || thd->wsrep_has_ignored_error) + { + wsrep_dump_rbr_buf_with_header(thd, 
data.data(), data.size()); + } + + thd->close_temporary_tables(); + if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) + { + thd->wsrep_cs().fragment_applied(ws_meta.seqno()); + } + + thd_proc_info(thd, "wsrep replayed write set"); + DBUG_RETURN(ret); +} diff --git a/sql/wsrep_high_priority_service.h b/sql/wsrep_high_priority_service.h new file mode 100644 index 00000000000..4012ca60a3e --- /dev/null +++ b/sql/wsrep_high_priority_service.h @@ -0,0 +1,118 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_HIGH_PRIORITY_SERVICE_H +#define WSREP_HIGH_PRIORITY_SERVICE_H + +#include "wsrep/high_priority_service.hpp" +#include "wsrep/client_state.hpp" +#include "my_global.h" +#include "sql_error.h" /* Diagnostics area */ +#include "sql_class.h" /* rpl_group_info */ + +class THD; +class Relay_log_info; +class Wsrep_server_service; + +class Wsrep_high_priority_service : + public wsrep::high_priority_service, + public wsrep::high_priority_context +{ +public: + Wsrep_high_priority_service(THD*); + ~Wsrep_high_priority_service(); + int start_transaction(const wsrep::ws_handle&, + const wsrep::ws_meta&); + const wsrep::transaction& transaction() const; + void adopt_transaction(const wsrep::transaction&); + int apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&) = 0; + int 
append_fragment_and_commit(const wsrep::ws_handle&, + const wsrep::ws_meta&, + const wsrep::const_buffer&); + int remove_fragments(const wsrep::ws_meta&); + int commit(const wsrep::ws_handle&, const wsrep::ws_meta&); + int rollback(const wsrep::ws_handle&, const wsrep::ws_meta&); + int apply_toi(const wsrep::ws_meta&, const wsrep::const_buffer&); + void store_globals(); + void reset_globals(); + void switch_execution_context(wsrep::high_priority_service&); + int log_dummy_write_set(const wsrep::ws_handle&, + const wsrep::ws_meta&); + + virtual bool check_exit_status() const = 0; + void debug_crash(const char*); +protected: + friend Wsrep_server_service; + THD* m_thd; + Relay_log_info* m_rli; + rpl_group_info* m_rgi; + struct shadow + { + ulonglong option_bits; + uint server_status; + struct st_vio* vio; + ulong tx_isolation; + char* db; + size_t db_length; + //struct timeval user_time; + my_hrtime_t user_time; + longlong row_count_func; + bool wsrep_applier; +} m_shadow; +}; + +class Wsrep_applier_service : public Wsrep_high_priority_service +{ +public: + Wsrep_applier_service(THD*); + ~Wsrep_applier_service(); + int apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&); + void after_apply(); + bool is_replaying() const { return false; } + bool check_exit_status() const; +}; + +class Wsrep_replayer_service : public Wsrep_high_priority_service +{ +public: + Wsrep_replayer_service(THD*); + ~Wsrep_replayer_service(); + int apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&); + void after_apply() { } + bool is_replaying() const { return true; } + void replay_status(enum wsrep::provider::status status) + { m_replay_status = status; } + enum wsrep::provider::status replay_status() const + { return m_replay_status; } + /* Replayer should never be forced to exit */ + bool check_exit_status() const { return false; } +private: + struct da_shadow + { + enum Diagnostics_area::enum_diagnostics_status status; + ulonglong affected_rows; + ulonglong 
last_insert_id; + char message[MYSQL_ERRMSG_SIZE]; + da_shadow() + : status() + , affected_rows() + , last_insert_id() + , message() + { } + } m_da_shadow; + enum wsrep::provider::status m_replay_status; +}; + +#endif /* WSREP_HIGH_PRIORITY_SERVICE_H */ diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc deleted file mode 100644 index 8110faf7d11..00000000000 --- a/sql/wsrep_hton.cc +++ /dev/null @@ -1,658 +0,0 @@ -/* Copyright 2008-2015 Codership Oy <http://www.codership.com> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ - -#include "mariadb.h" -#include <mysqld.h> -#include "sql_base.h" -#include "rpl_filter.h" -#include <sql_class.h> -#include "wsrep_mysqld.h" -#include "wsrep_binlog.h" -#include "wsrep_xid.h" -#include <cstdio> -#include <cstdlib> -#include "debug_sync.h" - -extern handlerton *binlog_hton; -extern int binlog_close_connection(handlerton *hton, THD *thd); -extern ulonglong thd_to_trx_id(THD *thd); - -extern "C" int thd_binlog_format(const MYSQL_THD thd); -// todo: share interface with ha_innodb.c - -/* - Cleanup after local transaction commit/rollback, replay or TOI. 
-*/ -void wsrep_cleanup_transaction(THD *thd) -{ - if (!WSREP(thd)) return; - - if (wsrep_emulate_bin_log) thd_binlog_trx_reset(thd); - thd->wsrep_ws_handle.trx_id= WSREP_UNDEFINED_TRX_ID; - thd->wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; - thd->wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; - thd->wsrep_exec_mode= LOCAL_STATE; - thd->wsrep_affected_rows= 0; - thd->wsrep_skip_wsrep_GTID= false; - return; -} - -/* - wsrep hton -*/ -handlerton *wsrep_hton; - - -/* - Registers wsrep hton at commit time if transaction has registered htons - for supported engine types. - - Hton should not be registered for TOTAL_ORDER operations. - - Registration is needed for both LOCAL_MODE and REPL_RECV transactions to run - commit in 2pc so that wsrep position gets properly recorded in storage - engines. - - Note that all hton calls should immediately return for threads that are - in REPL_RECV mode as their states are controlled by wsrep appliers or - replaying code. Only threads in LOCAL_MODE should run wsrep callbacks - from hton methods. -*/ -void wsrep_register_hton(THD* thd, bool all) -{ - if (WSREP(thd) && thd->wsrep_exec_mode != TOTAL_ORDER && - !thd->wsrep_apply_toi) - { - if (thd->wsrep_exec_mode == LOCAL_STATE && - (thd_sql_command(thd) == SQLCOM_OPTIMIZE || - thd_sql_command(thd) == SQLCOM_ANALYZE || - thd_sql_command(thd) == SQLCOM_REPAIR) && - thd->lex->no_write_to_binlog == 1) - { - WSREP_DEBUG("Skipping wsrep_register_hton for LOCAL sql admin command : %s", - thd->query()); - return; - } - - THD_TRANS *trans=all ? 
&thd->transaction.all : &thd->transaction.stmt; - for (Ha_trx_info *i= trans->ha_list; i; i = i->next()) - { - if ((i->ht()->db_type == DB_TYPE_INNODB) || - (i->ht()->db_type == DB_TYPE_TOKUDB)) - { - trans_register_ha(thd, all, wsrep_hton); - - /* follow innodb read/write settting - * but, as an exception: CTAS with empty result set will not be - * replicated unless we declare wsrep hton as read/write here - */ - if (i->is_trx_read_write() || - ((thd->lex->sql_command == SQLCOM_CREATE_TABLE || - thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE) && - thd->wsrep_exec_mode == LOCAL_STATE)) - { - thd->ha_data[wsrep_hton->slot].ha_info[all].set_trx_read_write(); - } - break; - } - } - } -} - -/* - Calls wsrep->post_commit() for locally executed transactions that have - got seqno from provider (must commit) and don't require replaying. - */ -void wsrep_post_commit(THD* thd, bool all) -{ - if (!WSREP(thd)) return; - - switch (thd->wsrep_exec_mode) - { - case LOCAL_COMMIT: - { - DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); - if (wsrep && wsrep->post_commit(wsrep, &thd->wsrep_ws_handle)) - { - DBUG_PRINT("wsrep", ("set committed fail")); - WSREP_WARN("set committed fail: %llu %d", - (long long)thd->real_id, thd->get_stmt_da()->status()); - } - wsrep_cleanup_transaction(thd); - break; - } - case LOCAL_STATE: - { - /* non-InnoDB statements may have populated events in stmt cache - => cleanup - */ - WSREP_DEBUG("cleanup transaction for LOCAL_STATE"); - /* - Run post-rollback hook to clean up in the case if - some keys were populated for the transaction in provider - but during commit time there was no write set to replicate. - This may happen when client sets the SAVEPOINT and immediately - rolls back to savepoint after first operation. 
- */ - if (all && thd->wsrep_conflict_state != MUST_REPLAY && - wsrep && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) - { - WSREP_WARN("post_rollback fail: %llu %d", - (long long)thd->thread_id, thd->get_stmt_da()->status()); - } - wsrep_cleanup_transaction(thd); - break; - } - default: break; - } -} - -/* - wsrep exploits binlog's caches even if binlogging itself is not - activated. In such case connection close needs calling - actual binlog's method. - Todo: split binlog hton from its caches to use ones by wsrep - without referring to binlog's stuff. -*/ -static int -wsrep_close_connection(handlerton* hton, THD* thd) -{ - DBUG_ENTER("wsrep_close_connection"); - - if (thd->wsrep_exec_mode == REPL_RECV) - { - DBUG_RETURN(0); - } - - if (wsrep_emulate_bin_log && thd_get_ha_data(thd, binlog_hton) != NULL) - binlog_hton->close_connection (binlog_hton, thd); - DBUG_RETURN(0); -} - -/* - prepare/wsrep_run_wsrep_commit can fail in two ways - - certification test or an equivalent. As a result, - the current transaction just rolls back - Error codes: - WSREP_TRX_CERT_FAIL, WSREP_TRX_SIZE_EXCEEDED, WSREP_TRX_ERROR - - a post-certification failure makes this server unable to - commit its own WS and therefore the server must abort -*/ -static int wsrep_prepare(handlerton *hton, THD *thd, bool all) -{ - DBUG_ENTER("wsrep_prepare"); - - if (thd->wsrep_exec_mode == REPL_RECV) - { - DBUG_RETURN(0); - } - - DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); - DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); - DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); - - if ((all || - !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && - (thd->variables.wsrep_on && !wsrep_trans_cache_is_empty(thd))) - { - int res= wsrep_run_wsrep_commit(thd, all); - if (res != 0) - { - if (res == WSREP_TRX_SIZE_EXCEEDED) - res= EMSGSIZE; - else - res= EDEADLK; // for a better error message - } - DBUG_RETURN (res); - } - DBUG_RETURN(0); -} 
- -static int wsrep_savepoint_set(handlerton *hton, THD *thd, void *sv) -{ - DBUG_ENTER("wsrep_savepoint_set"); - - if (thd->wsrep_exec_mode == REPL_RECV) - { - DBUG_RETURN(0); - } - - if (!wsrep_emulate_bin_log) DBUG_RETURN(0); - int rcode = wsrep_binlog_savepoint_set(thd, sv); - DBUG_RETURN(rcode); -} - -static int wsrep_savepoint_rollback(handlerton *hton, THD *thd, void *sv) -{ - DBUG_ENTER("wsrep_savepoint_rollback"); - - if (thd->wsrep_exec_mode == REPL_RECV) - { - DBUG_RETURN(0); - } - - if (!wsrep_emulate_bin_log) DBUG_RETURN(0); - int rcode = wsrep_binlog_savepoint_rollback(thd, sv); - DBUG_RETURN(rcode); -} - -static int wsrep_rollback(handlerton *hton, THD *thd, bool all) -{ - DBUG_ENTER("wsrep_rollback"); - - if (thd->wsrep_exec_mode == REPL_RECV) - { - DBUG_RETURN(0); - } - - mysql_mutex_lock(&thd->LOCK_thd_data); - switch (thd->wsrep_exec_mode) - { - case TOTAL_ORDER: - case REPL_RECV: - mysql_mutex_unlock(&thd->LOCK_thd_data); - WSREP_DEBUG("Avoiding wsrep rollback for failed DDL: %s", thd->query()); - DBUG_RETURN(0); - default: break; - } - - if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && - thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY) - { - if (wsrep && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) - { - DBUG_PRINT("wsrep", ("setting rollback fail")); - WSREP_ERROR("settting rollback fail: thd: %llu, schema: %s, SQL: %s", - (long long)thd->real_id, thd->get_db(), thd->query()); - } - wsrep_cleanup_transaction(thd); - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - DBUG_RETURN(0); -} - -int wsrep_commit(handlerton *hton, THD *thd, bool all) -{ - DBUG_ENTER("wsrep_commit"); - - if (thd->wsrep_exec_mode == REPL_RECV) - { - DBUG_RETURN(0); - } - - mysql_mutex_lock(&thd->LOCK_thd_data); - if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && - (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) - { - if (thd->wsrep_exec_mode == LOCAL_COMMIT) - { - 
DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); - /* - Call to wsrep->post_commit() (moved to wsrep_post_commit()) must - be done only after commit has done for all involved htons. - */ - DBUG_PRINT("wsrep", ("commit")); - } - else - { - /* - Transaction didn't go through wsrep->pre_commit() so just roll back - possible changes to clean state. - */ - if (WSREP_PROVIDER_EXISTS) { - if (wsrep && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) - { - DBUG_PRINT("wsrep", ("setting rollback fail")); - WSREP_ERROR("settting rollback fail: thd: %llu, schema: %s, SQL: %s", - (long long)thd->real_id, thd->get_db(), - thd->query()); - } - } - wsrep_cleanup_transaction(thd); - } - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - DBUG_RETURN(0); -} - - -extern Rpl_filter* binlog_filter; -extern my_bool opt_log_slave_updates; - -enum wsrep_trx_status -wsrep_run_wsrep_commit(THD *thd, bool all) -{ - int rcode= -1; - size_t data_len= 0; - IO_CACHE *cache; - int replay_round= 0; - DBUG_ENTER("wsrep_run_wsrep_commit"); - - if (thd->get_stmt_da()->is_error()) { - WSREP_DEBUG("commit issue, error: %d %s", - thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message()); - } - - DEBUG_SYNC(thd, "wsrep_before_replication"); - - if (thd->slave_thread && !opt_log_slave_updates) DBUG_RETURN(WSREP_TRX_OK); - - if (thd->wsrep_exec_mode == REPL_RECV) { - - mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state == MUST_ABORT) { - if (wsrep_debug) - WSREP_INFO("WSREP: must abort for BF"); - DBUG_PRINT("wsrep", ("BF apply commit fail")); - thd->wsrep_conflict_state = NO_CONFLICT; - mysql_mutex_unlock(&thd->LOCK_thd_data); - // - // TODO: test all calls of the rollback. 
- // rollback must happen automagically innobase_rollback(hton, thd, 1); - // - DBUG_RETURN(WSREP_TRX_ERROR); - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - } - - if (thd->wsrep_exec_mode != LOCAL_STATE) DBUG_RETURN(WSREP_TRX_OK); - - if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING) { - WSREP_DEBUG("commit for consistency check: %s", thd->query()); - DBUG_RETURN(WSREP_TRX_OK); - } - - DBUG_PRINT("wsrep", ("replicating commit")); - - mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state == MUST_ABORT) { - DBUG_PRINT("wsrep", ("replicate commit fail")); - thd->wsrep_conflict_state = ABORTED; - mysql_mutex_unlock(&thd->LOCK_thd_data); - if (wsrep_debug) { - WSREP_INFO("innobase_commit, abort %s", - (thd->query()) ? thd->query() : "void"); - } - DBUG_RETURN(WSREP_TRX_CERT_FAIL); - } - - mysql_mutex_lock(&LOCK_wsrep_replaying); - - DBUG_PRINT("info", ("wsrep_replaying: %d wsrep_conflict_state: %d killed: %d shutdown_in_progress: %d", - (int) wsrep_replaying, (int) thd->wsrep_conflict_state, - (int) thd->killed, - (int) shutdown_in_progress)); - - while (wsrep_replaying > 0 && - thd->wsrep_conflict_state == NO_CONFLICT && - thd->killed == NOT_KILLED && - !shutdown_in_progress) - { - - mysql_mutex_unlock(&LOCK_wsrep_replaying); - mysql_mutex_unlock(&thd->LOCK_thd_data); - - mysql_mutex_lock(&thd->mysys_var->mutex); - thd_proc_info(thd, "WSREP waiting on replaying"); - thd->mysys_var->current_mutex= &LOCK_wsrep_replaying; - thd->mysys_var->current_cond= &COND_wsrep_replaying; - mysql_mutex_unlock(&thd->mysys_var->mutex); - - mysql_mutex_lock(&LOCK_wsrep_replaying); - // Using timedwait is a hack to avoid deadlock in case if BF victim - // misses the signal. 
- struct timespec wtime = {0, 1000000}; - mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying, - &wtime); - - if (replay_round++ % 100000 == 0) - WSREP_DEBUG("commit waiting for replaying: replayers %d, thd: %lld " - "conflict: %d (round: %d)", - wsrep_replaying, (longlong) thd->thread_id, - thd->wsrep_conflict_state, replay_round); - - mysql_mutex_unlock(&LOCK_wsrep_replaying); - - mysql_mutex_lock(&thd->mysys_var->mutex); - thd->mysys_var->current_mutex= 0; - thd->mysys_var->current_cond= 0; - mysql_mutex_unlock(&thd->mysys_var->mutex); - - mysql_mutex_lock(&thd->LOCK_thd_data); - mysql_mutex_lock(&LOCK_wsrep_replaying); - } - mysql_mutex_unlock(&LOCK_wsrep_replaying); - - if (thd->wsrep_conflict_state == MUST_ABORT) { - DBUG_PRINT("wsrep", ("replicate commit fail")); - thd->wsrep_conflict_state = ABORTED; - mysql_mutex_unlock(&thd->LOCK_thd_data); - WSREP_DEBUG("innobase_commit abort after replaying wait %s", - (thd->query()) ? thd->query() : "void"); - DBUG_RETURN(WSREP_TRX_CERT_FAIL); - } - - thd->wsrep_query_state = QUERY_COMMITTING; - mysql_mutex_unlock(&thd->LOCK_thd_data); - - cache = get_trans_log(thd); - rcode = 0; - if (cache) { - thd->binlog_flush_pending_rows_event(true); - rcode = wsrep_write_cache(wsrep, thd, cache, &data_len); - if (WSREP_OK != rcode) { - WSREP_ERROR("rbr write fail, data_len: %zu, %d", data_len, rcode); - DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); - } - } - - DBUG_PRINT("info", ("rcode: %d wsrep_conflict_state: %d", - rcode, thd->wsrep_conflict_state)); - - if (data_len == 0) - { - if (thd->get_stmt_da()->is_ok() && - thd->get_stmt_da()->affected_rows() > 0 && - !binlog_filter->is_on()) - { - WSREP_DEBUG("empty rbr buffer, query: %s, " - "affected rows: %llu, " - "changed tables: %d, " - "sql_log_bin: %d, " - "wsrep status (%d %d %d)", - thd->query(), thd->get_stmt_da()->affected_rows(), - stmt_has_updated_trans_table(thd), thd->variables.sql_log_bin, - thd->wsrep_exec_mode, thd->wsrep_query_state, - 
thd->wsrep_conflict_state); - } - else - { - WSREP_DEBUG("empty rbr buffer, query: %s", thd->query()); - } - thd->wsrep_query_state= QUERY_EXEC; - DBUG_RETURN(WSREP_TRX_OK); - } - - if (WSREP_UNDEFINED_TRX_ID == thd->wsrep_ws_handle.trx_id) - { - WSREP_WARN("SQL statement was ineffective thd: %lld buf: %zu\n" - "schema: %s \n" - "QUERY: %s\n" - " => Skipping replication", - (longlong) thd->thread_id, data_len, - thd->get_db(), thd->query()); - rcode = WSREP_TRX_FAIL; - } - else if (!rcode) - { - if (WSREP_OK == rcode && wsrep) - rcode = wsrep->pre_commit(wsrep, - (wsrep_conn_id_t)thd->thread_id, - &thd->wsrep_ws_handle, - WSREP_FLAG_COMMIT | - ((thd->wsrep_PA_safe) ? - 0ULL : WSREP_FLAG_PA_UNSAFE), - &thd->wsrep_trx_meta); - - DBUG_PRINT("info", ("rcode after pre_commit: %d", rcode)); - - if (rcode == WSREP_TRX_MISSING) { - WSREP_WARN("Transaction missing in provider, thd: %lld schema: %s SQL: %s", - (longlong) thd->thread_id, - thd->get_db(), thd->query()); - rcode = WSREP_TRX_FAIL; - } else if (rcode == WSREP_BF_ABORT) { - WSREP_DEBUG("thd: %lld seqno: %lld BF aborted by provider, will replay", - (longlong) thd->thread_id, - (longlong) thd->wsrep_trx_meta.gtid.seqno); - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_conflict_state = MUST_REPLAY; - DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0); - mysql_mutex_unlock(&thd->LOCK_thd_data); - mysql_mutex_lock(&LOCK_wsrep_replaying); - wsrep_replaying++; - WSREP_DEBUG("replaying increased: %d, thd: %lld", - wsrep_replaying, (longlong) thd->thread_id); - mysql_mutex_unlock(&LOCK_wsrep_replaying); - } - } else { - WSREP_ERROR("I/O error reading from thd's binlog iocache: " - "errno=%d, io cache code=%d", my_errno, cache->error); - DBUG_ASSERT(0); // failure like this can not normally happen - DBUG_RETURN(WSREP_TRX_ERROR); - } - - mysql_mutex_lock(&thd->LOCK_thd_data); - - DEBUG_SYNC(thd, "wsrep_after_replication"); - - DBUG_PRINT("info", ("rcode: %d wsrep_conflict_state: %d", - rcode, thd->wsrep_conflict_state)); - - 
switch(rcode) { - case 0: - /* - About MUST_ABORT: We assume that even if thd conflict state was set - to MUST_ABORT, underlying transaction was not rolled back or marked - as deadlock victim in QUERY_COMMITTING state. Conflict state is - set to NO_CONFLICT and commit proceeds as usual. - */ - if (thd->wsrep_conflict_state == MUST_ABORT) - thd->wsrep_conflict_state= NO_CONFLICT; - - if (thd->wsrep_conflict_state != NO_CONFLICT) - { - WSREP_WARN("thd: %llu seqno: %lld conflict state %d after post commit", - (longlong) thd->thread_id, - (longlong) thd->wsrep_trx_meta.gtid.seqno, - thd->wsrep_conflict_state); - } - thd->wsrep_exec_mode= LOCAL_COMMIT; - DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); - /* Override XID iff it was generated by mysql */ - if (thd->transaction.xid_state.xid.get_my_xid()) - { - wsrep_xid_init(&thd->transaction.xid_state.xid, - thd->wsrep_trx_meta.gtid.uuid, - thd->wsrep_trx_meta.gtid.seqno); - } - DBUG_PRINT("wsrep", ("replicating commit success")); - break; - case WSREP_BF_ABORT: - DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); - /* fall through */ - case WSREP_TRX_FAIL: - WSREP_DEBUG("commit failed for reason: %d", rcode); - DBUG_PRINT("wsrep", ("replicating commit fail")); - - thd->wsrep_query_state= QUERY_EXEC; - - if (thd->wsrep_conflict_state == MUST_ABORT) { - thd->wsrep_conflict_state= ABORTED; - } - else - { - WSREP_DEBUG("conflict state: %d", thd->wsrep_conflict_state); - if (thd->wsrep_conflict_state == NO_CONFLICT) - { - thd->wsrep_conflict_state = CERT_FAILURE; - WSREP_LOG_CONFLICT(NULL, thd, FALSE); - } - } - mysql_mutex_unlock(&thd->LOCK_thd_data); - - DBUG_RETURN(WSREP_TRX_CERT_FAIL); - - case WSREP_SIZE_EXCEEDED: - WSREP_ERROR("transaction size exceeded"); - mysql_mutex_unlock(&thd->LOCK_thd_data); - DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); - case WSREP_CONN_FAIL: - WSREP_ERROR("connection failure"); - mysql_mutex_unlock(&thd->LOCK_thd_data); - DBUG_RETURN(WSREP_TRX_ERROR); - 
default: - WSREP_ERROR("unknown connection failure"); - mysql_mutex_unlock(&thd->LOCK_thd_data); - DBUG_RETURN(WSREP_TRX_ERROR); - } - - thd->wsrep_query_state= QUERY_EXEC; - mysql_mutex_unlock(&thd->LOCK_thd_data); - - DBUG_RETURN(WSREP_TRX_OK); -} - - -static int wsrep_hton_init(void *p) -{ - wsrep_hton= (handlerton *)p; - //wsrep_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; - wsrep_hton->state= SHOW_OPTION_YES; - wsrep_hton->db_type=(legacy_db_type)0; - wsrep_hton->savepoint_offset= sizeof(my_off_t); - wsrep_hton->close_connection= wsrep_close_connection; - wsrep_hton->savepoint_set= wsrep_savepoint_set; - wsrep_hton->savepoint_rollback= wsrep_savepoint_rollback; - wsrep_hton->commit= wsrep_commit; - wsrep_hton->rollback= wsrep_rollback; - wsrep_hton->prepare= wsrep_prepare; - wsrep_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; // todo: fix flags - return 0; -} - - -struct st_mysql_storage_engine wsrep_storage_engine= -{ MYSQL_HANDLERTON_INTERFACE_VERSION }; - - -maria_declare_plugin(wsrep) -{ - MYSQL_STORAGE_ENGINE_PLUGIN, - &wsrep_storage_engine, - "wsrep", - "Codership Oy", - "A pseudo storage engine to represent transactions in multi-master " - "synchornous replication", - PLUGIN_LICENSE_GPL, - wsrep_hton_init, /* Plugin Init */ - NULL, /* Plugin Deinit */ - 0x0100 /* 1.0 */, - NULL, /* status variables */ - NULL, /* system variables */ - "1.0", /* string version */ - MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ -} -maria_declare_plugin_end; diff --git a/sql/wsrep_mutex.h b/sql/wsrep_mutex.h new file mode 100644 index 00000000000..3454b44e0ec --- /dev/null +++ b/sql/wsrep_mutex.h @@ -0,0 +1,50 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_MUTEX_H +#define WSREP_MUTEX_H + +/* wsrep-lib */ +#include "wsrep/mutex.hpp" + +/* implementation */ +#include "my_pthread.h" + +class Wsrep_mutex : public wsrep::mutex +{ +public: + Wsrep_mutex(mysql_mutex_t& mutex) + : m_mutex(mutex) + { } + + void lock() + { + mysql_mutex_lock(&m_mutex); + } + + void unlock() + { + mysql_mutex_unlock(&m_mutex); + } + + void* native() + { + return &m_mutex; + } +private: + mysql_mutex_t& m_mutex; +}; + +#endif /* WSREP_MUTEX_H */ diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 58b30a1e77f..73f201f12ca 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -14,7 +14,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #include "sql_plugin.h" /* wsrep_plugins_pre_init() */ +#include "my_global.h" +#include "wsrep_server_state.h" + +#include "mariadb.h" #include <mysqld.h> +#include <transaction.h> #include <sql_class.h> #include <sql_parse.h> #include <sql_base.h> /* find_temporary_table() */ @@ -33,26 +38,32 @@ #include "wsrep_var.h" #include "wsrep_binlog.h" #include "wsrep_applier.h" +#include "wsrep_schema.h" #include "wsrep_xid.h" +#include "wsrep_trans_observer.h" +#include "mysql/service_wsrep.h" #include <cstdio> #include <cstdlib> +#include <string> #include "log_event.h" #include <slave.h> -wsrep_t *wsrep = NULL; -/* - wsrep_emulate_bin_log is a flag to tell that binlog has not been configured. 
- wsrep needs to get binlog events from transaction cache even when binlog is - not enabled, wsrep_emulate_bin_log opens needed code paths to make this - possible -*/ -my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface +#include <sstream> + +/* wsrep-lib */ +Wsrep_server_state* Wsrep_server_state::m_instance; + +my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface #ifdef GTID_SUPPORT /* Sidno in global_sid_map corresponding to group uuid */ rpl_sidno wsrep_sidno= -1; #endif /* GTID_SUPPORT */ my_bool wsrep_preordered_opt= FALSE; +/* Streaming Replication */ +const char *wsrep_fragment_units[]= { "bytes", "rows", "statements", NullS }; +const char *wsrep_SR_store_types[]= { "none", "table", NullS }; + /* * Begin configuration options */ @@ -82,7 +93,7 @@ my_bool wsrep_certify_nonPK; // Certify, even when no primary my_bool wsrep_recovery; // Recovery my_bool wsrep_replicate_myisam; // Enable MyISAM replication my_bool wsrep_log_conflicts; -my_bool wsrep_load_data_splitting; // Commit load data every 10K intervals +my_bool wsrep_load_data_splitting= 0; // Commit load data every 10K intervals my_bool wsrep_slave_UK_checks; // Slave thread does UK checks my_bool wsrep_slave_FK_checks; // Slave thread does FK checks my_bool wsrep_restart_slave; // Should mysql slave thread be @@ -107,7 +118,13 @@ my_bool wsrep_restart_slave_activated= 0; // Node has dropped, and slave bool wsrep_new_cluster= false; // Bootstrap the cluster? int wsrep_slave_count_change= 0; // No. of appliers to stop/start int wsrep_to_isolation= 0; // No. 
of active TO isolation threads -long wsrep_max_protocol_version= 3; // Maximum protocol version to use +long wsrep_max_protocol_version= 4; // Maximum protocol version to use +long int wsrep_protocol_version= wsrep_max_protocol_version; +ulong wsrep_trx_fragment_unit= WSREP_FRAG_BYTES; + // unit for fragment size +ulong wsrep_SR_store_type= WSREP_SR_STORE_TABLE; +uint wsrep_ignore_apply_errors= 0; + /* * End configuration options @@ -123,29 +140,33 @@ mysql_mutex_t LOCK_wsrep_sst; mysql_cond_t COND_wsrep_sst; mysql_mutex_t LOCK_wsrep_sst_init; mysql_cond_t COND_wsrep_sst_init; -mysql_mutex_t LOCK_wsrep_rollback; -mysql_cond_t COND_wsrep_rollback; -wsrep_aborting_thd_t wsrep_aborting_thd= NULL; mysql_mutex_t LOCK_wsrep_replaying; mysql_cond_t COND_wsrep_replaying; mysql_mutex_t LOCK_wsrep_slave_threads; mysql_mutex_t LOCK_wsrep_desync; mysql_mutex_t LOCK_wsrep_config_state; +mysql_mutex_t LOCK_wsrep_SR_pool; +mysql_mutex_t LOCK_wsrep_SR_store; int wsrep_replaying= 0; -ulong wsrep_running_threads = 0; // # of currently running wsrep threads +ulong wsrep_running_threads= 0; // # of currently running wsrep threads ulong my_bind_addr; #ifdef HAVE_PSI_INTERFACE -PSI_mutex_key key_LOCK_wsrep_rollback, +PSI_mutex_key key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst, key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init, key_LOCK_wsrep_slave_threads, key_LOCK_wsrep_desync, - key_LOCK_wsrep_config_state; + key_LOCK_wsrep_config_state, + key_LOCK_wsrep_SR_pool, + key_LOCK_wsrep_SR_store, + key_LOCK_wsrep_thd_queue; -PSI_cond_key key_COND_wsrep_rollback, +PSI_cond_key key_COND_wsrep_thd, key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst, - key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread; + key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread, + key_COND_wsrep_thd_queue; + PSI_file_key key_file_wsrep_gra_log; @@ -156,11 +177,12 @@ static PSI_mutex_info wsrep_mutexes[]= { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0}, { 
&key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL}, { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, - { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL}, { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL}, { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL}, { &key_LOCK_wsrep_desync, "LOCK_wsrep_desync", PSI_FLAG_GLOBAL}, - { &key_LOCK_wsrep_config_state, "LOCK_wsrep_config_state", PSI_FLAG_GLOBAL} + { &key_LOCK_wsrep_config_state, "LOCK_wsrep_config_state", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_SR_pool, "LOCK_wsrep_SR_pool", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_SR_store, "LOCK_wsrep_SR_store", PSI_FLAG_GLOBAL} }; static PSI_cond_info wsrep_conds[]= @@ -169,7 +191,7 @@ static PSI_cond_info wsrep_conds[]= { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL}, { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL}, { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0}, - { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_thd, "THD::COND_wsrep_thd", 0}, { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL} }; @@ -179,310 +201,219 @@ static PSI_file_info wsrep_files[]= }; #endif -my_bool wsrep_inited = 0; // initialized ? +my_bool wsrep_inited= 0; // initialized ? -static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; +static wsrep_uuid_t node_uuid= WSREP_UUID_UNDEFINED; +static wsrep_uuid_t cluster_uuid= WSREP_UUID_UNDEFINED; static char cluster_uuid_str[40]= { 0, }; -static const char* cluster_status_str[WSREP_VIEW_MAX] = -{ - "Primary", - "non-Primary", - "Disconnected" -}; static char provider_name[256]= { 0, }; static char provider_version[256]= { 0, }; static char provider_vendor[256]= { 0, }; /* - * wsrep status variables + * Wsrep status variables. 
LOCK_status must be locked When modifying + * these variables, */ -my_bool wsrep_connected = FALSE; -my_bool wsrep_ready = FALSE; // node can accept queries -const char* wsrep_cluster_state_uuid = cluster_uuid_str; -long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; -const char* wsrep_cluster_status = cluster_status_str[WSREP_VIEW_DISCONNECTED]; -long wsrep_cluster_size = 0; -long wsrep_local_index = -1; -long long wsrep_local_bf_aborts = 0; -const char* wsrep_provider_name = provider_name; -const char* wsrep_provider_version = provider_version; -const char* wsrep_provider_vendor = provider_vendor; +my_bool wsrep_connected = FALSE; +my_bool wsrep_ready = FALSE; +const char* wsrep_cluster_state_uuid= cluster_uuid_str; +long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; +const char* wsrep_cluster_status = "Disconnected"; +long wsrep_cluster_size = 0; +long wsrep_local_index = -1; +long long wsrep_local_bf_aborts = 0; +const char* wsrep_provider_name = provider_name; +const char* wsrep_provider_version = provider_version; +const char* wsrep_provider_vendor = provider_vendor; +char* wsrep_provider_capabilities = NULL; +char* wsrep_cluster_capabilities = NULL; /* End wsrep status variables */ -wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; -wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; -long wsrep_protocol_version = 3; - wsp::Config_state *wsrep_config_state; -// Boolean denoting if server is in initial startup phase. This is needed -// to make sure that main thread waiting in wsrep_sst_wait() is signaled -// if there was no state gap on receiving first view event. 
-static my_bool wsrep_startup = TRUE; +wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; +wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; +wsp::node_status local_status; -static void wsrep_log_cb(wsrep_log_level_t level, const char *msg) { - switch (level) { - case WSREP_LOG_INFO: - sql_print_information("WSREP: %s", msg); - break; - case WSREP_LOG_WARN: - sql_print_warning("WSREP: %s", msg); - break; - case WSREP_LOG_ERROR: - case WSREP_LOG_FATAL: +/* + */ +Wsrep_schema *wsrep_schema= 0; + +static void wsrep_log_cb(wsrep::log::level level, const char *msg) +{ + /* + Silence all wsrep related logging from lib and provider if + wsrep is not enabled. + */ + if (WSREP_ON) + { + switch (level) { + case wsrep::log::info: + sql_print_information("WSREP: %s", msg); + break; + case wsrep::log::warning: + sql_print_warning("WSREP: %s", msg); + break; + case wsrep::log::error: sql_print_error("WSREP: %s", msg); break; - case WSREP_LOG_DEBUG: - if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg); - default: - break; + case wsrep::log::debug: + if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg); + default: + break; + } } } -void wsrep_log(void (*fun)(const char *, ...), const char *format, ...) 
+void wsrep_init_sidno(const wsrep::id& uuid) { - va_list args; - char msg[1024]; - va_start(args, format); - vsnprintf(msg, sizeof(msg) - 1, format, args); - va_end(args); - (fun)("WSREP: %s", msg); -} - - -static void wsrep_log_states (wsrep_log_level_t const level, - const wsrep_uuid_t* const group_uuid, - wsrep_seqno_t const group_seqno, - const wsrep_uuid_t* const node_uuid, - wsrep_seqno_t const node_seqno) -{ - char uuid_str[37]; - char msg[256]; - - wsrep_uuid_print (group_uuid, uuid_str, sizeof(uuid_str)); - snprintf (msg, 255, "WSREP: Group state: %s:%lld", - uuid_str, (long long)group_seqno); - wsrep_log_cb (level, msg); - - wsrep_uuid_print (node_uuid, uuid_str, sizeof(uuid_str)); - snprintf (msg, 255, "WSREP: Local state: %s:%lld", - uuid_str, (long long)node_seqno); - wsrep_log_cb (level, msg); -} - -#ifdef GTID_SUPPORT -void wsrep_init_sidno(const wsrep_uuid_t& wsrep_uuid) -{ - /* generate new Sid map entry from inverted uuid */ - rpl_sid sid; - wsrep_uuid_t ltid_uuid; - - for (size_t i= 0; i < sizeof(ltid_uuid.data); ++i) + /* + Protocol versions starting from 4 use group gtid as it is. + For lesser protocol versions generate new Sid map entry from inverted + uuid. 
+ */ + rpl_gtid sid; + if (wsrep_protocol_version >= 4) { - ltid_uuid.data[i] = ~wsrep_uuid.data[i]; + memcpy((void*)&sid, (const uchar*)uuid.data(),16); } - - sid.copy_from(ltid_uuid.data); + else + { + wsrep_uuid_t ltid_uuid; + for (size_t i= 0; i < sizeof(ltid_uuid.data); ++i) + { + ltid_uuid.data[i]= ~((const uchar*)uuid.data())[i]; + } + memcpy((void*)&sid, (const uchar*)ltid_uuid.data,16); + } +#ifdef GTID_SUPPORT global_sid_lock->wrlock(); wsrep_sidno= global_sid_map->add_sid(sid); WSREP_INFO("Initialized wsrep sidno %d", wsrep_sidno); global_sid_lock->unlock(); +#endif } -#endif /* GTID_SUPPORT */ -static wsrep_cb_status_t -wsrep_view_handler_cb (void* app_ctx, - void* recv_ctx, - const wsrep_view_info_t* view, - const char* state, - size_t state_len, - void** sst_req, - size_t* sst_req_len) +void wsrep_init_schema() { - *sst_req = NULL; - *sst_req_len = 0; - - wsrep_member_status_t memb_status= wsrep_config_state->get_status(); - - if (memcmp(&cluster_uuid, &view->state_id.uuid, sizeof(wsrep_uuid_t))) - { - memcpy(&cluster_uuid, &view->state_id.uuid, sizeof(cluster_uuid)); - - wsrep_uuid_print (&cluster_uuid, cluster_uuid_str, - sizeof(cluster_uuid_str)); - } - - wsrep_cluster_conf_id= view->view; - wsrep_cluster_status= cluster_status_str[view->status]; - wsrep_cluster_size= view->memb_num; - wsrep_local_index= view->my_idx; - - WSREP_INFO("New cluster view: global state: %s:%lld, view# %lld: %s, " - "number of nodes: %ld, my index: %ld, protocol version %d", - wsrep_cluster_state_uuid, (long long)view->state_id.seqno, - (long long)wsrep_cluster_conf_id, wsrep_cluster_status, - wsrep_cluster_size, wsrep_local_index, view->proto_ver); - - /* Proceed further only if view is PRIMARY */ - if (WSREP_VIEW_PRIMARY != view->status) - { -#ifdef HAVE_QUERY_CACHE - // query cache must be initialised by now - query_cache.flush(); -#endif /* HAVE_QUERY_CACHE */ - - wsrep_ready_set(FALSE); - memb_status= WSREP_MEMBER_UNDEFINED; - /* Always record local_uuid and 
local_seqno in non-prim since this - * may lead to re-initializing provider and start position is - * determined according to these variables */ - // WRONG! local_uuid should be the last primary configuration uuid we were - // a member of. local_seqno should be updated in commit calls. - // local_uuid= cluster_uuid; - // local_seqno= view->first - 1; - goto out; - } + DBUG_ASSERT(!wsrep_schema); - switch (view->proto_ver) + WSREP_INFO("wsrep_init_schema_and_SR %p", wsrep_schema); + if (!wsrep_schema) { - case 0: - case 1: - case 2: - case 3: - // version change - if (view->proto_ver != wsrep_protocol_version) - { - my_bool wsrep_ready_saved= wsrep_ready_get(); - wsrep_ready_set(FALSE); - WSREP_INFO("closing client connections for " - "protocol change %ld -> %d", - wsrep_protocol_version, view->proto_ver); - wsrep_close_client_connections(TRUE); - wsrep_protocol_version= view->proto_ver; - wsrep_ready_set(wsrep_ready_saved); - } - break; - default: - WSREP_ERROR("Unsupported application protocol version: %d", - view->proto_ver); - unireg_abort(1); - } - - if (view->state_gap) - { - WSREP_WARN("Gap in state sequence. Need state transfer."); - - /* After that wsrep will call wsrep_sst_prepare. */ - /* keep ready flag 0 until we receive the snapshot */ - wsrep_ready_set(FALSE); - - /* Close client connections to ensure that they don't interfere - * with SST. Necessary only if storage engines are initialized - * before SST. - * TODO: Just killing all ongoing transactions should be enough - * since wsrep_ready is OFF and no new transactions can start. 
- */ - if (!wsrep_before_SE()) + wsrep_schema= new Wsrep_schema(); + if (wsrep_schema->init()) { - WSREP_DEBUG("[debug]: closing client connections for PRIM"); - wsrep_close_client_connections(FALSE); + WSREP_ERROR("Failed to init wsrep schema"); + unireg_abort(1); } + } +} - ssize_t const req_len= wsrep_sst_prepare (sst_req); +void wsrep_deinit_schema() +{ + delete wsrep_schema; + wsrep_schema= 0; +} - if (req_len < 0) +void wsrep_recover_sr_from_storage(THD *orig_thd) +{ + switch (wsrep_SR_store_type) + { + case WSREP_SR_STORE_TABLE: + if (!wsrep_schema) { - WSREP_ERROR("SST preparation failed: %zd (%s)", -req_len, - strerror(-req_len)); - memb_status= WSREP_MEMBER_UNDEFINED; + WSREP_ERROR("Wsrep schema not initialized when trying to recover " + "streaming transactions"); + unireg_abort(1); } - else + if (wsrep_schema->recover_sr_transactions(orig_thd)) { - assert(sst_req != NULL); - *sst_req_len= req_len; - memb_status= WSREP_MEMBER_JOINER; + WSREP_ERROR("Failed to recover SR transactions from schema"); + unireg_abort(1); } + break; + default: + /* */ + WSREP_ERROR("Unsupported wsrep SR store type: %lu", wsrep_SR_store_type); + unireg_abort(1); + break; } - else - { - /* - * NOTE: Initialize wsrep_group_uuid here only if it wasn't initialized - * before - OR - it was reinitilized on startup (lp:992840) - */ - if (wsrep_startup) +} + +/** Export the WSREP provider's capabilities as a human readable string. + * The result is saved in a dynamically allocated string of the form: + * :cap1:cap2:cap3: + */ +static void wsrep_capabilities_export(wsrep_cap_t const cap, char** str) +{ + static const char* names[] = + { + /* Keep in sync with wsrep/wsrep_api.h WSREP_CAP_* macros. 
*/ + "MULTI_MASTER", + "CERTIFICATION", + "PARALLEL_APPLYING", + "TRX_REPLAY", + "ISOLATION", + "PAUSE", + "CAUSAL_READS", + "CAUSAL_TRX", + "INCREMENTAL_WRITESET", + "SESSION_LOCKS", + "DISTRIBUTED_LOCKS", + "CONSISTENCY_CHECK", + "UNORDERED", + "ANNOTATION", + "PREORDERED", + "STREAMING", + "SNAPSHOT", + "NBO", + }; + + std::string s; + for (size_t i= 0; i < sizeof(names) / sizeof(names[0]); ++i) + { + if (cap & (1ULL << i)) { - if (wsrep_before_SE()) + if (s.empty()) { - wsrep_SE_init_grab(); - // Signal mysqld init thread to continue - wsrep_sst_complete (&cluster_uuid, view->state_id.seqno, false); - // and wait for SE initialization - wsrep_SE_init_wait(); + s= ":"; } - else - { - local_uuid= cluster_uuid; - local_seqno= view->state_id.seqno; - } - /* Init storage engine XIDs from first view */ - wsrep_set_SE_checkpoint(local_uuid, local_seqno); -#ifdef GTID_SUPPORT - wsrep_init_sidno(local_uuid); -#endif /* GTID_SUPPORT */ - memb_status= WSREP_MEMBER_JOINED; - } - - // just some sanity check - if (memcmp (&local_uuid, &cluster_uuid, sizeof (wsrep_uuid_t))) - { - WSREP_ERROR("Undetected state gap. Can't continue."); - wsrep_log_states(WSREP_LOG_FATAL, &cluster_uuid, view->state_id.seqno, - &local_uuid, -1); - unireg_abort(1); + s += names[i]; + s += ":"; } } - if (wsrep_auto_increment_control) - { - global_system_variables.auto_increment_offset= view->my_idx + 1; - global_system_variables.auto_increment_increment= view->memb_num; - } + /* A read from the string pointed to by *str may be started at any time, + * so it must never point to free(3)d memory or non '\0' terminated string. */ - { /* capabilities may be updated on new configuration */ - uint64_t const caps(wsrep->capabilities (wsrep)); - - my_bool const idc((caps & WSREP_CAP_INCREMENTAL_WRITESET) != 0); - if (TRUE == wsrep_incremental_data_collection && FALSE == idc) - { - WSREP_WARN("Unsupported protocol downgrade: " - "incremental data collection disabled. 
Expect abort."); - } - wsrep_incremental_data_collection = idc; - } + char* const previous= *str; -out: - if (view->status == WSREP_VIEW_PRIMARY) wsrep_startup= FALSE; - wsrep_config_state->set(memb_status, view); + *str= strdup(s.c_str()); - return WSREP_CB_SUCCESS; + if (previous != NULL) + { + free(previous); + } } -my_bool wsrep_ready_set (my_bool x) +/* Verifies that SE position is consistent with the group position + * and initializes other variables */ +void wsrep_verify_SE_checkpoint(const wsrep_uuid_t& uuid, + wsrep_seqno_t const seqno) { - WSREP_DEBUG("Setting wsrep_ready to %d", x); - if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); - my_bool ret= (wsrep_ready != x); - if (ret) - { - wsrep_ready= x; - mysql_cond_signal (&COND_wsrep_ready); - } - mysql_mutex_unlock (&LOCK_wsrep_ready); - return ret; } +/* + Wsrep is considered ready if + 1) Provider is not loaded (native mode) + 2) Server has reached synced state + 3) Server is in joiner mode and mysqldump SST method has been + specified + See Wsrep_server_service::log_state_change() for further details. 
+ */ my_bool wsrep_ready_get (void) { if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); @@ -499,178 +430,67 @@ int wsrep_show_ready(THD *thd, SHOW_VAR *var, char *buff) return 0; } -// Wait until wsrep has reached ready state -void wsrep_ready_wait () +void wsrep_update_cluster_state_uuid(const char* uuid) { - if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); - while (!wsrep_ready) - { - WSREP_INFO("Waiting to reach ready state"); - mysql_cond_wait (&COND_wsrep_ready, &LOCK_wsrep_ready); - } - WSREP_INFO("ready state reached"); - mysql_mutex_unlock (&LOCK_wsrep_ready); + strncpy(cluster_uuid_str, uuid, sizeof(cluster_uuid_str) - 1); } -static void wsrep_synced_cb(void* app_ctx) +static void wsrep_init_position() { - WSREP_INFO("Synchronized with group, ready for connections"); - my_bool signal_main= wsrep_ready_set(TRUE); - wsrep_config_state->set(WSREP_MEMBER_SYNCED); - - if (signal_main) - { - wsrep_SE_init_grab(); - // Signal mysqld init thread to continue - wsrep_sst_complete (&local_uuid, local_seqno, false); - // and wait for SE initialization - wsrep_SE_init_wait(); - } - if (wsrep_restart_slave_activated) - { - int rcode; - WSREP_INFO("MariaDB slave restart"); - wsrep_restart_slave_activated= FALSE; - - mysql_mutex_lock(&LOCK_active_mi); - if ((rcode = start_slave_threads(0, - 1 /* need mutex */, - 0 /* no wait for start*/, - active_mi, - master_info_file, - relay_log_info_file, - SLAVE_SQL))) - { - WSREP_WARN("Failed to create slave threads: %d", rcode); - } - mysql_mutex_unlock(&LOCK_active_mi); - - } } -static void wsrep_init_position() +/**************************************************************************** + Helpers for wsrep_init() + ****************************************************************************/ +static std::string wsrep_server_name() { - /* read XIDs from storage engines */ - wsrep_uuid_t uuid; - wsrep_seqno_t seqno; - wsrep_get_SE_checkpoint(uuid, seqno); - - if (!memcmp(&uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t))) - { 
- WSREP_INFO("Read nil XID from storage engines, skipping position init"); - return; - } - - char uuid_str[40] = {0, }; - wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); - WSREP_INFO("Initial position: %s:%lld", uuid_str, (long long)seqno); - - if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(local_uuid)) && - local_seqno == WSREP_SEQNO_UNDEFINED) - { - // Initial state - local_uuid= uuid; - local_seqno= seqno; - } - else if (memcmp(&local_uuid, &uuid, sizeof(local_uuid)) || - local_seqno != seqno) - { - WSREP_WARN("Initial position was provided by configuration or SST, " - "avoiding override"); - } + std::string ret(wsrep_node_name ? wsrep_node_name : ""); + return ret; } -extern char* my_bind_addr_str; - -int wsrep_init() +static std::string wsrep_server_id() { - int rcode= -1; - DBUG_ASSERT(wsrep_inited == 0); - - if (strcmp(wsrep_start_position, WSREP_START_POSITION_ZERO) && - wsrep_start_position_init(wsrep_start_position)) - { - return 1; - } - - wsrep_sst_auth_init(); - - wsrep_ready_set(FALSE); - assert(wsrep_provider); - - wsrep_init_position(); - - if ((rcode= wsrep_load(wsrep_provider, &wsrep, wsrep_log_cb)) != WSREP_OK) - { - if (strcasecmp(wsrep_provider, WSREP_NONE)) - { - WSREP_ERROR("wsrep_load(%s) failed: %s (%d). Reverting to no provider.", - wsrep_provider, strerror(rcode), rcode); - strcpy((char*)wsrep_provider, WSREP_NONE); // damn it's a dirty hack - return wsrep_init(); - } - else /* this is for recursive call above */ - { - WSREP_ERROR("Could not revert to no provider: %s (%d). 
Need to abort.", - strerror(rcode), rcode); - unireg_abort(1); - } - } + /* using empty server_id, which enables view change handler to + set final server_id later on + */ + std::string ret(""); + return ret; +} - if (!WSREP_PROVIDER_EXISTS) - { - // enable normal operation in case no provider is specified - wsrep_ready_set(TRUE); - wsrep_inited= 1; - global_system_variables.wsrep_on = 0; - wsrep_init_args args; - args.logger_cb = wsrep_log_cb; - args.options = (wsrep_provider_options) ? - wsrep_provider_options : ""; - rcode = wsrep->init(wsrep, &args); - if (rcode) - { - DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); - WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); - wsrep->free(wsrep); - free(wsrep); - wsrep = NULL; - } - return rcode; - } - else - { - global_system_variables.wsrep_on = 1; - strncpy(provider_name, - wsrep->provider_name, sizeof(provider_name) - 1); - strncpy(provider_version, - wsrep->provider_version, sizeof(provider_version) - 1); - strncpy(provider_vendor, - wsrep->provider_vendor, sizeof(provider_vendor) - 1); - } +static std::string wsrep_server_node_address() +{ + std::string ret; if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) - wsrep_data_home_dir = mysql_real_data_home; + wsrep_data_home_dir= mysql_real_data_home; /* Initialize node address */ - char node_addr[512]= { 0, }; - size_t const node_addr_max= sizeof(node_addr) - 1; if (!wsrep_node_address || !strcmp(wsrep_node_address, "")) { - size_t const ret= wsrep_guess_ip(node_addr, node_addr_max); - if (!(ret > 0 && ret < node_addr_max)) + char node_addr[512]= {0, }; + const size_t node_addr_max= sizeof(node_addr) - 1; + size_t guess_ip_ret= wsrep_guess_ip(node_addr, node_addr_max); + if (!(guess_ip_ret > 0 && guess_ip_ret < node_addr_max)) { WSREP_WARN("Failed to guess base node address. 
Set it explicitly via " "wsrep_node_address."); - node_addr[0]= '\0'; + } + else + { + ret= node_addr; } } else { - strncpy(node_addr, wsrep_node_address, node_addr_max); + ret= wsrep_node_address; } + return ret; +} - /* Initialize node's incoming address */ +static std::string wsrep_server_incoming_address() +{ + std::string ret; + const std::string node_addr(wsrep_server_node_address()); char inc_addr[512]= { 0, }; size_t const inc_addr_max= sizeof (inc_addr); @@ -685,7 +505,8 @@ int wsrep_init() bool is_ipv6= false; unsigned int my_bind_ip= INADDR_ANY; // default if not set - if (my_bind_addr_str && strlen(my_bind_addr_str)) + if (my_bind_addr_str && strlen(my_bind_addr_str) && + strcmp(my_bind_addr_str, "*") != 0) { my_bind_ip= wsrep_check_ip(my_bind_addr_str, &is_ipv6); } @@ -704,22 +525,28 @@ int wsrep_init() } else /* mysqld binds to 0.0.0.0, try taking IP from wsrep_node_address. */ { - size_t const node_addr_len= strlen(node_addr); - if (node_addr_len > 0) + if (node_addr.size()) { - wsp::Address addr(node_addr); - - if (!addr.is_valid()) + size_t const ip_len= wsrep_host_len(node_addr.c_str(), node_addr.size()); + if (ip_len + 7 /* :55555\0 */ < inc_addr_max) { - WSREP_DEBUG("Could not parse node address : %s", node_addr); - WSREP_WARN("Guessing address for incoming client connections failed. " - "Try setting wsrep_node_incoming_address explicitly."); - goto done; + memcpy (inc_addr, node_addr.c_str(), ip_len); + snprintf(inc_addr + ip_len, inc_addr_max - ip_len, ":%u", + (int)mysqld_port); } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } - const char *fmt= (addr.is_ipv6()) ? "[%s]:%u" : "%s:%u"; - snprintf(inc_addr, inc_addr_max, fmt, addr.get_address(), - (int) mysqld_port); + if (!strlen(inc_addr)) + { + WSREP_WARN("Guessing address for incoming client connections failed. 
" + "Try setting wsrep_node_incoming_address explicitly."); + WSREP_INFO("Node addr: %s", node_addr.c_str()); } } } @@ -743,52 +570,178 @@ int wsrep_init() snprintf(inc_addr, inc_addr_max, fmt, addr.get_address(), port); } + + done: + ret= wsrep_node_incoming_address; + return ret; +} -done: - struct wsrep_init_args wsrep_args; +static std::string wsrep_server_working_dir() +{ + std::string ret; + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + { + ret= mysql_real_data_home; + } + else + { + ret= wsrep_data_home_dir; + } + return ret; +} - struct wsrep_gtid const state_id = { local_uuid, local_seqno }; +static wsrep::gtid wsrep_server_initial_position() +{ + wsrep::gtid ret; + WSREP_INFO("Server initial position: %s", wsrep_start_position); + std::istringstream is(wsrep_start_position); + is >> ret; + return ret; +} - wsrep_args.data_dir = wsrep_data_home_dir; - wsrep_args.node_name = (wsrep_node_name) ? wsrep_node_name : ""; - wsrep_args.node_address = node_addr; - wsrep_args.node_incoming = inc_addr; - wsrep_args.options = (wsrep_provider_options) ? 
- wsrep_provider_options : ""; - wsrep_args.proto_ver = wsrep_max_protocol_version; +/* + Intitialize provider specific status variables + */ +static void wsrep_init_provider_status_variables() +{ + const wsrep::provider& provider= + Wsrep_server_state::instance().provider(); + strncpy(provider_name, + provider.name().c_str(), sizeof(provider_name) - 1); + strncpy(provider_version, + provider.version().c_str(), sizeof(provider_version) - 1); + strncpy(provider_vendor, + provider.vendor().c_str(), sizeof(provider_vendor) - 1); +} + +int wsrep_init_server() +{ + wsrep::log::logger_fn(wsrep_log_cb); + try + { + std::string server_name; + std::string server_id; + std::string node_address; + std::string incoming_address; + std::string working_dir; + wsrep::gtid initial_position; + + server_name= wsrep_server_name(); + server_id= wsrep_server_id(); + node_address= wsrep_server_node_address(); + incoming_address= wsrep_server_incoming_address(); + working_dir= wsrep_server_working_dir(); + initial_position= wsrep_server_initial_position(); + + Wsrep_server_state::init_once(server_name, + incoming_address, + node_address, + working_dir, + initial_position, + wsrep_max_protocol_version); + } + catch (const wsrep::runtime_error& e) + { + WSREP_ERROR("Failed to init wsrep server %s", e.what()); + return 1; + } + catch (const std::exception& e) + { + WSREP_ERROR("Failed to init wsrep server %s", e.what()); + } + return 0; +} - wsrep_args.state_id = &state_id; +void wsrep_init_globals() +{ + wsrep_init_sidno(Wsrep_server_state::instance().connected_gtid().id()); + wsrep_init_schema(); + if (WSREP_ON) + { + Wsrep_server_state::instance().initialized(); + } +} - wsrep_args.logger_cb = wsrep_log_cb; - wsrep_args.view_handler_cb = wsrep_view_handler_cb; - wsrep_args.apply_cb = wsrep_apply_cb; - wsrep_args.commit_cb = wsrep_commit_cb; - wsrep_args.unordered_cb = wsrep_unordered_cb; - wsrep_args.sst_donate_cb = wsrep_sst_donate_cb; - wsrep_args.synced_cb = wsrep_synced_cb; +void 
wsrep_deinit_server() +{ + wsrep_deinit_schema(); + Wsrep_server_state::destroy(); +} - rcode = wsrep->init(wsrep, &wsrep_args); +int wsrep_init() +{ + assert(wsrep_provider); - if (rcode) + wsrep_init_position(); + wsrep_sst_auth_init(); + + if (strlen(wsrep_provider)== 0 || + !strcmp(wsrep_provider, WSREP_NONE)) { - DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); - WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); - wsrep->free(wsrep); - free(wsrep); - wsrep = NULL; - } else { - wsrep_inited= 1; + // enable normal operation in case no provider is specified + global_system_variables.wsrep_on= 0; + int err= Wsrep_server_state::instance().load_provider(wsrep_provider, wsrep_provider_options ? wsrep_provider_options : ""); + if (err) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", err)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", err); + } + else + { + wsrep_init_provider_status_variables(); + } + return err; } - return rcode; -} + global_system_variables.wsrep_on= 1; + + if (wsrep_gtid_mode && opt_bin_log && !opt_log_slave_updates) + { + WSREP_ERROR("Option --log-slave-updates is required if " + "binlog is enabled, GTID mode is on and wsrep provider " + "is specified"); + return 1; + } + + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + wsrep_data_home_dir= mysql_real_data_home; + + if (Wsrep_server_state::instance().load_provider(wsrep_provider, + wsrep_provider_options)) + { + WSREP_ERROR("Failed to load provider"); + return 1; + } + + if (!wsrep_provider_is_SR_capable() && + global_system_variables.wsrep_trx_fragment_size > 0) + { + WSREP_ERROR("The WSREP provider (%s) does not support streaming " + "replication but wsrep_trx_fragment_size is set to a " + "value other than 0 (%llu). Cannot continue. 
Either set " + "wsrep_trx_fragment_size to 0 or use wsrep_provider that " + "supports streaming replication.", + wsrep_provider, global_system_variables.wsrep_trx_fragment_size); + Wsrep_server_state::instance().unload_provider(); + return 1; + } + wsrep_inited= 1; + + wsrep_init_provider_status_variables(); + wsrep_capabilities_export(Wsrep_server_state::instance().provider().capabilities(), + &wsrep_provider_capabilities); + + WSREP_DEBUG("SR storage init for: %s", + (wsrep_SR_store_type == WSREP_SR_STORE_TABLE) ? "table" : "void"); + return 0; +} /* Initialize wsrep thread LOCKs and CONDs */ void wsrep_thr_init() { DBUG_ENTER("wsrep_thr_init"); - wsrep_config_state = new wsp::Config_state; + wsrep_config_state= new wsp::Config_state; #ifdef HAVE_PSI_INTERFACE mysql_mutex_register("sql", wsrep_mutexes, array_elements(wsrep_mutexes)); mysql_cond_register("sql", wsrep_conds, array_elements(wsrep_conds)); @@ -801,25 +754,24 @@ void wsrep_thr_init() mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL); mysql_mutex_init(key_LOCK_wsrep_sst_init, &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST); mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL); - mysql_mutex_init(key_LOCK_wsrep_rollback, &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST); - mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL); mysql_mutex_init(key_LOCK_wsrep_replaying, &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST); mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL); mysql_mutex_init(key_LOCK_wsrep_slave_threads, &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_wsrep_desync, &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_wsrep_config_state, &LOCK_wsrep_config_state, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_SR_pool, + &LOCK_wsrep_SR_pool, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_SR_store, + &LOCK_wsrep_SR_store, MY_MUTEX_INIT_FAST); DBUG_VOID_RETURN; } -void wsrep_init_startup (bool first) 
+void wsrep_init_startup (bool sst_first) { if (wsrep_init()) unireg_abort(1); - wsrep_thr_lock_init( - (wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF, - (wsrep_abort_thd_fun)wsrep_abort_thd, - wsrep_debug, wsrep_convert_LOCK_to_trx, - (wsrep_on_fun)wsrep_on); + wsrep_thr_lock_init(wsrep_thd_is_BF, wsrep_thd_bf_abort, + wsrep_debug, wsrep_convert_LOCK_to_trx, wsrep_on); /* Pre-initialize global_system_variables.table_plugin with a dummy engine @@ -838,28 +790,54 @@ void wsrep_init_startup (bool first) /* Skip replication start if no cluster address */ if (!wsrep_cluster_address || wsrep_cluster_address[0] == 0) return; - if (first) wsrep_sst_grab(); // do it so we can wait for SST below - + /* + Read value of wsrep_new_cluster before wsrep_start_replication(), + the value is reset to FALSE inside wsrep_start_replication. + */ if (!wsrep_start_replication()) unireg_abort(1); wsrep_create_rollbacker(); wsrep_create_appliers(1); - if (first && !wsrep_sst_wait()) unireg_abort(1);// wait until SST is completed + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + /* + If the SST happens before server initialization, wait until the server + state reaches initializing. This indicates that + either SST was not necessary or SST has been delivered. + + With mysqldump SST (!sst_first) wait until the server reaches + joiner state and procedd to accepting connections. 
+ */ + if (sst_first) + { + server_state.wait_until_state(Wsrep_server_state::s_initializing); + } + else + { + server_state.wait_until_state(Wsrep_server_state::s_joiner); + } } void wsrep_deinit(bool free_options) { DBUG_ASSERT(wsrep_inited == 1); - wsrep_unload(wsrep); - wsrep= 0; + WSREP_DEBUG("wsrep_deinit"); + + Wsrep_server_state::instance().unload_provider(); provider_name[0]= '\0'; provider_version[0]= '\0'; provider_vendor[0]= '\0'; wsrep_inited= 0; + if (wsrep_provider_capabilities != NULL) + { + char* p= wsrep_provider_capabilities; + wsrep_provider_capabilities= NULL; + free(p); + } + if (free_options) { wsrep_sst_auth_free(); @@ -871,28 +849,37 @@ void wsrep_thr_deinit() { if (!wsrep_config_state) return; // Never initialized + WSREP_DEBUG("wsrep_thr_deinit"); mysql_mutex_destroy(&LOCK_wsrep_ready); mysql_cond_destroy(&COND_wsrep_ready); mysql_mutex_destroy(&LOCK_wsrep_sst); mysql_cond_destroy(&COND_wsrep_sst); mysql_mutex_destroy(&LOCK_wsrep_sst_init); mysql_cond_destroy(&COND_wsrep_sst_init); - mysql_mutex_destroy(&LOCK_wsrep_rollback); - mysql_cond_destroy(&COND_wsrep_rollback); mysql_mutex_destroy(&LOCK_wsrep_replaying); mysql_cond_destroy(&COND_wsrep_replaying); mysql_mutex_destroy(&LOCK_wsrep_slave_threads); mysql_mutex_destroy(&LOCK_wsrep_desync); mysql_mutex_destroy(&LOCK_wsrep_config_state); + mysql_mutex_destroy(&LOCK_wsrep_SR_pool); + mysql_mutex_destroy(&LOCK_wsrep_SR_store); + delete wsrep_config_state; wsrep_config_state= 0; // Safety + + if (wsrep_cluster_capabilities != NULL) + { + char* p= wsrep_cluster_capabilities; + wsrep_cluster_capabilities= NULL; + free(p); + } } void wsrep_recover() { char uuid_str[40]; - if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) && + if (wsrep_uuid_compare(&local_uuid, &WSREP_UUID_UNDEFINED) == 0 && local_seqno == -2) { wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str)); @@ -900,43 +887,60 @@ void wsrep_recover() uuid_str, (long long)local_seqno); return; } - wsrep_uuid_t 
uuid; - wsrep_seqno_t seqno; - wsrep_get_SE_checkpoint(uuid, seqno); - wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); - WSREP_INFO("Recovered position: %s:%lld", uuid_str, (long long)seqno); + wsrep::gtid gtid= wsrep_get_SE_checkpoint(); + std::ostringstream oss; + oss << gtid; + WSREP_INFO("Recovered position: %s", oss.str().c_str()); } void wsrep_stop_replication(THD *thd) { WSREP_INFO("Stop replication"); - if (!wsrep) + if (Wsrep_server_state::instance().state() != + Wsrep_server_state::s_disconnected) { - WSREP_INFO("Provider was not loaded, in stop replication"); - return; + WSREP_DEBUG("Disconnect provider"); + Wsrep_server_state::instance().disconnect(); + Wsrep_server_state::instance().wait_until_state(Wsrep_server_state::s_disconnected); } - /* disconnect from group first to get wsrep_ready == FALSE */ - WSREP_DEBUG("Provider disconnect"); - wsrep->disconnect(wsrep); + /* my connection, should not terminate with wsrep_close_client_connection(), + make transaction to rollback + */ + if (thd && !thd->wsrep_applier) trans_rollback(thd); + wsrep_close_client_connections(TRUE, thd); + + /* wait until appliers have stopped */ + wsrep_wait_appliers_close(thd); + + node_uuid= WSREP_UUID_UNDEFINED; +} - wsrep_connected= FALSE; +void wsrep_shutdown_replication() +{ + WSREP_INFO("Shutdown replication"); + if (Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected) + { + WSREP_DEBUG("Disconnect provider"); + Wsrep_server_state::instance().disconnect(); + Wsrep_server_state::instance().wait_until_state(Wsrep_server_state::s_disconnected); + } wsrep_close_client_connections(TRUE); /* wait until appliers have stopped */ - wsrep_wait_appliers_close(thd); + wsrep_wait_appliers_close(NULL); + node_uuid= WSREP_UUID_UNDEFINED; - return; + /* Undocking the thread specific data. */ + my_pthread_setspecific_ptr(THR_THD, NULL); } bool wsrep_start_replication() { - wsrep_status_t rcode; - - /* wsrep provider must be loaded. 
*/ - DBUG_ASSERT(wsrep); + int rcode; + WSREP_DEBUG("wsrep_start_replication"); /* if provider is trivial, don't even try to connect, @@ -945,34 +949,27 @@ bool wsrep_start_replication() if (!WSREP_PROVIDER_EXISTS) { // enable normal operation in case no provider is specified - wsrep_ready_set(TRUE); return true; } if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0) { // if provider is non-trivial, but no address is specified, wait for address - wsrep_ready_set(FALSE); return true; } - bool const bootstrap= wsrep_new_cluster; + bool const bootstrap(TRUE == wsrep_new_cluster); + wsrep_new_cluster= FALSE; WSREP_INFO("Start replication"); - if (wsrep_new_cluster) + if ((rcode= Wsrep_server_state::instance().connect( + wsrep_cluster_name, + wsrep_cluster_address, + wsrep_sst_donor, + bootstrap))) { - WSREP_INFO("'wsrep-new-cluster' option used, bootstrapping the cluster"); - wsrep_new_cluster= false; - } - - if ((rcode = wsrep->connect(wsrep, - wsrep_cluster_name, - wsrep_cluster_address, - wsrep_sst_donor, - bootstrap))) - { - DBUG_PRINT("wsrep",("wsrep->connect(%s) failed: %d", + DBUG_PRINT("wsrep",("wsrep_ptr->connect(%s) failed: %d", wsrep_cluster_address, rcode)); WSREP_ERROR("wsrep::connect(%s) failed: %d", wsrep_cluster_address, rcode); @@ -980,15 +977,12 @@ bool wsrep_start_replication() } else { - wsrep_connected= TRUE; - - char* opts= wsrep->options_get(wsrep); - if (opts) + try { - wsrep_provider_options_init(opts); - free(opts); + std::string opts= Wsrep_server_state::instance().provider().options(); + wsrep_provider_options_init(opts.c_str()); } - else + catch (const wsrep::runtime_error&) { WSREP_WARN("Failed to get wsrep options"); } @@ -999,40 +993,50 @@ bool wsrep_start_replication() bool wsrep_must_sync_wait (THD* thd, uint mask) { - return (thd->variables.wsrep_sync_wait & mask) && + bool ret; + mysql_mutex_lock(&thd->LOCK_thd_data); + ret= (thd->variables.wsrep_sync_wait & mask) && + thd->wsrep_client_thread && thd->variables.wsrep_on && 
!(thd->variables.wsrep_dirty_reads && !is_update_query(thd->lex->sql_command)) && !thd->in_active_multi_stmt_transaction() && - thd->wsrep_conflict_state != REPLAYING && - thd->wsrep_sync_wait_gtid.seqno == WSREP_SEQNO_UNDEFINED; + thd->wsrep_trx().state() != + wsrep::transaction::s_replaying && + thd->wsrep_cs().sync_wait_gtid().is_undefined(); + mysql_mutex_unlock(&thd->LOCK_thd_data); + return ret; } bool wsrep_sync_wait (THD* thd, uint mask) { if (wsrep_must_sync_wait(thd, mask)) { - WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait = %u, mask = %u", - thd->variables.wsrep_sync_wait, mask); - // This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 - // TODO: modify to check if thd has locked any rows. - wsrep_status_t ret= wsrep->causal_read (wsrep, &thd->wsrep_sync_wait_gtid); - - if (unlikely(WSREP_OK != ret)) + WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait= %u, " + "mask= %u, thd->variables.wsrep_on= %d", + thd->variables.wsrep_sync_wait, mask, + thd->variables.wsrep_on); + /* + This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 + TODO: modify to check if thd has locked any rows. + */ + if (thd->wsrep_cs().sync_wait(-1)) { const char* msg; int err; - // Possibly relevant error codes: - // ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY, - // ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET, - // ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED + /* + Possibly relevant error codes: + ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY, + ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET, + ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED + */ - switch (ret) + switch (thd->wsrep_cs().current_error()) { - case WSREP_NOT_IMPLEMENTED: + case wsrep::e_not_supported_error: msg= "synchronous reads by wsrep backend. 
" - "Please unset wsrep_causal_reads variable."; + "Please unset wsrep_causal_reads variable."; err= ER_NOT_SUPPORTED_YET; break; default: @@ -1050,6 +1054,27 @@ bool wsrep_sync_wait (THD* thd, uint mask) return false; } +enum wsrep::provider::status +wsrep_sync_wait_upto (THD* thd, + wsrep_gtid_t* upto, + int timeout) +{ + DBUG_ASSERT(upto); + enum wsrep::provider::status ret; + if (upto) + { + wsrep::gtid upto_gtid(wsrep::id(upto->uuid.data, sizeof(upto->uuid.data)), + wsrep::seqno(upto->seqno)); + ret= Wsrep_server_state::instance().wait_for_gtid(upto_gtid, timeout); + } + else + { + ret= Wsrep_server_state::instance().causal_read(timeout).second; + } + WSREP_DEBUG("wsrep_sync_wait_upto: %d", ret); + return ret; +} + void wsrep_keys_free(wsrep_key_arr_t* key_arr) { for (size_t i= 0; i < key_arr->keys_len; ++i) @@ -1061,7 +1086,6 @@ void wsrep_keys_free(wsrep_key_arr_t* key_arr) key_arr->keys_len= 0; } - /*! * @param db Database string * @param table Table string @@ -1073,9 +1097,9 @@ void wsrep_keys_free(wsrep_key_arr_t* key_arr) */ static bool wsrep_prepare_key_for_isolation(const char* db, - const char* table, - wsrep_buf_t* key, - size_t* key_len) + const char* table, + wsrep_buf_t* key, + size_t* key_len) { if (*key_len < 2) return false; @@ -1087,11 +1111,11 @@ static bool wsrep_prepare_key_for_isolation(const char* db, case 1: case 2: case 3: + case 4: { *key_len= 0; if (db) { - // sql_print_information("%s.%s", db, table); key[*key_len].ptr= db; key[*key_len].len= strlen(db); ++(*key_len); @@ -1105,26 +1129,23 @@ static bool wsrep_prepare_key_for_isolation(const char* db, break; } default: + assert(0); + WSREP_ERROR("Unsupported protocol version: %ld", wsrep_protocol_version); + unireg_abort(1); return false; } - return true; -} + return true; +} static bool wsrep_prepare_key_for_isolation(const char* db, const char* table, wsrep_key_arr_t* ka) { wsrep_key_t* tmp; - - if (!ka->keys) - tmp= (wsrep_key_t*)my_malloc((ka->keys_len + 1) * sizeof(wsrep_key_t), 
- MYF(0)); - else - tmp= (wsrep_key_t*)my_realloc(ka->keys, - (ka->keys_len + 1) * sizeof(wsrep_key_t), - MYF(0)); - + tmp= (wsrep_key_t*)my_realloc(ka->keys, + (ka->keys_len + 1) * sizeof(wsrep_key_t), + MYF(MY_ALLOW_ZERO_PTR)); if (!tmp) { WSREP_ERROR("Can't allocate memory for key_array"); @@ -1150,7 +1171,6 @@ static bool wsrep_prepare_key_for_isolation(const char* db, return true; } - static bool wsrep_prepare_keys_for_alter_add_fk(const char* child_table_db, Alter_info* alter_info, wsrep_key_arr_t* ka) @@ -1177,7 +1197,6 @@ static bool wsrep_prepare_keys_for_alter_add_fk(const char* child_table_db, return true; } - static bool wsrep_prepare_keys_for_isolation(THD* thd, const char* db, const char* table, @@ -1205,16 +1224,19 @@ static bool wsrep_prepare_keys_for_isolation(THD* thd, if (!wsrep_prepare_keys_for_alter_add_fk(table_list->db.str, alter_info, ka)) goto err; } - return false; err: - wsrep_keys_free(ka); - return true; + wsrep_keys_free(ka); + return true; } +/* + * Prepare key list from db/table and table_list + * + * Return zero in case of success, 1 in case of failure. 
+ */ -/* Prepare key list from db/table and table_list */ bool wsrep_prepare_keys_for_isolation(THD* thd, const char* db, const char* table, @@ -1224,7 +1246,6 @@ bool wsrep_prepare_keys_for_isolation(THD* thd, return wsrep_prepare_keys_for_isolation(thd, db, table, table_list, NULL, ka); } - bool wsrep_prepare_key(const uchar* cache_key, size_t cache_key_len, const uchar* row_id, size_t row_id_len, wsrep_buf_t* key, size_t* key_len) @@ -1236,37 +1257,110 @@ bool wsrep_prepare_key(const uchar* cache_key, size_t cache_key_len, { case 0: { - key[0].ptr = cache_key; - key[0].len = cache_key_len; + key[0].ptr= cache_key; + key[0].len= cache_key_len; - *key_len = 1; + *key_len= 1; break; } case 1: case 2: case 3: + case 4: { - key[0].ptr = cache_key; - key[0].len = strlen( (char*)cache_key ); + key[0].ptr= cache_key; + key[0].len= strlen( (char*)cache_key ); - key[1].ptr = cache_key + strlen( (char*)cache_key ) + 1; - key[1].len = strlen( (char*)(key[1].ptr) ); + key[1].ptr= cache_key + strlen( (char*)cache_key ) + 1; + key[1].len= strlen( (char*)(key[1].ptr) ); - *key_len = 2; + *key_len= 2; break; } default: return false; } - key[*key_len].ptr = row_id; - key[*key_len].len = row_id_len; + key[*key_len].ptr= row_id; + key[*key_len].len= row_id_len; ++(*key_len); return true; } +bool wsrep_prepare_key_for_innodb(THD* thd, + const uchar* cache_key, + size_t cache_key_len, + const uchar* row_id, + size_t row_id_len, + wsrep_buf_t* key, + size_t* key_len) +{ + + return wsrep_prepare_key(cache_key, cache_key_len, row_id, row_id_len, key, key_len); +} + +wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, + enum wsrep::key::type type) +{ + wsrep::key ret(type); + DBUG_ASSERT(db); + ret.append_key_part(db, strlen(db)); + if (table) ret.append_key_part(table, strlen(table)); + return ret; +} +wsrep::key_array +wsrep_prepare_keys_for_alter_add_fk(const char* child_table_db, + Alter_info* alter_info) + +{ + wsrep::key_array ret; + Key *key; + 
List_iterator<Key> key_iterator(alter_info->key_list); + while ((key= key_iterator++)) + { + if (key->type == Key::FOREIGN_KEY) + { + Foreign_key *fk_key= (Foreign_key *)key; + const char *db_name= fk_key->ref_db.str; + const char *table_name= fk_key->ref_table.str; + if (!db_name) + { + db_name= child_table_db; + } + ret.push_back(wsrep_prepare_key_for_toi(db_name, table_name, + wsrep::key::exclusive)); + } + } + return ret; +} + +wsrep::key_array wsrep_prepare_keys_for_toi(const char* db, + const char* table, + const TABLE_LIST* table_list, + Alter_info* alter_info) +{ + wsrep::key_array ret; + if (db || table) + { + ret.push_back(wsrep_prepare_key_for_toi(db, table, wsrep::key::exclusive)); + } + for (const TABLE_LIST* table= table_list; table; table= table->next_global) + { + ret.push_back(wsrep_prepare_key_for_toi(table->db.str, table->table_name.str, + wsrep::key::exclusive)); + } + if (alter_info && (alter_info->flags & ALTER_ADD_FOREIGN_KEY)) + { + wsrep::key_array fk(wsrep_prepare_keys_for_alter_add_fk(table_list->db.str, alter_info)); + if (!fk.empty()) + { + ret.insert(ret.end(), fk.begin(), fk.end()); + } + } + return ret; +} /* * Construct Query_log_Event from thd query and serialize it * into buffer. 
@@ -1277,7 +1371,7 @@ int wsrep_to_buf_helper( THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len) { IO_CACHE tmp_io_cache; - Log_event_writer writer(&tmp_io_cache,0); + Log_event_writer writer(&tmp_io_cache, 0); if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX, 65536, MYF(MY_WME))) return 1; @@ -1365,7 +1459,7 @@ create_view_query(THD *thd, uchar** buf, size_t* buf_len) LEX *lex= thd->lex; SELECT_LEX *select_lex= lex->first_select_lex(); TABLE_LIST *first_table= select_lex->table_list.first; - TABLE_LIST *views = first_table; + TABLE_LIST *views= first_table; LEX_USER *definer; String buff; const LEX_CSTRING command[3]= @@ -1390,16 +1484,16 @@ create_view_query(THD *thd, uchar** buf, size_t* buf_len) if (definer) { - views->definer.user = definer->user; - views->definer.host = definer->host; + views->definer.user= definer->user; + views->definer.host= definer->host; } else { WSREP_ERROR("Failed to get DEFINER for VIEW."); return 1; } - views->algorithm = lex->create_view->algorithm; - views->view_suid = lex->create_view->suid; - views->with_check = lex->create_view->check; + views->algorithm = lex->create_view->algorithm; + views->view_suid = lex->create_view->suid; + views->with_check = lex->create_view->check; view_store_options(thd, views, &buff); buff.append(STRING_WITH_LEN("VIEW ")); @@ -1425,12 +1519,8 @@ create_view_query(THD *thd, uchar** buf, size_t* buf_len) buff.append(')'); } buff.append(STRING_WITH_LEN(" AS ")); - //buff.append(views->source.str, views->source.length); buff.append(thd->lex->create_view->select.str, thd->lex->create_view->select.length); - //int errcode= query_error_code(thd, TRUE); - //if (thd->binlog_query(THD::STMT_QUERY_TYPE, - // buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcod return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len); } @@ -1496,8 +1586,7 @@ static int wsrep_drop_table_query(THD* thd, uchar** buf, size_t* buf_len) /* Forward declarations. 
*/ -static int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len); -static int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); /* Decide if statement should run in TOI. @@ -1577,6 +1666,7 @@ static bool wsrep_can_run_in_toi(THD *thd, const char *db, const char *table, } } +#if UNUSED /* 323f269d4099 (Jan Lindström 2018-07-19) */ static const char* wsrep_get_query_or_msg(const THD* thd) { switch(thd->lex->sql_command) @@ -1589,58 +1679,70 @@ static const char* wsrep_get_query_or_msg(const THD* thd) return "REVOKE"; case SQLCOM_SET_OPTION: if (thd->lex->definer) - return "SET PASSWORD"; + return "SET PASSWORD"; /* fallthrough */ default: return thd->query(); } } +#endif //UNUSED -/* - returns: - 0: statement was replicated as TOI - 1: TOI replication was skipped - -1: TOI replication failed - */ -static int wsrep_TOI_begin(THD *thd, const char *db_, const char *table_, - const TABLE_LIST* table_list, - Alter_info* alter_info) +static int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len) { - wsrep_status_t ret(WSREP_WARNING); - uchar* buf(0); - size_t buf_len(0); - int buf_err; - int rc= 0; + String log_query; + sp_head *sp= thd->lex->sphead; + sql_mode_t saved_mode= thd->variables.sql_mode; + String retstr(64); + LEX_CSTRING returns= empty_clex_str; + retstr.set_charset(system_charset_info); - if (wsrep_can_run_in_toi(thd, db_, table_, table_list) == false) + log_query.set_charset(system_charset_info); + + if (sp->m_handler->type() == TYPE_ENUM_FUNCTION) { - WSREP_DEBUG("No TOI for %s", WSREP_QUERY(thd)); + sp_returns_type(thd, retstr, sp); + returns= retstr.lex_cstring(); + } + if (sp->m_handler-> + show_create_sp(thd, &log_query, + sp->m_explicit_name ? 
sp->m_db : null_clex_str, + sp->m_name, sp->m_params, returns, + sp->m_body, sp->chistics(), + thd->lex->definer[0], + thd->lex->create_info, + saved_mode)) + { + WSREP_WARN("SP create string failed: schema: %s, query: %s", + thd->get_db(), thd->query()); return 1; } - WSREP_DEBUG("TO BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), - thd->wsrep_exec_mode, wsrep_get_query_or_msg(thd)); + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len) +{ + int err; switch (thd->lex->sql_command) { case SQLCOM_CREATE_VIEW: - buf_err= create_view_query(thd, &buf, &buf_len); + err= create_view_query(thd, buf, buf_len); break; case SQLCOM_CREATE_PROCEDURE: case SQLCOM_CREATE_SPFUNCTION: - buf_err= wsrep_create_sp(thd, &buf, &buf_len); + err= wsrep_create_sp(thd, buf, buf_len); break; case SQLCOM_CREATE_TRIGGER: - buf_err= wsrep_create_trigger_query(thd, &buf, &buf_len); + err= wsrep_create_trigger_query(thd, buf, buf_len); break; case SQLCOM_CREATE_EVENT: - buf_err= wsrep_create_event_query(thd, &buf, &buf_len); + err= wsrep_create_event_query(thd, buf, buf_len); break; case SQLCOM_ALTER_EVENT: - buf_err= wsrep_alter_event_query(thd, &buf, &buf_len); + err= wsrep_alter_event_query(thd, buf, buf_len); break; case SQLCOM_DROP_TABLE: - buf_err= wsrep_drop_table_query(thd, &buf, &buf_len); + err= wsrep_drop_table_query(thd, buf, buf_len); break; case SQLCOM_CREATE_ROLE: if (sp_process_definer(thd)) @@ -1649,169 +1751,212 @@ static int wsrep_TOI_begin(THD *thd, const char *db_, const char *table_, } /* fallthrough */ default: - buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), - &buf, &buf_len); + err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), buf, + buf_len); break; } - wsrep_key_arr_t key_arr= {0, 0}; - struct wsrep_buf buff = { buf, buf_len }; - if (!buf_err && - !wsrep_prepare_keys_for_isolation(thd, db_, table_, - table_list, 
alter_info, &key_arr) && - key_arr.keys_len > 0 && - WSREP_OK == (ret = wsrep->to_execute_start(wsrep, thd->thread_id, - key_arr.keys, key_arr.keys_len, - &buff, 1, - &thd->wsrep_trx_meta))) - { - thd->wsrep_exec_mode= TOTAL_ORDER; - wsrep_to_isolation++; - wsrep_keys_free(&key_arr); - WSREP_DEBUG("TO BEGIN: %lld, %d",(long long)wsrep_thd_trx_seqno(thd), - thd->wsrep_exec_mode); - } - else if (key_arr.keys_len > 0) { - /* jump to error handler in mysql_execute_command() */ - WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. Check wsrep " - "connection state and retry the query.", - ret, - thd->get_db(), - (thd->query()) ? thd->query() : "void"); - my_message(ER_LOCK_DEADLOCK, "WSREP replication failed. Check " - "your wsrep connection state and retry the query.", MYF(0)); - wsrep_keys_free(&key_arr); - rc= -1; - } - else { - /* non replicated DDL, affecting temporary tables only */ - WSREP_DEBUG("TO isolation skipped for: %d, sql: %s." - "Only temporary tables affected.", - ret, (thd->query()) ? thd->query() : "void"); - rc= 1; + return err; +} + +static void wsrep_TOI_begin_failed(THD* thd, const wsrep_buf_t* /* const err */) +{ + if (wsrep_thd_trx_seqno(thd) > 0) + { + /* GTID was granted and TO acquired - need to log event and release TO */ + if (wsrep_emulate_bin_log) wsrep_thd_binlog_trx_reset(thd); + if (wsrep_write_dummy_event(thd, "TOI begin failed")) { goto fail; } + wsrep::client_state& cs(thd->wsrep_cs()); + int const ret= cs.leave_toi(); + if (ret) + { + WSREP_ERROR("Leaving critical section for failed TOI failed: thd: %lld, " + "schema: %s, SQL: %s, rcode: %d wsrep_error: %s", + (long long)thd->real_id, thd->db.str, + thd->query(), ret, wsrep::to_c_string(cs.current_error())); + goto fail; + } } - if (buf) my_free(buf); - return rc; + return; +fail: + WSREP_ERROR("Failed to release TOI resources. 
Need to abort."); + unireg_abort(1); } -static void wsrep_TOI_end(THD *thd) { - wsrep_status_t ret; - wsrep_to_isolation--; - WSREP_DEBUG("TO END: %lld, %d: %s", (long long)wsrep_thd_trx_seqno(thd), - thd->wsrep_exec_mode, wsrep_get_query_or_msg(thd)); +/* + returns: + 0: statement was replicated as TOI + 1: TOI replication was skipped + -1: TOI replication failed + */ +static int wsrep_TOI_begin(THD *thd, const char *db, const char *table, + const TABLE_LIST* table_list, + Alter_info* alter_info) +{ + DBUG_ASSERT(thd->variables.wsrep_OSU_method == WSREP_OSU_TOI); - wsrep_set_SE_checkpoint(thd->wsrep_trx_meta.gtid.uuid, - thd->wsrep_trx_meta.gtid.seqno); - WSREP_DEBUG("TO END: %lld, update seqno", - (long long)wsrep_thd_trx_seqno(thd)); - - if (WSREP_OK == (ret = wsrep->to_execute_end(wsrep, thd->thread_id))) { - WSREP_DEBUG("TO END: %lld", (long long)wsrep_thd_trx_seqno(thd)); + WSREP_DEBUG("TOI Begin"); + if (wsrep_can_run_in_toi(thd, db, table, table_list) == false) + { + WSREP_DEBUG("No TOI for %s", WSREP_QUERY(thd)); + return 1; } - else { - WSREP_WARN("TO isolation end failed for: %d, schema: %s, sql: %s", - ret, - thd->get_db(), - (thd->query()) ? thd->query() : "void"); + + uchar* buf= 0; + size_t buf_len(0); + int buf_err; + int rc; + + buf_err= wsrep_TOI_event_buf(thd, &buf, &buf_len); + if (buf_err) { + WSREP_ERROR("Failed to create TOI event buf: %d", buf_err); + my_message(ER_UNKNOWN_ERROR, + "WSREP replication failed to prepare TOI event buffer. 
" + "Check your query.", + MYF(0)); + return -1; } -} + struct wsrep_buf buff= { buf, buf_len }; -static int wsrep_RSU_begin(THD *thd, const char *db_, const char *table_) -{ - wsrep_status_t ret(WSREP_WARNING); - WSREP_DEBUG("RSU BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), - thd->wsrep_exec_mode, thd->query() ); + wsrep::key_array key_array= + wsrep_prepare_keys_for_toi(db, table, table_list, alter_info); - ret = wsrep->desync(wsrep); - if (ret != WSREP_OK) + if (thd->has_read_only_protection()) { - WSREP_WARN("RSU desync failed %d for schema: %s, query: %s", - ret, thd->get_db(), thd->query()); - my_error(ER_LOCK_DEADLOCK, MYF(0)); - return(ret); + /* non replicated DDL, affecting temporary tables only */ + WSREP_DEBUG("TO isolation skipped, sql: %s." + "Only temporary tables affected.", + WSREP_QUERY(thd)); + if (buf) my_free(buf); + return -1; } - mysql_mutex_lock(&LOCK_wsrep_replaying); - wsrep_replaying++; - mysql_mutex_unlock(&LOCK_wsrep_replaying); + thd_proc_info(thd, "acquiring total order isolation"); - if (wsrep_wait_committing_connections_close(5000)) + wsrep::client_state& cs(thd->wsrep_cs()); + int ret= cs.enter_toi(key_array, + wsrep::const_buffer(buff.ptr, buff.len), + wsrep::provider::flag::start_transaction | + wsrep::provider::flag::commit); + + if (ret) { - /* no can do, bail out from DDL */ - WSREP_WARN("RSU failed due to pending transactions, schema: %s, query %s", - thd->get_db(), thd->query()); - mysql_mutex_lock(&LOCK_wsrep_replaying); - wsrep_replaying--; - mysql_mutex_unlock(&LOCK_wsrep_replaying); + DBUG_ASSERT(cs.current_error()); + WSREP_DEBUG("to_execute_start() failed for %llu: %s, seqno: %lld", + thd->thread_id, WSREP_QUERY(thd), + (long long)wsrep_thd_trx_seqno(thd)); - ret = wsrep->resync(wsrep); - if (ret != WSREP_OK) + /* jump to error handler in mysql_execute_command() */ + switch (cs.current_error()) { - WSREP_WARN("resync failed %d for schema: %s, query: %s", - ret, thd->get_db(), thd->query()); + case 
wsrep::e_size_exceeded_error: + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. " + "Maximum size exceeded.", + ret, + (thd->db.str ? thd->db.str : "(null)"), + WSREP_QUERY(thd)); + my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED); + break; + default: + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. " + "Check wsrep connection state and retry the query.", + ret, + (thd->db.str ? thd->db.str : "(null)"), + WSREP_QUERY(thd)); + if (!thd->is_error()) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. Check " + "your wsrep connection state and retry the query."); + } } - - my_error(ER_LOCK_DEADLOCK, MYF(0)); - return(1); + rc= -1; } - - wsrep_seqno_t seqno = wsrep->pause(wsrep); - if (seqno == WSREP_SEQNO_UNDEFINED) - { - WSREP_WARN("pause failed %lld for schema: %s, query: %s", (long long)seqno, - thd->get_db(), thd->query()); - return(1); + else { + ++wsrep_to_isolation; + rc= 0; } - WSREP_DEBUG("paused at %lld", (long long)seqno); - thd->variables.wsrep_on = 0; - return 0; -} -static void wsrep_RSU_end(THD *thd) -{ - wsrep_status_t ret(WSREP_WARNING); - WSREP_DEBUG("RSU END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), - thd->wsrep_exec_mode, thd->query() ); + if (buf) my_free(buf); + if (rc) wsrep_TOI_begin_failed(thd, NULL); - mysql_mutex_lock(&LOCK_wsrep_replaying); - wsrep_replaying--; - mysql_mutex_unlock(&LOCK_wsrep_replaying); + return rc; +} - ret = wsrep->resume(wsrep); - if (ret != WSREP_OK) +static void wsrep_TOI_end(THD *thd) { + int ret; + wsrep_to_isolation--; + wsrep::client_state& client_state(thd->wsrep_cs()); + DBUG_ASSERT(wsrep_thd_is_local_toi(thd)); + WSREP_DEBUG("TO END: %lld: %s", client_state.toi_meta().seqno().get(), + WSREP_QUERY(thd)); + + if (wsrep_thd_is_local_toi(thd)) { - WSREP_WARN("resume failed %d for schema: %s, query: %s", ret, - thd->get_db(), thd->query()); + wsrep_set_SE_checkpoint(client_state.toi_meta().gtid()); + if (thd->is_error() && 
!wsrep_must_ignore_error(thd)) + { + wsrep_apply_error err; + err.store(thd); + client_state.leave_toi(); + } + else + { + ret= client_state.leave_toi(); + } + + if (ret == 0) + { + WSREP_DEBUG("TO END: %lld", client_state.toi_meta().seqno().get()); + } + else + { + WSREP_WARN("TO isolation end failed for: %d, schema: %s, sql: %s", + ret, (thd->db.str ? thd->db.str : "(null)"), WSREP_QUERY(thd)); + } } +} - ret = wsrep->resync(wsrep); - if (ret != WSREP_OK) +static int wsrep_RSU_begin(THD *thd, const char *db_, const char *table_) +{ + WSREP_DEBUG("RSU BEGIN: %lld, : %s", wsrep_thd_trx_seqno(thd), + WSREP_QUERY(thd)); + if (thd->wsrep_cs().begin_rsu(5000)) { - WSREP_WARN("resync failed %d for schema: %s, query: %s", ret, - thd->get_db(), thd->query()); - return; + WSREP_WARN("RSU begin failed"); + } + else + { + thd->variables.wsrep_on= 0; } + return 0; +} - thd->variables.wsrep_on = 1; +static void wsrep_RSU_end(THD *thd) +{ + WSREP_DEBUG("RSU END: %lld : %s", wsrep_thd_trx_seqno(thd), + WSREP_QUERY(thd)); + if (thd->wsrep_cs().end_rsu()) + { + WSREP_WARN("Failed to end RSU, server may need to be restarted"); + } + thd->variables.wsrep_on= 1; } int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, const TABLE_LIST* table_list, Alter_info* alter_info) { - int ret= 0; - /* No isolation for applier or replaying threads. 
*/ - if (thd->wsrep_exec_mode == REPL_RECV) - return 0; + if (!wsrep_thd_is_local(thd)) return 0; + int ret= 0; mysql_mutex_lock(&thd->LOCK_thd_data); - if (thd->wsrep_conflict_state == MUST_ABORT) + if (thd->wsrep_trx().state() == wsrep::transaction::s_must_abort) { WSREP_INFO("thread: %lld schema: %s query: %s has been aborted due to multi-master conflict", (longlong) thd->thread_id, thd->get_db(), thd->query()); @@ -1820,20 +1965,20 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, } mysql_mutex_unlock(&thd->LOCK_thd_data); - DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); - DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + DBUG_ASSERT(wsrep_thd_is_local(thd)); + DBUG_ASSERT(thd->wsrep_trx().ws_meta().seqno().is_undefined()); - if (thd->has_read_only_protection()) + if (thd->global_read_lock.is_acquired()) { - WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %lld", - thd->query(), (longlong) thd->thread_id); + WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %llu", + WSREP_QUERY(thd), thd->thread_id); return -1; } if (wsrep_debug && thd->mdl_context.has_locks()) { - WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lld", - thd->query(), (longlong) thd->thread_id); + WSREP_DEBUG("thread holds MDL locks at TI begin: %s %llu", + WSREP_QUERY(thd), thd->thread_id); } /* @@ -1845,11 +1990,11 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, */ if (wsrep_auto_increment_control) { - thd->variables.auto_increment_offset = 1; - thd->variables.auto_increment_increment = 1; + thd->variables.auto_increment_offset= 1; + thd->variables.auto_increment_increment= 1; } - if (thd->variables.wsrep_on && thd->wsrep_exec_mode==LOCAL_STATE) + if (thd->variables.wsrep_on && wsrep_thd_is_local(thd)) { switch (thd->variables.wsrep_OSU_method) { case WSREP_OSU_TOI: @@ -1865,48 +2010,53 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, break; } 
switch (ret) { - case 0: thd->wsrep_exec_mode= TOTAL_ORDER; break; + case 0: /* wsrep_TOI_begin sould set toi mode */ break; case 1: /* TOI replication skipped, treat as success */ - ret = 0; + ret= 0; break; case -1: /* TOI replication failed, treat as error */ break; } } + return ret; } void wsrep_to_isolation_end(THD *thd) { - if (thd->wsrep_exec_mode == TOTAL_ORDER) + DBUG_ASSERT(wsrep_thd_is_local_toi(thd) || + wsrep_thd_is_in_rsu(thd)); + if (wsrep_thd_is_local_toi(thd)) { - switch(thd->variables.wsrep_OSU_method) - { - case WSREP_OSU_TOI: wsrep_TOI_end(thd); break; - case WSREP_OSU_RSU: wsrep_RSU_end(thd); break; - default: - WSREP_WARN("Unsupported wsrep OSU method at isolation end: %lu", - thd->variables.wsrep_OSU_method); - break; - } - wsrep_cleanup_transaction(thd); + DBUG_ASSERT(thd->variables.wsrep_OSU_method == WSREP_OSU_TOI); + wsrep_TOI_end(thd); } + else if (wsrep_thd_is_in_rsu(thd)) + { + DBUG_ASSERT(thd->variables.wsrep_OSU_method == WSREP_OSU_RSU); + wsrep_RSU_end(thd); + } + else + { + DBUG_ASSERT(0); + } + if (wsrep_emulate_bin_log) wsrep_thd_binlog_trx_reset(thd); } #define WSREP_MDL_LOG(severity, msg, schema, schema_len, req, gra) \ WSREP_##severity( \ "%s\n" \ "schema: %.*s\n" \ - "request: (%lld \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)\n" \ - "granted: (%lld \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)", \ + "request: (%llu \tseqno %lld \twsrep (%s, %s, %s) cmd %d %d \t%s)\n" \ + "granted: (%llu \tseqno %lld \twsrep (%s, %s, %s) cmd %d %d \t%s)", \ msg, schema_len, schema, \ - (longlong) req->thread_id, (long long)wsrep_thd_trx_seqno(req), \ - req->wsrep_exec_mode, req->wsrep_query_state, req->wsrep_conflict_state, \ + req->thread_id, (long long)wsrep_thd_trx_seqno(req), \ + wsrep_thd_client_mode_str(req), wsrep_thd_client_state_str(req), wsrep_thd_transaction_state_str(req), \ req->get_command(), req->lex->sql_command, req->query(), \ - (longlong) gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \ - 
gra->wsrep_exec_mode, gra->wsrep_query_state, gra->wsrep_conflict_state, \ + gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \ + wsrep_thd_client_mode_str(gra), wsrep_thd_client_state_str(gra), wsrep_thd_transaction_state_str(gra), \ gra->get_command(), gra->lex->sql_command, gra->query()); /** @@ -1919,58 +2069,47 @@ void wsrep_to_isolation_end(THD *thd) @retval FALSE Lock request cannot be granted */ -bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, +void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, MDL_ticket *ticket, const MDL_key *key) { /* Fallback to the non-wsrep behaviour */ - if (!WSREP_ON) return FALSE; + if (!WSREP_ON) return; THD *request_thd= requestor_ctx->get_thd(); THD *granted_thd= ticket->get_ctx()->get_thd(); - bool ret= false; const char* schema= key->db_name(); int schema_len= key->db_name_length(); mysql_mutex_lock(&request_thd->LOCK_thd_data); + if (wsrep_thd_is_toi(request_thd) || + wsrep_thd_is_applying(request_thd)) { - /* - We consider granting MDL exceptions only for appliers (BF THD) and ones - executing under TOI mode. - - Rules: - 1. If granted/owner THD is also an applier (BF THD) or one executing - under TOI mode, then we grant the requested lock to the requester - THD. - @return true - - 2. If granted/owner THD is executing a FLUSH command or already has an - explicit lock, then do not grant the requested lock to the requester - THD and it has to wait. - @return false - - 3. In all other cases the granted/owner THD is aborted and the requested - lock is not granted to the requester THD, thus it has to wait. 
- @return false - */ - if (request_thd->wsrep_exec_mode == TOTAL_ORDER || - request_thd->wsrep_exec_mode == REPL_RECV) - { mysql_mutex_unlock(&request_thd->LOCK_thd_data); WSREP_MDL_LOG(DEBUG, "MDL conflict ", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(wsrep_debug); mysql_mutex_lock(&granted_thd->LOCK_thd_data); - if (granted_thd->wsrep_exec_mode == TOTAL_ORDER || - granted_thd->wsrep_exec_mode == REPL_RECV) + if (wsrep_thd_is_toi(granted_thd) || + wsrep_thd_is_applying(granted_thd)) { - WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, - request_thd, granted_thd); - ticket->wsrep_report(true); - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); - ret= true; + if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) + { + WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", + schema, schema_len, request_thd, granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + } + else + { + WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + unireg_abort(1); + } } else if (granted_thd->lex->sql_command == SQLCOM_FLUSH || granted_thd->mdl_context.has_explicit_locks()) @@ -1978,173 +2117,57 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, WSREP_DEBUG("BF thread waiting for FLUSH"); ticket->wsrep_report(wsrep_debug); mysql_mutex_unlock(&granted_thd->LOCK_thd_data); - ret= false; + } + else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + WSREP_DEBUG("DROP caused BF abort, conf %s", + wsrep_thd_transaction_state_str(granted_thd)); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); } else { - /* Print some debug information. 
*/ - if (wsrep_debug) + WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + if (granted_thd->wsrep_trx().active()) { - if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE || - request_thd->lex->sql_command == SQLCOM_DROP_SEQUENCE) - { - WSREP_DEBUG("DROP caused BF abort, conf %d", granted_thd->wsrep_conflict_state); - } - else if (granted_thd->wsrep_query_state == QUERY_COMMITTING) + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + wsrep_abort_thd(request_thd, granted_thd, 1); + } + else + { + /* + Granted_thd is likely executing with wsrep_on=0. If the requesting + thd is BF, BF abort and wait. + */ + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + if (wsrep_thd_is_BF(request_thd, FALSE)) { - WSREP_DEBUG("MDL granted, but committing thd abort scheduled"); + ha_abort_transaction(request_thd, granted_thd, TRUE); } else { - WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", schema, schema_len, - request_thd, granted_thd); + WSREP_MDL_LOG(INFO, "MDL unknown BF-BF conflict", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(true); + unireg_abort(1); } - ticket->wsrep_report(true); } - - mysql_mutex_unlock(&granted_thd->LOCK_thd_data); - wsrep_abort_thd((void *) request_thd, (void *) granted_thd, 1); - ret= false; } } else { mysql_mutex_unlock(&request_thd->LOCK_thd_data); } - - return ret; -} - - -pthread_handler_t start_wsrep_THD(void *arg) -{ - THD *thd; - wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg; - - if (my_thread_init() || (!(thd= new THD(next_thread_id(), true)))) - { - goto error; - } - - mysql_mutex_lock(&LOCK_thread_count); - - if (wsrep_gtid_mode) - { - /* Adjust domain_id. 
*/ - thd->variables.gtid_domain_id= wsrep_gtid_domain_id; - } - - thd->real_id=pthread_self(); // Keep purify happy - thread_created++; - threads.append(thd); - - my_net_init(&thd->net,(st_vio*) 0, thd, MYF(0)); - - DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id)); - thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); - (void) mysql_mutex_unlock(&LOCK_thread_count); - - /* from bootstrap()... */ - thd->bootstrap=1; - thd->max_client_packet_length= thd->net.max_packet; - thd->security_ctx->master_access= ~(ulong)0; - - /* from handle_one_connection... */ - pthread_detach_this_thread(); - - mysql_thread_set_psi_id(thd->thread_id); - thd->thr_create_utime= microsecond_interval_timer(); - if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) - { - close_connection(thd, ER_OUT_OF_RESOURCES); - statistic_increment(aborted_connects,&LOCK_status); - MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); - goto error; - } - -// </5.1.17> - /* - handle_one_connection() is normally the only way a thread would - start and would always be on the very high end of the stack , - therefore, the thread stack always starts at the address of the - first local variable of handle_one_connection, which is thd. We - need to know the start of the stack so that we could check for - stack overruns. - */ - DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n", - (long long)thd->thread_id)); - /* now that we've called my_thread_init(), it is safe to call DBUG_* */ - - thd->thread_stack= (char*) &thd; - if (thd->store_globals()) - { - close_connection(thd, ER_OUT_OF_RESOURCES); - statistic_increment(aborted_connects,&LOCK_status); - MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); - goto error; - } - - thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; - thd->security_ctx->skip_grants(); - - /* handle_one_connection() again... 
*/ - //thd->version= refresh_version; - thd->proc_info= 0; - thd->set_command(COM_SLEEP); - thd->init_for_queries(); - - mysql_mutex_lock(&LOCK_thread_count); - wsrep_running_threads++; - mysql_cond_broadcast(&COND_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); - - processor(thd); - - close_connection(thd, 0); - - mysql_mutex_lock(&LOCK_thread_count); - wsrep_running_threads--; - WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads); - mysql_cond_broadcast(&COND_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); - - // Note: We can't call THD destructor without crashing - // if plugins have not been initialized. However, in most of the - // cases this means that pre SE initialization SST failed and - // we are going to exit anyway. - if (plugins_are_initialized) - { - net_end(&thd->net); - MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1)); - } - else - { - // TODO: lightweight cleanup to get rid of: - // 'Error in my_thread_global_end(): 2 threads didn't exit' - // at server shutdown - } - - unlink_not_visible_thd(thd); - delete thd; - my_thread_end(); - return(NULL); - -error: - WSREP_ERROR("Failed to create/initialize system thread"); - - /* Abort if its the first applier/rollbacker thread. */ - if (!mysqld_server_initialized) - unireg_abort(1); - else - return NULL; } - /**/ static bool abort_replicated(THD *thd) { bool ret_code= false; - if (thd->wsrep_query_state== QUERY_COMMITTING) + if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) { WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); @@ -2154,38 +2177,34 @@ static bool abort_replicated(THD *thd) return ret_code; } - /**/ static inline bool is_client_connection(THD *thd) { return (thd->wsrep_client_thread && thd->variables.wsrep_on); } - static inline bool is_replaying_connection(THD *thd) { bool ret; mysql_mutex_lock(&thd->LOCK_thd_data); - ret= (thd->wsrep_conflict_state == REPLAYING) ? 
true : false; + ret= (thd->wsrep_trx().state() == wsrep::transaction::s_replaying) ? true : false; mysql_mutex_unlock(&thd->LOCK_thd_data); return ret; } - static inline bool is_committing_connection(THD *thd) { bool ret; mysql_mutex_lock(&thd->LOCK_thd_data); - ret= (thd->wsrep_query_state == QUERY_COMMITTING) ? true : false; + ret= (thd->wsrep_trx().state() == wsrep::transaction::s_committing) ? true : false; mysql_mutex_unlock(&thd->LOCK_thd_data); return ret; } - static bool have_client_connections() { THD *tmp; @@ -2222,7 +2241,6 @@ static void wsrep_close_thread(THD *thd) } } - static my_bool have_committing_connections() { THD *tmp; @@ -2236,6 +2254,7 @@ static my_bool have_committing_connections() if (is_committing_connection(tmp)) { + mysql_mutex_unlock(&LOCK_thread_count); return TRUE; } } @@ -2243,7 +2262,6 @@ static my_bool have_committing_connections() return FALSE; } - int wsrep_wait_committing_connections_close(int wait_time) { int sleep_time= 100; @@ -2261,8 +2279,7 @@ int wsrep_wait_committing_connections_close(int wait_time) return 0; } - -void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) +void wsrep_close_client_connections(my_bool wait_to_end, THD* except_caller_thd) { /* First signal all threads that it's time to die @@ -2305,12 +2322,7 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd) /* instead of wsrep_close_thread() we do now soft kill by THD::awake */ - mysql_mutex_lock(&tmp->LOCK_thd_data); - tmp->awake(KILL_CONNECTION); - - mysql_mutex_unlock(&tmp->LOCK_thd_data); - } mysql_mutex_unlock(&LOCK_thread_count); @@ -2360,7 +2372,6 @@ void wsrep_close_applier(THD *thd) wsrep_close_thread(thd); } - void wsrep_close_threads(THD *thd) { THD *tmp; @@ -2386,10 +2397,12 @@ void wsrep_wait_appliers_close(THD *thd) { /* Wait for wsrep appliers to gracefully exit */ mysql_mutex_lock(&LOCK_thread_count); - while (wsrep_running_threads > 1) - // 1 is for rollbacker thread which needs to be 
killed explicitly. - // This gotta be fixed in a more elegant manner if we gonna have arbitrary - // number of non-applier wsrep threads. + while (wsrep_running_threads > 2) + /* + 2 is for rollbacker thread which needs to be killed explicitly. + This gotta be fixed in a more elegant manner if we gonna have arbitrary + number of non-applier wsrep threads. + */ { if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) { @@ -2425,7 +2438,6 @@ void wsrep_wait_appliers_close(THD *thd) */ } - void wsrep_kill_mysql(THD *thd) { if (mysqld_server_started) @@ -2442,267 +2454,167 @@ void wsrep_kill_mysql(THD *thd) } } - -static int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len) +void +wsrep_last_committed_id(wsrep_gtid_t* gtid) { - String log_query; - sp_head *sp = thd->lex->sphead; - sql_mode_t saved_mode= thd->variables.sql_mode; - String retstr(64); - LEX_CSTRING returns= empty_clex_str; - retstr.set_charset(system_charset_info); - - log_query.set_charset(system_charset_info); - - if (sp->m_handler->type() == TYPE_ENUM_FUNCTION) - { - sp_returns_type(thd, retstr, sp); - returns= retstr.lex_cstring(); - } - if (sp->m_handler-> - show_create_sp(thd, &log_query, - sp->m_explicit_name ? 
sp->m_db : null_clex_str, - sp->m_name, sp->m_params, returns, - sp->m_body, sp->chistics(), - thd->lex->definer[0], - thd->lex->create_info, - saved_mode)) - { - WSREP_WARN("SP create string failed: schema: %s, query: %s", - thd->get_db(), thd->query()); - return 1; - } - - return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); + wsrep::gtid ret= Wsrep_server_state::instance().last_committed_gtid(); + memcpy(gtid->uuid.data, ret.id().data(), sizeof(gtid->uuid.data)); + gtid->seqno= ret.seqno().get(); } - -extern int wsrep_on(THD *thd) +void +wsrep_node_uuid(wsrep_uuid_t& uuid) { - return (int)(WSREP(thd)); + uuid= node_uuid; } - -extern "C" bool wsrep_thd_is_wsrep_on(THD *thd) +int wsrep_must_ignore_error(THD* thd) { - return thd->variables.wsrep_on; -} + const int error= thd->get_stmt_da()->sql_errno(); + const uint flags= sql_command_flags[thd->lex->sql_command]; + DBUG_ASSERT(error); + DBUG_ASSERT((wsrep_thd_is_toi(thd)) || + (wsrep_thd_is_applying(thd) && thd->wsrep_apply_toi)); -bool wsrep_consistency_check(THD *thd) -{ - return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; -} - - -extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode) -{ - thd->wsrep_exec_mode= mode; -} + if ((wsrep_ignore_apply_errors & WSREP_IGNORE_ERRORS_ON_DDL)) + goto ignore_error; + if ((flags & CF_WSREP_MAY_IGNORE_ERRORS) && + (wsrep_ignore_apply_errors & WSREP_IGNORE_ERRORS_ON_RECONCILING_DDL)) + { + switch (error) + { + case ER_DB_DROP_EXISTS: + case ER_BAD_TABLE_ERROR: + case ER_CANT_DROP_FIELD_OR_KEY: + goto ignore_error; + } + } -extern "C" void wsrep_thd_set_query_state( - THD *thd, enum wsrep_query_state state) -{ - thd->wsrep_query_state= state; -} - - -void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state) -{ - if (WSREP(thd)) thd->wsrep_conflict_state= state; -} - - -enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd) -{ - return thd->wsrep_exec_mode; -} - - -const char 
*wsrep_thd_exec_mode_str(THD *thd) -{ - return - (!thd) ? "void" : - (thd->wsrep_exec_mode == LOCAL_STATE) ? "local" : - (thd->wsrep_exec_mode == REPL_RECV) ? "applier" : - (thd->wsrep_exec_mode == TOTAL_ORDER) ? "total order" : - (thd->wsrep_exec_mode == LOCAL_COMMIT) ? "local commit" : "void"; -} - - -enum wsrep_query_state wsrep_thd_query_state(THD *thd) -{ - return thd->wsrep_query_state; -} - - -const char *wsrep_thd_query_state_str(THD *thd) -{ - return - (!thd) ? "void" : - (thd->wsrep_query_state == QUERY_IDLE) ? "idle" : - (thd->wsrep_query_state == QUERY_EXEC) ? "executing" : - (thd->wsrep_query_state == QUERY_COMMITTING) ? "committing" : - (thd->wsrep_query_state == QUERY_EXITING) ? "exiting" : - (thd->wsrep_query_state == QUERY_ROLLINGBACK) ? "rolling back" : "void"; -} - - -enum wsrep_conflict_state wsrep_thd_get_conflict_state(THD *thd) -{ - return thd->wsrep_conflict_state; -} - - -const char *wsrep_thd_conflict_state_str(THD *thd) -{ - return - (!thd) ? "void" : - (thd->wsrep_conflict_state == NO_CONFLICT) ? "no conflict" : - (thd->wsrep_conflict_state == MUST_ABORT) ? "must abort" : - (thd->wsrep_conflict_state == ABORTING) ? "aborting" : - (thd->wsrep_conflict_state == MUST_REPLAY) ? "must replay" : - (thd->wsrep_conflict_state == REPLAYING) ? "replaying" : - (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT) ? "retrying" : - (thd->wsrep_conflict_state == CERT_FAILURE) ? "cert failure" : "void"; -} - - -wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd) -{ - return &thd->wsrep_ws_handle; -} - - -void wsrep_thd_LOCK(THD *thd) -{ - mysql_mutex_lock(&thd->LOCK_thd_data); -} - + return 0; -void wsrep_thd_UNLOCK(THD *thd) -{ - mysql_mutex_unlock(&thd->LOCK_thd_data); +ignore_error: + WSREP_WARN("Ignoring error '%s' on query. " + "Default database: '%s'. 
Query: '%s', Error_code: %d", + thd->get_stmt_da()->message(), + print_slave_db_safe(thd->db.str), + thd->query(), + error); + return 1; } - -extern "C" time_t wsrep_thd_query_start(THD *thd) +int wsrep_ignored_error_code(Log_event* ev, int error) { - return thd->query_start(); -} + const THD* thd= ev->thd; + DBUG_ASSERT(error); + DBUG_ASSERT(wsrep_thd_is_applying(thd) && + !wsrep_thd_is_local_toi(thd)); -extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd) -{ - return thd->wsrep_rand; -} - -longlong wsrep_thd_trx_seqno(THD *thd) -{ - return (thd) ? thd->wsrep_trx_meta.gtid.seqno : WSREP_SEQNO_UNDEFINED; -} + if ((wsrep_ignore_apply_errors & WSREP_IGNORE_ERRORS_ON_RECONCILING_DML)) + { + const int ev_type= ev->get_type_code(); + if ((ev_type == DELETE_ROWS_EVENT || ev_type == DELETE_ROWS_EVENT_V1) + && error == ER_KEY_NOT_FOUND) + goto ignore_error; + } + return 0; -extern "C" query_id_t wsrep_thd_query_id(THD *thd) -{ - return thd->query_id; +ignore_error: + WSREP_WARN("Ignoring error '%s' on %s event. Error_code: %d", + thd->get_stmt_da()->message(), + ev->get_type_str(), + error); + return 1; } - -char *wsrep_thd_query(THD *thd) +bool wsrep_provider_is_SR_capable() { - return (thd) ? 
thd->query() : NULL; + return Wsrep_server_state::has_capability(wsrep::provider::capability::streaming); } -extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd) +int wsrep_ordered_commit_if_no_binlog(THD* thd, bool all) { - return thd->wsrep_last_query_id; + if (((wsrep_thd_is_local(thd) && + (WSREP_EMULATE_BINLOG(thd) || !thd->variables.sql_log_bin)) || + (wsrep_thd_is_applying(thd) && !opt_log_slave_updates)) + && wsrep_thd_trx_seqno(thd) > 0) + { + wsrep_apply_error unused; + return wsrep_ordered_commit(thd, all, unused); + } + return 0; } - -extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id) +wsrep_status_t wsrep_tc_log_commit(THD* thd) { - thd->wsrep_last_query_id= id; -} - + int cookie; + my_xid xid= thd->transaction.xid_state.xid.get_my_xid(); -extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) -{ - if (signal) + DBUG_ASSERT(thd->lex->sql_command == SQLCOM_LOAD); + if (wsrep_before_commit(thd, true)) { - thd->awake(KILL_QUERY); + WSREP_DEBUG("wsrep_tc_log_commit: wsrep_before_commit failed %llu", + thd->thread_id); + return WSREP_TRX_FAIL; } - else + cookie= tc_log->log_and_order(thd, xid, 1, false, true); + if (wsrep_after_commit(thd, true)) { - mysql_mutex_lock(&LOCK_wsrep_replaying); - mysql_cond_broadcast(&COND_wsrep_replaying); - mysql_mutex_unlock(&LOCK_wsrep_replaying); + WSREP_DEBUG("wsrep_tc_log_commit: wsrep_after_commit failed %llu", + thd->thread_id); + return WSREP_TRX_FAIL; + } + if (!cookie) + { + WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie); + return WSREP_TRX_FAIL; + } + if (tc_log->unlog(cookie, xid)) + { + WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie); + return WSREP_TRX_FAIL; } -} - - -int wsrep_thd_retry_counter(THD *thd) -{ - return(thd->wsrep_retry_counter); -} - - -extern "C" bool wsrep_thd_ignore_table(THD *thd) -{ - return thd->wsrep_ignore_table; -} - -extern int -wsrep_trx_order_before(THD *thd1, THD *thd2) -{ - if 
(wsrep_thd_trx_seqno(thd1) < wsrep_thd_trx_seqno(thd2)) { - WSREP_DEBUG("BF conflict, order: %lld %lld\n", - (long long)wsrep_thd_trx_seqno(thd1), - (long long)wsrep_thd_trx_seqno(thd2)); - return 1; + if (wsrep_after_statement(thd)) + { + return WSREP_TRX_FAIL; + } + /* Set wsrep transaction id if not set. */ + if (thd->wsrep_trx_id() == WSREP_UNDEFINED_TRX_ID) + { + if (thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID) + { + thd->set_wsrep_next_trx_id(thd->query_id); } - WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", - (long long)wsrep_thd_trx_seqno(thd1), - (long long)wsrep_thd_trx_seqno(thd2)); - return 0; + DBUG_ASSERT(thd->wsrep_next_trx_id() != WSREP_UNDEFINED_TRX_ID); + } + if (wsrep_start_transaction(thd, thd->wsrep_next_trx_id())) + { + return WSREP_TRX_FAIL; + } + DBUG_ASSERT(thd->wsrep_trx_id() != WSREP_UNDEFINED_TRX_ID); + return WSREP_OK; } - -int wsrep_trx_is_aborting(THD *thd_ptr) +int wsrep_thd_retry_counter(const THD *thd) { - if (thd_ptr) { - if ((((THD *)thd_ptr)->wsrep_conflict_state == MUST_ABORT) || - (((THD *)thd_ptr)->wsrep_conflict_state == ABORTING)) { - return 1; - } - } - return 0; + return thd->wsrep_retry_counter; } - -void wsrep_copy_query(THD *thd) +extern bool wsrep_thd_ignore_table(THD *thd) { - thd->wsrep_retry_command = thd->get_command(); - thd->wsrep_retry_query_len = thd->query_length(); - if (thd->wsrep_retry_query) { - my_free(thd->wsrep_retry_query); - } - thd->wsrep_retry_query = (char *)my_malloc( - thd->wsrep_retry_query_len + 1, MYF(0)); - strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len); - thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; + return thd->wsrep_ignore_table; } - bool wsrep_is_show_query(enum enum_sql_command command) { DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; } - bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table, HA_CREATE_INFO *create_info) @@ -2753,8 +2665,7 
@@ wsrep_error_label: #endif } - -static int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) { LEX *lex= thd->lex; String stmt_query; @@ -2809,88 +2720,165 @@ static int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) buf, buf_len); } -/***** callbacks for wsrep service ************/ - -my_bool get_wsrep_debug() +void* start_wsrep_THD(void *arg) { - return wsrep_debug; -} + THD *thd; -my_bool get_wsrep_load_data_splitting() -{ - return wsrep_load_data_splitting; -} + Wsrep_thd_args* thd_args= (Wsrep_thd_args*) arg; -long get_wsrep_protocol_version() -{ - return wsrep_protocol_version; -} + if (my_thread_init() || (!(thd= new THD(next_thread_id(), true)))) + { + goto error; + } -my_bool get_wsrep_drupal_282555_workaround() -{ - return wsrep_drupal_282555_workaround; -} + mysql_mutex_lock(&LOCK_thread_count); -my_bool get_wsrep_recovery() -{ - return wsrep_recovery; -} + if (wsrep_gtid_mode) + { + /* Adjust domain_id. */ + thd->variables.gtid_domain_id= wsrep_gtid_domain_id; + } -my_bool get_wsrep_log_conflicts() -{ - return wsrep_log_conflicts; -} + thd->real_id=pthread_self(); // Keep purify happy + thread_created++; + threads.append(thd); -wsrep_t *get_wsrep() -{ - return wsrep; -} + my_net_init(&thd->net,(st_vio*) 0, thd, MYF(0)); -my_bool get_wsrep_certify_nonPK() -{ - return wsrep_certify_nonPK; -} + DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id)); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + (void) mysql_mutex_unlock(&LOCK_thread_count); -void wsrep_lock_rollback() -{ - mysql_mutex_lock(&LOCK_wsrep_rollback); -} + /* from bootstrap()... 
*/ + thd->bootstrap=1; + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ~(ulong)0; -void wsrep_unlock_rollback() -{ - mysql_cond_signal(&COND_wsrep_rollback); - mysql_mutex_unlock(&LOCK_wsrep_rollback); -} + /* from handle_one_connection... */ + pthread_detach_this_thread(); -my_bool wsrep_aborting_thd_contains(THD *thd) -{ - mysql_mutex_assert_owner(&LOCK_wsrep_rollback); - wsrep_aborting_thd_t abortees = wsrep_aborting_thd; - while (abortees) + mysql_thread_set_psi_id(thd->thread_id); + thd->thr_create_utime= microsecond_interval_timer(); + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) { - if (abortees->aborting_thd == thd) - return true; - abortees = abortees->next; + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + goto error; } - return false; + +// </5.1.17> + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. + */ + DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n", + (long long)thd->thread_id)); + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ + + thd->thread_stack= (char*) &thd; + if (thd->store_globals()) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + delete thd; + delete thd_args; + goto error; + } + + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + + /* handle_one_connection() again... 
*/ + thd->proc_info= 0; + thd->set_command(COM_SLEEP); + thd->init_for_queries(); + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads++; + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + WSREP_DEBUG("wsrep system thread %llu, %p starting", + thd->thread_id, thd); + thd_args->fun()(thd, thd_args->args()); + + WSREP_DEBUG("wsrep system thread: %llu, %p closing", + thd->thread_id, thd); + + /* Wsrep may reset globals during thread context switches, store globals + before cleanup. */ + thd->store_globals(); + + close_connection(thd, 0); + + delete thd_args; + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads--; + WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + /* + Note: We can't call THD destructor without crashing + if plugins have not been initialized. However, in most of the + cases this means that pre SE initialization SST failed and + we are going to exit anyway. + */ + if (plugins_are_initialized) + { + net_end(&thd->net); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1)); + } + else + { + /* + TODO: lightweight cleanup to get rid of: + 'Error in my_thread_global_end(): 2 threads didn't exit' + at server shutdown + */ + } + + unlink_not_visible_thd(thd); + delete thd; + my_thread_end(); + return(NULL); + +error: + WSREP_ERROR("Failed to create/initialize system thread"); + + /* Abort if its the first applier/rollbacker thread. 
*/ + if (!mysqld_server_initialized) + unireg_abort(1); + else + return NULL; } -void wsrep_aborting_thd_enqueue(THD *thd) +enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit) { - mysql_mutex_assert_owner(&LOCK_wsrep_rollback); - wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t) - my_malloc(sizeof(struct wsrep_aborting_thd), MYF(0)); - aborting->aborting_thd = thd; - aborting->next = wsrep_aborting_thd; - wsrep_aborting_thd = aborting; + switch (unit) + { + case WSREP_FRAG_BYTES: return wsrep::streaming_context::bytes; + case WSREP_FRAG_ROWS: return wsrep::streaming_context::row; + case WSREP_FRAG_STATEMENTS: return wsrep::streaming_context::statement; + default: + DBUG_ASSERT(0); + return wsrep::streaming_context::bytes; + } } -bool wsrep_node_is_donor() +/***** callbacks for wsrep service ************/ + +my_bool get_wsrep_recovery() { - return (WSREP_ON) ? (wsrep_config_state->get_status() == 2) : false; + return wsrep_recovery; } -bool wsrep_node_is_synced() +bool wsrep_consistency_check(THD *thd) { - return (WSREP_ON) ? 
(wsrep_config_state->get_status() == 4) : false; + return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; } diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index b434c248347..957f1ef3ab1 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -1,4 +1,4 @@ -/* Copyright 2008-2015 Codership Oy <http://www.codership.com> +/* Copyright 2008-2017 Codership Oy <http://www.codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,24 +13,32 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ -#include <wsrep.h> - #ifndef WSREP_MYSQLD_H #define WSREP_MYSQLD_H -#include <mysql/plugin.h> -#include <mysql/service_wsrep.h> +#include <wsrep.h> #ifdef WITH_WSREP +#include <mysql/plugin.h> +#include "mysql/service_wsrep.h" + +#include <my_global.h> +#include <my_pthread.h> +#include "log.h" +#include "mysqld.h" + typedef struct st_mysql_show_var SHOW_VAR; #include <sql_priv.h> -//#include "rpl_gtid.h" -#include "../wsrep/wsrep_api.h" #include "mdl.h" -#include "mysqld.h" #include "sql_table.h" +#include "wsrep/provider.hpp" +#include "wsrep/streaming_context.hpp" +#include "wsrep_api.h" +#include <vector> +#include "wsrep_server_state.h" + #define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX class set_var; @@ -42,20 +50,7 @@ enum wsrep_consistency_check_mode { CONSISTENCY_CHECK_RUNNING, }; -struct wsrep_thd_shadow { - ulonglong options; - uint server_status; - enum wsrep_exec_mode wsrep_exec_mode; - Vio *vio; - ulong tx_isolation; - const char *db; - size_t db_length; - my_hrtime_t user_time; - longlong row_count_func; -}; - // Global wsrep parameters -extern wsrep_t* wsrep; // MySQL wsrep options extern const char* wsrep_provider; @@ -69,24 +64,33 @@ extern const char* wsrep_data_home_dir; extern const char* wsrep_dbug_option; extern long wsrep_slave_threads; extern int 
wsrep_slave_count_change; +extern MYSQL_PLUGIN_IMPORT my_bool wsrep_debug; extern my_bool wsrep_convert_LOCK_to_trx; extern ulong wsrep_retry_autocommit; extern my_bool wsrep_auto_increment_control; +extern my_bool wsrep_drupal_282555_workaround; extern my_bool wsrep_incremental_data_collection; extern const char* wsrep_start_position; extern ulong wsrep_max_ws_size; extern ulong wsrep_max_ws_rows; extern const char* wsrep_notify_cmd; -extern long wsrep_max_protocol_version; +extern my_bool wsrep_certify_nonPK; +extern long int wsrep_protocol_version; extern ulong wsrep_forced_binlog_format; extern my_bool wsrep_desync; extern ulong wsrep_reject_queries; +extern my_bool wsrep_recovery; extern my_bool wsrep_replicate_myisam; +extern my_bool wsrep_log_conflicts; extern ulong wsrep_mysql_replication_bundle; +extern my_bool wsrep_load_data_splitting; extern my_bool wsrep_restart_slave; extern my_bool wsrep_restart_slave_activated; extern my_bool wsrep_slave_FK_checks; extern my_bool wsrep_slave_UK_checks; +extern ulong wsrep_trx_fragment_unit; +extern ulong wsrep_SR_store_type; +extern uint wsrep_ignore_apply_errors; extern ulong wsrep_running_threads; extern bool wsrep_new_cluster; extern bool wsrep_gtid_mode; @@ -105,15 +109,34 @@ enum enum_wsrep_OSU_method { }; enum enum_wsrep_sync_wait { - WSREP_SYNC_WAIT_NONE = 0x0, + WSREP_SYNC_WAIT_NONE= 0x0, // select, begin - WSREP_SYNC_WAIT_BEFORE_READ = 0x1, - WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE = 0x2, - WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE = 0x4, - WSREP_SYNC_WAIT_BEFORE_SHOW = 0x8, - WSREP_SYNC_WAIT_MAX = 0xF + WSREP_SYNC_WAIT_BEFORE_READ= 0x1, + WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE= 0x2, + WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE= 0x4, + WSREP_SYNC_WAIT_BEFORE_SHOW= 0x8, + WSREP_SYNC_WAIT_MAX= 0xF +}; + +enum enum_wsrep_ignore_apply_error { + WSREP_IGNORE_ERRORS_NONE= 0x0, + WSREP_IGNORE_ERRORS_ON_RECONCILING_DDL= 0x1, + WSREP_IGNORE_ERRORS_ON_RECONCILING_DML= 0x2, + WSREP_IGNORE_ERRORS_ON_DDL= 0x4, + 
WSREP_IGNORE_ERRORS_MAX= 0x7 }; +// Streaming Replication +#define WSREP_FRAG_BYTES 0 +#define WSREP_FRAG_ROWS 1 +#define WSREP_FRAG_STATEMENTS 2 + +#define WSREP_SR_STORE_NONE 0 +#define WSREP_SR_STORE_TABLE 1 + +extern const char *wsrep_fragment_units[]; +extern const char *wsrep_SR_store_types[]; + // MySQL status variables extern my_bool wsrep_connected; extern my_bool wsrep_ready; @@ -126,9 +149,18 @@ extern long long wsrep_local_bf_aborts; extern const char* wsrep_provider_name; extern const char* wsrep_provider_version; extern const char* wsrep_provider_vendor; +extern char* wsrep_provider_capabilities; +extern char* wsrep_cluster_capabilities; + +int wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff); +int wsrep_show_ready(THD *thd, SHOW_VAR *var, char *buff); +void wsrep_free_status(THD *thd); +void wsrep_update_cluster_state_uuid(const char* str); + +/* Filters out --wsrep-new-cluster option from argv[] + * should be called in the very beginning of main() */ +void wsrep_filter_new_cluster (int* argc, char* argv[]); -int wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff, - enum enum_var_type scope); int wsrep_init(); void wsrep_deinit(bool free_options); @@ -144,19 +176,17 @@ bool wsrep_before_SE(); // initialize wsrep before storage * @param before wsrep_before_SE() value */ void wsrep_init_startup(bool before); +/* Recover streaming transactions from fragment storage */ +void wsrep_recover_sr_from_storage(THD *); + // Other wsrep global variables extern my_bool wsrep_inited; // whether wsrep is initialized ? 
- -extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); -extern "C" void wsrep_thd_set_query_state( - THD *thd, enum wsrep_query_state state); - -extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); - +extern "C" void wsrep_fire_rollbacker(THD *thd); extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); extern "C" time_t wsrep_thd_query_start(THD *thd); -extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern void wsrep_close_client_connections(my_bool wait_to_end, + THD *except_caller_thd= NULL); extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); @@ -166,60 +196,87 @@ extern void wsrep_wait_appliers_close(THD *thd); extern void wsrep_close_applier_threads(int count); extern void wsrep_kill_mysql(THD *thd); + /* new defines */ extern void wsrep_stop_replication(THD *thd); extern bool wsrep_start_replication(); -extern bool wsrep_must_sync_wait(THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ); -extern bool wsrep_sync_wait(THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ); +extern void wsrep_shutdown_replication(); +extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); +extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); +extern enum wsrep::provider::status +wsrep_sync_wait_upto (THD* thd, wsrep_gtid_t* upto, int timeout); +extern void wsrep_last_committed_id (wsrep_gtid_t* gtid); extern int wsrep_check_opts(); extern void wsrep_prepend_PATH (const char* path); /* Other global variables */ extern wsrep_seqno_t wsrep_locked_seqno; - #define WSREP_ON \ - (global_system_variables.wsrep_on) - -#define WSREP_ON_NEW \ ((global_system_variables.wsrep_on) && \ wsrep_provider && \ strcmp(wsrep_provider, WSREP_NONE)) -#define WSREP(thd) \ +/* use xxxxxx_NNULL macros when thd pointer is guaranteed to be non-null to + * avoid compiler warnings (GCC 6 and later) */ +#define WSREP_NNULL(thd) 
\ (WSREP_ON && thd->variables.wsrep_on) +#define WSREP(thd) \ + (thd && WSREP_NNULL(thd)) + +#define WSREP_CLIENT_NNULL(thd) \ + (WSREP_NNULL(thd) && thd->wsrep_client_thread) + #define WSREP_CLIENT(thd) \ (WSREP(thd) && thd->wsrep_client_thread) +#define WSREP_EMULATE_BINLOG_NNULL(thd) \ + (WSREP_NNULL(thd) && wsrep_emulate_bin_log) + #define WSREP_EMULATE_BINLOG(thd) \ (WSREP(thd) && wsrep_emulate_bin_log) -#define WSREP_FORMAT(my_format) \ - ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) \ - ? wsrep_forced_binlog_format : (ulong)(my_format)) +#define WSREP_BINLOG_FORMAT(my_format) \ + ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) ? \ + wsrep_forced_binlog_format : my_format) // prefix all messages with "WSREP" -void wsrep_log(void (*fun)(const char *, ...), const char *format, ...); -#define WSREP_LOG(fun, ...) wsrep_log(fun, ## __VA_ARGS__) -#define WSREP_LOG_CONFLICT_THD(thd, role) \ - WSREP_LOG(sql_print_information, \ - "%s: \n " \ - " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \ - " SQL: %s", \ - role, thd_get_thread_id(thd), wsrep_thd_exec_mode_str(thd), \ - wsrep_thd_query_state_str(thd), \ - wsrep_thd_conflict_state_str(thd), (long long)wsrep_thd_trx_seqno(thd), \ - wsrep_thd_query(thd) \ - ); - -#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \ - if (wsrep_debug || wsrep_log_conflicts) \ - { \ - WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:",\ - (bf_abort) ? "high priority abort" : "certification failure" \ - ); \ - if (bf_thd != NULL) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \ - if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \ +#define WSREP_LOG(fun, ...) \ + do { \ + char msg[1024]= {'\0'}; \ + snprintf(msg, sizeof(msg) - 1, ## __VA_ARGS__); \ + fun("WSREP: %s", msg); \ + } while(0) + +#define WSREP_DEBUG(...) \ + if (wsrep_debug) WSREP_LOG(sql_print_information, ##__VA_ARGS__) +#define WSREP_INFO(...) 
WSREP_LOG(sql_print_information, ##__VA_ARGS__) +#define WSREP_WARN(...) WSREP_LOG(sql_print_warning, ##__VA_ARGS__) +#define WSREP_ERROR(...) WSREP_LOG(sql_print_error, ##__VA_ARGS__) + +#define WSREP_LOG_CONFLICT_THD(thd, role) \ + WSREP_LOG(sql_print_information, \ + "%s: \n " \ + " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \ + " SQL: %s", \ + role, \ + thd_get_thread_id(thd), \ + wsrep_thd_client_mode_str(thd), \ + wsrep_thd_client_state_str(thd), \ + wsrep_thd_transaction_state_str(thd), \ + wsrep_thd_trx_seqno(thd), \ + wsrep_thd_query(thd) \ + ); + +#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \ + if (wsrep_debug || wsrep_log_conflicts) \ + { \ + WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:", \ + (bf_abort) ? "high priority abort" : "certification failure" \ + ); \ + if (bf_thd) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \ + if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \ + WSREP_LOG(sql_print_information, "context: %s:%d", __FILE__, __LINE__); \ } #define WSREP_PROVIDER_EXISTS \ @@ -232,15 +289,6 @@ extern void wsrep_ready_wait(); class Ha_trx_info; struct THD_TRANS; -void wsrep_register_hton(THD* thd, bool all); -void wsrep_brute_force_killer(THD *thd); -int wsrep_hire_brute_force_killer(THD *thd, uint64_t trx_id); - -/* this is visible for client build so that innodb plugin gets this */ -typedef struct wsrep_aborting_thd { - struct wsrep_aborting_thd *next; - THD *aborting_thd; -} *wsrep_aborting_thd_t; extern mysql_mutex_t LOCK_wsrep_ready; extern mysql_cond_t COND_wsrep_ready; @@ -248,24 +296,26 @@ extern mysql_mutex_t LOCK_wsrep_sst; extern mysql_cond_t COND_wsrep_sst; extern mysql_mutex_t LOCK_wsrep_sst_init; extern mysql_cond_t COND_wsrep_sst_init; -extern mysql_mutex_t LOCK_wsrep_rollback; -extern mysql_cond_t COND_wsrep_rollback; extern int wsrep_replaying; extern mysql_mutex_t LOCK_wsrep_replaying; extern mysql_cond_t COND_wsrep_replaying; extern 
mysql_mutex_t LOCK_wsrep_slave_threads; extern mysql_mutex_t LOCK_wsrep_desync; +extern mysql_mutex_t LOCK_wsrep_SR_pool; +extern mysql_mutex_t LOCK_wsrep_SR_store; +extern mysql_mutex_t LOCK_wsrep_thd_pool; extern mysql_mutex_t LOCK_wsrep_config_state; -extern wsrep_aborting_thd_t wsrep_aborting_thd; extern my_bool wsrep_emulate_bin_log; extern int wsrep_to_isolation; #ifdef GTID_SUPPORT extern rpl_sidno wsrep_sidno; #endif /* GTID_SUPPORT */ extern my_bool wsrep_preordered_opt; -extern handlerton *wsrep_hton; #ifdef HAVE_PSI_INTERFACE + +extern PSI_cond_key key_COND_wsrep_thd; + extern PSI_mutex_key key_LOCK_wsrep_ready; extern PSI_mutex_key key_COND_wsrep_ready; extern PSI_mutex_key key_LOCK_wsrep_sst; @@ -274,12 +324,16 @@ extern PSI_mutex_key key_LOCK_wsrep_sst_init; extern PSI_cond_key key_COND_wsrep_sst_init; extern PSI_mutex_key key_LOCK_wsrep_sst_thread; extern PSI_cond_key key_COND_wsrep_sst_thread; -extern PSI_mutex_key key_LOCK_wsrep_rollback; -extern PSI_cond_key key_COND_wsrep_rollback; extern PSI_mutex_key key_LOCK_wsrep_replaying; extern PSI_cond_key key_COND_wsrep_replaying; extern PSI_mutex_key key_LOCK_wsrep_slave_threads; extern PSI_mutex_key key_LOCK_wsrep_desync; +extern PSI_mutex_key key_LOCK_wsrep_SR_pool; +extern PSI_mutex_key key_LOCK_wsrep_SR_store; +extern PSI_mutex_key key_LOCK_wsrep_thd_pool; +extern PSI_mutex_key key_LOCK_wsrep_global_seqno; +extern PSI_mutex_key key_LOCK_wsrep_thd_queue; +extern PSI_cond_key key_COND_wsrep_thd_queue; extern PSI_file_key key_file_wsrep_gra_log; #endif /* HAVE_PSI_INTERFACE */ @@ -287,42 +341,33 @@ struct TABLE_LIST; class Alter_info; int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, const TABLE_LIST* table_list, - Alter_info* alter_info = NULL); + Alter_info* alter_info= NULL); + void wsrep_to_isolation_end(THD *thd); -void wsrep_cleanup_transaction(THD *thd); + +bool wsrep_append_SR_keys(THD *thd); int wsrep_to_buf_helper( THD* thd, const char *query, uint query_len, uchar** 
buf, size_t* buf_len); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len); -extern bool -wsrep_grant_mdl_exception(MDL_context *requestor_ctx, - MDL_ticket *ticket, - const MDL_key *key); -IO_CACHE * get_trans_log(THD * thd); -bool wsrep_trans_cache_is_empty(THD *thd); -void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end); -void thd_binlog_rollback_stmt(THD * thd); -void thd_binlog_trx_reset(THD * thd); +bool wsrep_stmt_rollback_is_safe(THD* thd); -typedef void (*wsrep_thd_processor_fun)(THD *); -pthread_handler_t start_wsrep_THD(void *arg); -int wsrep_wait_committing_connections_close(int wait_time); -extern void wsrep_close_client_connections(my_bool wait_to_end, - THD *except_caller_thd = NULL); -void wsrep_close_applier(THD *thd); -void wsrep_close_applier_threads(int count); -void wsrep_wait_appliers_close(THD *thd); -void wsrep_kill_mysql(THD *thd); -void wsrep_close_threads(THD *thd); -void wsrep_copy_query(THD *thd); -bool wsrep_is_show_query(enum enum_sql_command command); -void wsrep_replay_transaction(THD *thd); -bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, - TABLE_LIST* src_table, - HA_CREATE_INFO *create_info); +void wsrep_init_sidno(const wsrep_uuid_t&); bool wsrep_node_is_donor(); bool wsrep_node_is_synced(); +void wsrep_init_SR(); +void wsrep_verify_SE_checkpoint(const wsrep_uuid_t& uuid, wsrep_seqno_t seqno); +int wsrep_replay_from_SR_store(THD*, const wsrep_trx_meta_t&); +void wsrep_node_uuid(wsrep_uuid_t&); + +class Log_event; +int wsrep_ignored_error_code(Log_event* ev, int error); +int wsrep_must_ignore_error(THD* thd); + +bool wsrep_replicate_GTID(THD* thd); + typedef struct wsrep_key_arr { wsrep_key_t* keys; @@ -335,38 +380,125 @@ bool wsrep_prepare_keys_for_isolation(THD* thd, wsrep_key_arr_t* ka); void wsrep_keys_free(wsrep_key_arr_t* key_arr); -#define WSREP_BINLOG_FORMAT(my_format) \ - ((wsrep_forced_binlog_format != 
BINLOG_FORMAT_UNSPEC) ? \ - wsrep_forced_binlog_format : my_format) +extern void +wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, + MDL_ticket *ticket, + const MDL_key *key); +IO_CACHE * get_trans_log(THD * thd); +bool wsrep_trans_cache_is_empty(THD *thd); +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end); +void thd_binlog_rollback_stmt(THD * thd); +void thd_binlog_trx_reset(THD * thd); + +typedef void (*wsrep_thd_processor_fun)(THD*, void *); +class Wsrep_thd_args +{ + public: + Wsrep_thd_args(wsrep_thd_processor_fun fun, void* args) + : + fun_ (fun), + args_(args) + { } + + wsrep_thd_processor_fun fun() { return fun_; } + + void* args() { return args_; } -#else /* WITH_WSREP */ + private: + + Wsrep_thd_args(const Wsrep_thd_args&); + Wsrep_thd_args& operator=(const Wsrep_thd_args&); + + wsrep_thd_processor_fun fun_; + void* args_; +}; + +void* start_wsrep_THD(void*); + +void wsrep_close_threads(THD *thd); +bool wsrep_is_show_query(enum enum_sql_command command); +void wsrep_replay_transaction(THD *thd); +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info); +bool wsrep_node_is_donor(); +bool wsrep_node_is_synced(); + +/** + * Check if the wsrep provider (ie the Galera library) is capable of + * doing streaming replication. + * @return true if SR capable + */ +bool wsrep_provider_is_SR_capable(); + +/** + * Mark current commit ordered if binlogging is not enabled. + * + * The purpose of this function is to leave commit order critical + * section if binlog is not enabled. + * + * The function can be called from inside storage engine during commit. + * Binlog options are checked inside the function. + * + * @return Zero in case of success, non-zero in case of failure. + */ +int wsrep_ordered_commit_if_no_binlog(THD*, bool); + +/** + * Commit the current transaction with the + * MySQL "Transaction Coordinator Log" (see `class TC_LOG` in sql/log.h). 
+ * Calling this function will generate and assign a new wsrep transaction id + * for `thd`. + * @return WSREP_OK on success or other WSREP_* error code on failure + */ +wsrep_status_t wsrep_tc_log_commit(THD* thd); + +/** + * Initialize WSREP server instance. + * + * @return Zero on success, non-zero on error. + */ +int wsrep_init_server(); + +/** + * Initialize WSREP globals. This should be done after server initialization + * is complete and the server has joined to the cluster. + * + */ +void wsrep_init_globals(); + +/** + * Deinit and release WSREP resources. + */ +void wsrep_deinit_server(); + +/** + * Convert streaming fragment unit (WSREP_FRAG_BYTES, WSREP_FRAG_ROWS...) + * to corresponding wsrep-lib fragment_unit + */ +enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit); + +#else /* !WITH_WSREP */ + +/* These macros are needed to compile MariaDB without WSREP support + * (e.g. embedded) */ #define WSREP(T) (0) #define WSREP_ON (0) #define WSREP_EMULATE_BINLOG(thd) (0) -#define WSREP_CLIENT(thd) (0) -#define WSREP_FORMAT(my_format) ((ulong)my_format) +#define WSREP_EMULATE_BINLOG_NNULL(thd) (0) +#define WSREP_BINLOG_FORMAT(my_format) ((ulong)my_format) #define WSREP_PROVIDER_EXISTS (0) #define wsrep_emulate_bin_log (0) #define wsrep_to_isolation (0) -#define wsrep_init() (1) -#define wsrep_prepend_PATH(X) #define wsrep_before_SE() (0) #define wsrep_init_startup(X) -#define wsrep_must_sync_wait(...) (0) -#define wsrep_sync_wait(...) (0) -#define wsrep_to_isolation_begin(...) (0) -#define wsrep_register_hton(...) 
do { } while(0) #define wsrep_check_opts() (0) -#define wsrep_stop_replication(X) do { } while(0) -#define wsrep_inited (0) -#define wsrep_deinit(X) do { } while(0) -#define wsrep_recover() do { } while(0) -#define wsrep_slave_threads (1) -#define wsrep_replicate_myisam (0) #define wsrep_thr_init() do {} while(0) #define wsrep_thr_deinit() do {} while(0) -#define wsrep_running_threads (0) -#define WSREP_BINLOG_FORMAT(my_format) my_format +#define wsrep_init_globals() do {} while(0) +#define wsrep_create_appliers(X) do {} while(0) + #endif /* WITH_WSREP */ + #endif /* WSREP_MYSQLD_H */ diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc index 92bcc8eda43..ad94aecb6b4 100644 --- a/sql/wsrep_notify.cc +++ b/sql/wsrep_notify.cc @@ -18,22 +18,8 @@ #include "wsrep_priv.h" #include "wsrep_utils.h" - -static const char* _status_str(wsrep_member_status_t status) -{ - switch (status) - { - case WSREP_MEMBER_UNDEFINED: return "Undefined"; - case WSREP_MEMBER_JOINER: return "Joiner"; - case WSREP_MEMBER_DONOR: return "Donor"; - case WSREP_MEMBER_JOINED: return "Joined"; - case WSREP_MEMBER_SYNCED: return "Synced"; - default: return "Error(?)"; - } -} - -void wsrep_notify_status (wsrep_member_status_t status, - const wsrep_view_info_t* view) +void wsrep_notify_status(enum wsrep::server_state::state status, + const wsrep::view* view) { if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd)) { @@ -42,51 +28,44 @@ void wsrep_notify_status (wsrep_member_status_t status, } char cmd_buf[1 << 16]; // this can be long - long cmd_len = sizeof(cmd_buf) - 1; - char* cmd_ptr = cmd_buf; - long cmd_off = 0; + long cmd_len= sizeof(cmd_buf) - 1; + char* cmd_ptr= cmd_buf; + long cmd_off= 0; cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, "%s", wsrep_notify_cmd); - if (status >= WSREP_MEMBER_UNDEFINED && status < WSREP_MEMBER_ERROR) - { - cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s", - _status_str(status)); - } - else - { - /* here we preserve provider 
error codes */ - cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, - " --status 'Error(%d)'", status); - } + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s", + to_c_string(status)); - if (0 != view) + if (view != NULL) { - char uuid_str[40]; - - wsrep_uuid_print (&view->state_id.uuid, uuid_str, sizeof(uuid_str)); + std::ostringstream uuid; + uuid << view->state_id().id(); cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, - " --uuid %s", uuid_str); + " --uuid %s", uuid.str().c_str()); cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, - " --primary %s", view->view >= 0 ? "yes" : "no"); + " --primary %s", view->view_seqno().get() >= 0 ? "yes" : "no"); cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, - " --index %d", view->my_idx); + " --index %ld", view->own_index()); - if (view->memb_num) + const std::vector<wsrep::view::member>& members(view->members()); + if (members.size()) { - cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members"); - - for (int i = 0; i < view->memb_num; i++) - { - wsrep_uuid_print (&view->members[i].id, uuid_str, sizeof(uuid_str)); - cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, - "%c%s/%s/%s", i > 0 ? ',' : ' ', - uuid_str, view->members[i].name, - view->members[i].incoming); - } + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members"); + + for (unsigned int i= 0; i < members.size(); i++) + { + std::ostringstream id; + id << members[i].id(); + cmd_off += snprintf(cmd_ptr + cmd_off, cmd_len - cmd_off, + "%c%s/%s/%s", i > 0 ? 
',' : ' ', + id.str().c_str(), + members[i].name().c_str(), + members[i].incoming().c_str()); + } } } @@ -100,7 +79,7 @@ void wsrep_notify_status (wsrep_member_status_t status, wsp::process p(cmd_ptr, "r", NULL); p.wait(); - int err = p.error(); + int err= p.error(); if (err) { diff --git a/sql/wsrep_plugin.cc b/sql/wsrep_plugin.cc new file mode 100644 index 00000000000..83618a50637 --- /dev/null +++ b/sql/wsrep_plugin.cc @@ -0,0 +1,53 @@ +/* Copyright 2016 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_trans_observer.h" +#include "wsrep_mysqld.h" + +#include <mysql/plugin.h> + +static int wsrep_plugin_init(void *p) +{ + WSREP_INFO("wsrep_plugin_init()"); + return 0; +} + +static int wsrep_plugin_deinit(void *p) +{ + WSREP_INFO("wsrep_plugin_deinit()"); + return 0; +} + +struct Mysql_replication wsrep_plugin= { + MYSQL_REPLICATION_INTERFACE_VERSION +}; + +maria_declare_plugin(wsrep) +{ + MYSQL_REPLICATION_PLUGIN, + &wsrep_plugin, + "wsrep", + "Codership Oy", + "Wsrep replication plugin", + PLUGIN_LICENSE_GPL, + wsrep_plugin_init, + wsrep_plugin_deinit, + 0x0100, + NULL, /* Status variables */ + NULL, /* System variables */ + "1.0", /* Version (string) */ + MariaDB_PLUGIN_MATURITY_STABLE /* Maturity */ +} +maria_declare_plugin_end; diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h index 222a49cc2ab..68773d27948 
100644 --- a/sql/wsrep_priv.h +++ b/sql/wsrep_priv.h @@ -19,8 +19,9 @@ #ifndef WSREP_PRIV_H #define WSREP_PRIV_H +#include <my_global.h> #include "wsrep_mysqld.h" -#include "../wsrep/wsrep_api.h" +#include "wsrep_schema.h" #include <log.h> #include <pthread.h> @@ -31,25 +32,20 @@ my_bool wsrep_ready_set (my_bool x); ssize_t wsrep_sst_prepare (void** msg); wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, - const void* msg, size_t msg_len, + const wsrep_buf_t* msg, const wsrep_gtid_t* state_id, - const char* state, size_t state_len, + const wsrep_buf_t* state, bool bypass); extern wsrep_uuid_t local_uuid; extern wsrep_seqno_t local_seqno; +extern Wsrep_schema* wsrep_schema; // a helper function -bool wsrep_sst_received (wsrep_t* const wsrep, - const wsrep_uuid_t& uuid, - const wsrep_seqno_t seqno, - const void* const state, - const size_t state_len, - const bool implicit); -/*! SST thread signals init thread about sst completion */ -void wsrep_sst_complete(const wsrep_uuid_t*, wsrep_seqno_t, bool); - -void wsrep_notify_status (wsrep_member_status_t new_status, - const wsrep_view_info_t* view = 0); +void wsrep_sst_received(THD*, const wsrep_uuid_t&, wsrep_seqno_t, + const void*, size_t); + +void wsrep_notify_status(enum wsrep::server_state::state status, + const wsrep::view* view= 0); #endif /* WSREP_PRIV_H */ diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc new file mode 100644 index 00000000000..98f17e41c94 --- /dev/null +++ b/sql/wsrep_schema.cc @@ -0,0 +1,1360 @@ +/* Copyright (C) 2015-2017 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "mariadb.h" + +#include "table.h" +#include "key.h" +#include "sql_base.h" +#include "sql_parse.h" +#include "sql_update.h" +#include "transaction.h" + +#include "mysql/service_wsrep.h" +#include "wsrep_schema.h" +#include "wsrep_applier.h" +#include "wsrep_xid.h" +#include "wsrep_binlog.h" +#include "wsrep_high_priority_service.h" +#include "wsrep_storage_service.h" + +#include <string> +#include <sstream> + +#define WSREP_SCHEMA "mysql" +#define WSREP_STREAMING_TABLE "wsrep_streaming_log" +#define WSREP_CLUSTER_TABLE "wsrep_cluster" +#define WSREP_MEMBERS_TABLE "wsrep_cluster_members" + +const char* wsrep_sr_table_name_full= WSREP_SCHEMA "/" WSREP_STREAMING_TABLE; + +static const std::string wsrep_schema_str= WSREP_SCHEMA; +static const std::string sr_table_str= WSREP_STREAMING_TABLE; +static const std::string cluster_table_str= WSREP_CLUSTER_TABLE; +static const std::string members_table_str= WSREP_MEMBERS_TABLE; + +static const std::string create_cluster_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + cluster_table_str + + "(" + "cluster_uuid CHAR(36) PRIMARY KEY," + "view_id BIGINT NOT NULL," + "view_seqno BIGINT NOT NULL," + "protocol_version INT NOT NULL," + "capabilities INT NOT NULL" + ") ENGINE=InnoDB"; + +static const std::string create_members_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." 
+ members_table_str + + "(" + "node_uuid CHAR(36) PRIMARY KEY," + "cluster_uuid CHAR(36) NOT NULL," + "node_name CHAR(32) NOT NULL," + "node_incoming_address VARCHAR(256) NOT NULL" + ") ENGINE=InnoDB"; + +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY +static const std::string cluster_member_history_table_str= "wsrep_cluster_member_history"; +static const std::string create_members_history_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + cluster_member_history_table_str + + "(" + "node_uuid CHAR(36) PRIMARY KEY," + "cluster_uuid CHAR(36) NOT NULL," + "last_view_id BIGINT NOT NULL," + "last_view_seqno BIGINT NOT NULL," + "node_name CHAR(32) NOT NULL," + "node_incoming_address VARCHAR(256) NOT NULL" + ") ENGINE=InnoDB"; +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + +static const std::string create_frag_table_str= + "CREATE TABLE IF NOT EXISTS " + wsrep_schema_str + "." + sr_table_str + + "(" + "node_uuid CHAR(36), " + "trx_id BIGINT, " + "seqno BIGINT, " + "flags INT NOT NULL, " + "frag LONGBLOB NOT NULL, " + "PRIMARY KEY (node_uuid, trx_id, seqno)" + ") ENGINE=InnoDB"; + +static const std::string delete_from_cluster_table= + "DELETE FROM " + wsrep_schema_str + "." + cluster_table_str; + +static const std::string delete_from_members_table= + "DELETE FROM " + wsrep_schema_str + "." 
+ members_table_str; + +namespace Wsrep_schema_impl +{ + +class binlog_off +{ +public: + binlog_off(THD* thd) + : m_thd(thd) + , m_option_bits(thd->variables.option_bits) + , m_sql_log_bin(thd->variables.sql_log_bin) + { + thd->variables.option_bits&= ~OPTION_BIN_LOG; + thd->variables.sql_log_bin= 0; + } + ~binlog_off() + { + m_thd->variables.option_bits= m_option_bits; + m_thd->variables.sql_log_bin= m_sql_log_bin; + } +private: + THD* m_thd; + ulonglong m_option_bits; + my_bool m_sql_log_bin; +}; + +class wsrep_off +{ +public: + wsrep_off(THD* thd) + : m_thd(thd) + , m_wsrep_on(thd->variables.wsrep_on) + { + thd->variables.wsrep_on= 0; + } + ~wsrep_off() + { + m_thd->variables.wsrep_on= m_wsrep_on; + } +private: + THD* m_thd; + my_bool m_wsrep_on; +}; + +class thd_context_switch +{ +public: + thd_context_switch(THD *orig_thd, THD *cur_thd) + : m_orig_thd(orig_thd) + , m_cur_thd(cur_thd) + { + m_orig_thd->reset_globals(); + m_cur_thd->store_globals(); + } + ~thd_context_switch() + { + m_cur_thd->reset_globals(); + m_orig_thd->store_globals(); + } +private: + THD *m_orig_thd; + THD *m_cur_thd; +}; + +static int execute_SQL(THD* thd, const char* sql, uint length) { + DBUG_ENTER("Wsrep_schema::execute_SQL()"); + int err= 0; + + PSI_statement_locker *parent_locker= thd->m_statement_psi; + Parser_state parser_state; + + WSREP_DEBUG("SQL: %d %s thd: %lld", length, sql, (long long)thd->thread_id); + + if (parser_state.init(thd, (char*)sql, length) == 0) { + thd->reset_for_next_command(); + lex_start(thd); + + thd->m_statement_psi= NULL; + + thd->set_query((char*)sql, length); + thd->set_query_id(next_query_id()); + + mysql_parse(thd, (char*)sql, length, & parser_state, FALSE, FALSE); + + if (thd->is_error()) { + WSREP_WARN("Wsrep_schema::execute_sql() failed, %d %s\nSQL: %s", + thd->get_stmt_da()->sql_errno(), + thd->get_stmt_da()->message(), + sql); + err= 1; + } + thd->m_statement_psi= parent_locker; + thd->end_statement(); + thd->reset_query(); + 
close_thread_tables(thd); + delete_explain_query(thd->lex); + } + else { + WSREP_WARN("SR init failure"); + } + thd->cleanup_after_query(); + DBUG_RETURN(err); +} + +/* + Initialize thd for next "statement" + */ +static void init_stmt(THD* thd) { + thd->reset_for_next_command(); +} + +static void finish_stmt(THD* thd) { + trans_commit_stmt(thd); + close_thread_tables(thd); +} + +static int open_table(THD* thd, + const LEX_CSTRING *schema_name, + const LEX_CSTRING *table_name, + enum thr_lock_type const lock_type, + TABLE** table) { + assert(table); + *table= NULL; + + DBUG_ENTER("Wsrep_schema::open_table()"); + + TABLE_LIST tables; + uint flags= (MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK | + MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY | + MYSQL_OPEN_IGNORE_FLUSH | + MYSQL_LOCK_IGNORE_TIMEOUT); + + tables.init_one_table(schema_name, + table_name, + NULL, lock_type); + + if (!open_n_lock_single_table(thd, &tables, tables.lock_type, flags)) { + close_thread_tables(thd); + my_error(ER_NO_SUCH_TABLE, MYF(0), schema_name->str, table_name->str); + DBUG_RETURN(1); + } + + *table= tables.table; + (*table)->use_all_columns(); + + DBUG_RETURN(0); +} + + +static int open_for_write(THD* thd, const char* table_name, TABLE** table) { + LEX_CSTRING schema_str= { wsrep_schema_str.c_str(), wsrep_schema_str.length() }; + LEX_CSTRING table_str= { table_name, strlen(table_name) }; + if (Wsrep_schema_impl::open_table(thd, &schema_str, &table_str, TL_WRITE, + table)) { + WSREP_ERROR("Failed to open table %s.%s for writing", + schema_str.str, table_name); + return 1; + } + empty_record(*table); + (*table)->use_all_columns(); + restore_record(*table, s->default_values); + return 0; +} + +static void store(TABLE* table, uint field, const Wsrep_id& id) { + assert(field < table->s->fields); + std::ostringstream os; + os << id; + table->field[field]->store(os.str().c_str(), + os.str().size(), + &my_charset_bin); +} + + +template <typename INTTYPE> +static void store(TABLE* table, uint field, const INTTYPE 
val) { + assert(field < table->s->fields); + table->field[field]->store(val); +} + +template <typename CHARTYPE> +static void store(TABLE* table, uint field, const CHARTYPE* str, size_t str_len) { + assert(field < table->s->fields); + table->field[field]->store((const char*)str, + str_len, + &my_charset_bin); +} + +static void store(TABLE* table, uint field, const std::string& str) +{ + store(table, field, str.c_str(), str.size()); +} + +static int update_or_insert(TABLE* table) { + DBUG_ENTER("Wsrep_schema::update_or_insert()"); + int ret= 0; + char* key; + int error; + + /* + Verify that the table has primary key defined. + */ + if (table->s->primary_key >= MAX_KEY || + !table->s->keys_in_use.is_set(table->s->primary_key)) { + WSREP_ERROR("No primary key for %s.%s", + table->s->db.str, table->s->table_name.str); + DBUG_RETURN(1); + } + + /* + Find the record and update or insert a new one if not found. + */ + if (!(key= (char*) my_safe_alloca(table->s->max_unique_length))) { + WSREP_ERROR("Error allocating %ud bytes for key", + table->s->max_unique_length); + DBUG_RETURN(1); + } + + key_copy((uchar*) key, table->record[0], + table->key_info + table->s->primary_key, 0); + + if ((error= table->file->ha_index_read_idx_map(table->record[1], + table->s->primary_key, + (uchar*) key, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT))) { + /* + Row not found, insert a new one. 
+ */ + if ((error= table->file->ha_write_row(table->record[0]))) { + WSREP_ERROR("Error writing into %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + ret= 1; + } + } + else if (!records_are_comparable(table) || compare_record(table)) { + /* + Record has changed + */ + if ((error= table->file->ha_update_row(table->record[1], + table->record[0])) && + error != HA_ERR_RECORD_IS_THE_SAME) { + WSREP_ERROR("Error updating record in %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + ret= 1; + } + } + + my_safe_afree(key, table->s->max_unique_length); + + DBUG_RETURN(ret); +} + +static int insert(TABLE* table) { + DBUG_ENTER("Wsrep_schema::insert()"); + int ret= 0; + int error; + + /* + Verify that the table has primary key defined. + */ + if (table->s->primary_key >= MAX_KEY || + !table->s->keys_in_use.is_set(table->s->primary_key)) { + WSREP_ERROR("No primary key for %s.%s", + table->s->db.str, table->s->table_name.str); + DBUG_RETURN(1); + } + + if ((error= table->file->ha_write_row(table->record[0]))) { + WSREP_ERROR("Error writing into %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + ret= 1; + } + + DBUG_RETURN(ret); +} + +static int delete_row(TABLE* table) { + int error; + int retry= 3; + + do { + error= table->file->ha_delete_row(table->record[0]); + retry--; + } while (error && retry); + + if (error) { + WSREP_ERROR("Error deleting row from %s.%s: %d", + table->s->db.str, + table->s->table_name.str, + error); + return 1; + } + return 0; +} + +static int open_for_read(THD* thd, const char* table_name, TABLE** table) { + + LEX_CSTRING schema_str= { wsrep_schema_str.c_str(), wsrep_schema_str.length() }; + LEX_CSTRING table_str= { table_name, strlen(table_name) }; + if (Wsrep_schema_impl::open_table(thd, &schema_str, &table_str, TL_READ, + table)) { + WSREP_ERROR("Failed to open table %s.%s for reading", + schema_str.str, table_name); + return 1; + } + empty_record(*table); + 
(*table)->use_all_columns(); + restore_record(*table, s->default_values); + return 0; +} + +/* + Init table for sequential scan. + + @return 0 in case of success, 1 in case of error. + */ +static int init_for_scan(TABLE* table) { + int error; + if ((error= table->file->ha_rnd_init(TRUE))) { + WSREP_ERROR("Failed to init table for scan: %d", error); + return 1; + } + return 0; +} +/* + Scan next record. For return codes see handler::ha_rnd_next() + + @return 0 in case of success, error code in case of error + */ +static int next_record(TABLE* table) { + int error; + if ((error= table->file->ha_rnd_next(table->record[0])) && + error != HA_ERR_END_OF_FILE) { + WSREP_ERROR("Failed to read next record: %d", error); + } + return error; +} + +/* + End scan. + + @return 0 in case of success, 1 in case of error. + */ +static int end_scan(TABLE* table) { + int error; + if ((error= table->file->ha_rnd_end())) { + WSREP_ERROR("Failed to end scan: %d", error); + return 1; + } + return 0; +} + +static int scan(TABLE* table, uint field, wsrep::id& id) +{ + assert(field < table->s->fields); + String uuid_str; + (void)table->field[field]->val_str(&uuid_str); + id= wsrep::id(std::string(uuid_str.c_ptr(), uuid_str.length())); + return 0; +} + +template <typename INTTYPE> +static int scan(TABLE* table, uint field, INTTYPE& val) +{ + assert(field < table->s->fields); + val= table->field[field]->val_int(); + return 0; +} + +static int scan(TABLE* table, uint field, char* strbuf, uint strbuf_len) +{ + String str; + (void)table->field[field]->val_str(&str); + strncpy(strbuf, str.c_ptr(), std::min(str.length(), strbuf_len)); + strbuf[strbuf_len - 1]= '\0'; + return 0; +} + +/* + Scan member + TODO: filter members by cluster UUID + */ +static int scan_member(TABLE* table, + const Wsrep_id& cluster_uuid, + std::vector<Wsrep_view::member>& members) +{ + Wsrep_id member_id; + char member_name[128]= { 0, }; + char member_incoming[128]= { 0, }; + + if (scan(table, 0, member_id) || + scan(table, 
2, member_name, sizeof(member_name)) || + scan(table, 3, member_incoming, sizeof(member_incoming))) { + return 1; + } + + if (members.empty() == false) { + assert(members.rbegin()->id() < member_id); + } + + try { + members.push_back(Wsrep_view::member(member_id, + member_name, + member_incoming)); + } + catch (...) { + WSREP_ERROR("Caught exception while scanning members table"); + return 1; + } + return 0; +} + +/* + Init table for index scan and retrieve first record + + @return 0 in case of success, error code in case of error. + */ +static int init_for_index_scan(TABLE* table, const uchar* key, + key_part_map map) { + int error; + if ((error= table->file->ha_index_init(table->s->primary_key, true))) { + WSREP_ERROR("Failed to init table for index scan: %d", error); + return error; + } + + error= table->file->ha_index_read_map(table->record[0], + key, map, HA_READ_KEY_EXACT); + switch(error) { + case 0: + case HA_ERR_END_OF_FILE: + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_ABORTED_BY_USER: + break; + case -1: + WSREP_DEBUG("init_for_index_scan interrupted"); + break; + default: + WSREP_ERROR("init_for_index_scan failed to read first record, error %d", error); + } + return error; +} + +/* + End index scan. + + @return 0 in case of success, 1 in case of error. 
+ */ +static int end_index_scan(TABLE* table) { + int error; + if ((error= table->file->ha_index_end())) { + WSREP_ERROR("Failed to end scan: %d", error); + return 1; + } + return 0; +} + +static void make_key(TABLE* table, uchar* key, key_part_map* map, int parts) { + uint prefix_length= 0; + KEY_PART_INFO* key_part= table->key_info->key_part; + for (int i=0; i < parts; i++) + prefix_length += key_part[i].store_length; + *map= make_prev_keypart_map(parts); + key_copy(key, table->record[0], table->key_info, prefix_length); +} +} /* namespace Wsrep_schema_impl */ + + +Wsrep_schema::Wsrep_schema() +{ +} + +Wsrep_schema::~Wsrep_schema() +{ } + +static void wsrep_init_thd_for_schema(THD *thd) +{ + thd->security_ctx->skip_grants(); + thd->system_thread= SYSTEM_THREAD_GENERIC; + + mysql_mutex_lock(&LOCK_thread_count); + + thd->real_id=pthread_self(); // Keep purify happy + + WSREP_DEBUG("Wsrep_thd_pool: creating system thread: %lld", + (long long)thd->thread_id); + thd->prior_thr_create_utime= thd->start_utime= thd->thr_create_utime; + (void) mysql_mutex_unlock(&LOCK_thread_count); + + /* */ + thd->variables.wsrep_on = 0; + /* No binlogging */ + thd->variables.sql_log_bin = 0; + thd->variables.option_bits &= ~OPTION_BIN_LOG; + /* No general log */ + thd->variables.option_bits |= OPTION_LOG_OFF; + /* Read committed isolation to avoid gap locking */ + thd->variables.tx_isolation= ISO_READ_COMMITTED; + thd->store_globals(); +} + +int Wsrep_schema::init() +{ + DBUG_ENTER("Wsrep_schema::init()"); + int ret; + THD* thd= new THD(next_thread_id()); + if (!thd) { + WSREP_ERROR("Unable to get thd"); + DBUG_RETURN(1); + } + thd->thread_stack= (char*)&thd; + wsrep_init_thd_for_schema(thd); + + if (Wsrep_schema_impl::execute_SQL(thd, create_cluster_table_str.c_str(), + create_cluster_table_str.size()) || + Wsrep_schema_impl::execute_SQL(thd, create_members_table_str.c_str(), + create_members_table_str.size()) || +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY + 
Wsrep_schema_impl::execute_SQL(thd, + create_members_history_table_str.c_str(), + create_members_history_table_str.size()) || +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + Wsrep_schema_impl::execute_SQL(thd, + create_frag_table_str.c_str(), + create_frag_table_str.size())) { + ret= 1; + } + else { + ret= 0; + } + + delete thd; + DBUG_RETURN(ret); +} + +int Wsrep_schema::store_view(THD* thd, const Wsrep_view& view) +{ + DBUG_ENTER("Wsrep_schema::store_view()"); + assert(view.status() == Wsrep_view::primary); + int ret= 1; + int error; + TABLE* cluster_table= 0; + TABLE* members_table= 0; +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY + TABLE* members_history_table= 0; +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + + Wsrep_schema_impl::wsrep_off wsrep_off(thd); + Wsrep_schema_impl::binlog_off binlog_off(thd); + + /* + Clean up cluster table and members table. + */ + if (Wsrep_schema_impl::execute_SQL(thd, + delete_from_cluster_table.c_str(), + delete_from_cluster_table.size()) || + Wsrep_schema_impl::execute_SQL(thd, + delete_from_members_table.c_str(), + delete_from_members_table.size())) { + goto out; + } + + /* + Store cluster view info + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, cluster_table_str.c_str(), &cluster_table)) + { + goto out; + } + + Wsrep_schema_impl::store(cluster_table, 0, view.state_id().id()); + Wsrep_schema_impl::store(cluster_table, 1, view.view_seqno().get()); + Wsrep_schema_impl::store(cluster_table, 2, view.state_id().seqno().get()); + Wsrep_schema_impl::store(cluster_table, 3, view.protocol_version()); + Wsrep_schema_impl::store(cluster_table, 4, view.capabilities()); + + if ((error= Wsrep_schema_impl::update_or_insert(cluster_table))) + { + WSREP_ERROR("failed to write to cluster table: %d", error); + goto out; + } + + Wsrep_schema_impl::finish_stmt(thd); + + /* + Store info about current members + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, members_table_str.c_str(), + 
&members_table)) + { + WSREP_ERROR("failed to open wsrep.members table"); + goto out; + } + + for (size_t i= 0; i < view.members().size(); ++i) + { + Wsrep_schema_impl::store(members_table, 0, view.members()[i].id()); + Wsrep_schema_impl::store(members_table, 1, view.state_id().id()); + Wsrep_schema_impl::store(members_table, 2, view.members()[i].name()); + Wsrep_schema_impl::store(members_table, 3, view.members()[i].incoming()); + if ((error= Wsrep_schema_impl::update_or_insert(members_table))) + { + WSREP_ERROR("failed to write wsrep.members table: %d", error); + goto out; + } + } + Wsrep_schema_impl::finish_stmt(thd); + +#ifdef WSREP_SCHEMA_MEMBERS_HISTORY + /* + Store members history + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, cluster_member_history.c_str(), + &members_history_table)) { + WSREP_ERROR("failed to open wsrep.members table"); + goto out; + } + + for (size_t i= 0; i < view.members().size(); ++i) { + Wsrep_schema_impl::store(members_history_table, 0, view.members()[i].id()); + Wsrep_schema_impl::store(members_history_table, 1, view.state_id().id()); + Wsrep_schema_impl::store(members_history_table, 2, view.view_seqno()); + Wsrep_schema_impl::store(members_history_table, 3, view.state_id().seqno()); + Wsrep_schema_impl::store(members_history_table, 4, + view.members()[i].name()); + Wsrep_schema_impl::store(members_history_table, 5, + view.members()[i].incoming()); + if ((error= Wsrep_schema_impl::update_or_insert(members_history_table))) { + WSREP_ERROR("failed to write wsrep_cluster_member_history table: %d", error); + goto out; + } + } + Wsrep_schema_impl::finish_stmt(thd); +#endif /* WSREP_SCHEMA_MEMBERS_HISTORY */ + ret= 0; + out: + + DBUG_RETURN(ret); +} + +Wsrep_view Wsrep_schema::restore_view(THD* thd, const Wsrep_id& own_id) const { + DBUG_ENTER("Wsrep_schema::restore_view()"); + + int ret= 1; + int error; + + TABLE* cluster_table= 0; + bool end_cluster_scan= false; + TABLE* members_table= 0; + bool 
end_members_scan= false; + + /* variables below need to be initialized in case cluster table is empty */ + Wsrep_id cluster_uuid; + wsrep_seqno_t view_id= -1; + wsrep_seqno_t view_seqno= -1; + int my_idx= -1; + int proto_ver= 0; + wsrep_cap_t capabilities= 0; + std::vector<Wsrep_view::member> members; + + // we don't want causal waits for reading non-replicated private data + int const wsrep_sync_wait_saved= thd->variables.wsrep_sync_wait; + thd->variables.wsrep_sync_wait= 0; + + if (trans_begin(thd, MYSQL_START_TRANS_OPT_READ_ONLY)) { + WSREP_ERROR("wsrep_schema::restore_view(): Failed to start transaction"); + goto out; + } + + /* + Read cluster info from cluster table + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_read(thd, cluster_table_str.c_str(), &cluster_table) || + Wsrep_schema_impl::init_for_scan(cluster_table)) { + goto out; + } + + if (((error= Wsrep_schema_impl::next_record(cluster_table)) != 0 || + Wsrep_schema_impl::scan(cluster_table, 0, cluster_uuid) || + Wsrep_schema_impl::scan(cluster_table, 1, view_id) || + Wsrep_schema_impl::scan(cluster_table, 2, view_seqno) || + Wsrep_schema_impl::scan(cluster_table, 3, proto_ver) || + Wsrep_schema_impl::scan(cluster_table, 4, capabilities)) && + error != HA_ERR_END_OF_FILE) { + end_cluster_scan= true; + goto out; + } + + if (Wsrep_schema_impl::end_scan(cluster_table)) { + goto out; + } + Wsrep_schema_impl::finish_stmt(thd); + + /* + Read members from members table + */ + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_read(thd, members_table_str.c_str(), &members_table) || + Wsrep_schema_impl::init_for_scan(members_table)) { + goto out; + } + end_members_scan= true; + + while (true) { + if ((error= Wsrep_schema_impl::next_record(members_table)) == 0) { + if (Wsrep_schema_impl::scan_member(members_table, + cluster_uuid, + members)) { + goto out; + } + } + else if (error == HA_ERR_END_OF_FILE) { + break; + } + else { + goto out; + } + } + + end_members_scan= 
false; + if (Wsrep_schema_impl::end_scan(members_table)) { + goto out; + } + Wsrep_schema_impl::finish_stmt(thd); + + if (own_id.is_undefined() == false) { + for (uint i= 0; i < members.size(); ++i) { + if (members[i].id() == own_id) { + my_idx= i; + break; + } + } + } + + (void)trans_commit(thd); + ret= 0; /* Success*/ + out: + + if (end_cluster_scan) Wsrep_schema_impl::end_scan(cluster_table); + if (end_members_scan) Wsrep_schema_impl::end_scan(members_table); + + if (0 != ret) { + trans_rollback_stmt(thd); + if (!trans_rollback(thd)) { + close_thread_tables(thd); + } + } + thd->mdl_context.release_transactional_locks(); + + thd->variables.wsrep_sync_wait= wsrep_sync_wait_saved; + + if (0 == ret) { + Wsrep_view ret_view( + wsrep::gtid(cluster_uuid, Wsrep_seqno(view_seqno)), + Wsrep_seqno(view_id), + wsrep::view::primary, + capabilities, + my_idx, + proto_ver, + members + ); + + if (wsrep_debug) { + std::ostringstream os; + os << "Restored cluster view:\n" << ret_view; + WSREP_INFO("%s", os.str().c_str()); + } + DBUG_RETURN(ret_view); + } + else + { + WSREP_ERROR("wsrep_schema::restore_view() failed."); + Wsrep_view ret_view; + DBUG_RETURN(ret_view); + } +} + +int Wsrep_schema::append_fragment(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + wsrep::seqno seqno, + int flags, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_schema::append_fragment"); + std::ostringstream os; + os << server_id; + WSREP_DEBUG("Append fragment(%llu) %s, %llu", + thd->thread_id, + os.str().c_str(), + transaction_id.get()); + Wsrep_schema_impl::binlog_off binlog_off(thd); + Wsrep_schema_impl::init_stmt(thd); + + TABLE* frag_table= 0; + if (Wsrep_schema_impl::open_for_write(thd, sr_table_str.c_str(), &frag_table)) + { + trans_rollback_stmt(thd); + DBUG_RETURN(1); + } + + Wsrep_schema_impl::store(frag_table, 0, server_id); + Wsrep_schema_impl::store(frag_table, 1, transaction_id.get()); + Wsrep_schema_impl::store(frag_table, 2, seqno.get()); + 
Wsrep_schema_impl::store(frag_table, 3, flags); + Wsrep_schema_impl::store(frag_table, 4, data.data(), data.size()); + + int error; + if ((error= Wsrep_schema_impl::insert(frag_table))) { + WSREP_ERROR("Failed to write to frag table: %d", error); + trans_rollback_stmt(thd); + DBUG_RETURN(1); + } + Wsrep_schema_impl::finish_stmt(thd); + DBUG_RETURN(0); +} + +int Wsrep_schema::update_fragment_meta(THD* thd, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_schema::update_fragment_meta"); + std::ostringstream os; + os << ws_meta.server_id(); + WSREP_DEBUG("update_frag_seqno(%llu) %s, %llu, seqno %lld", + thd->thread_id, + os.str().c_str(), + ws_meta.transaction_id().get(), + ws_meta.seqno().get()); + DBUG_ASSERT(ws_meta.seqno().is_undefined() == false); + + Wsrep_schema_impl::binlog_off binlog_off(thd); + int error; + uchar key[MAX_KEY_LENGTH]; + key_part_map key_map= 0; + TABLE* frag_table= 0; + + Wsrep_schema_impl::init_stmt(thd); + if (Wsrep_schema_impl::open_for_write(thd, sr_table_str.c_str(), &frag_table)) + { + DBUG_RETURN(1); + } + + /* Find record with the given uuid, trx id, and seqno -1 */ + Wsrep_schema_impl::store(frag_table, 0, ws_meta.server_id()); + Wsrep_schema_impl::store(frag_table, 1, ws_meta.transaction_id().get()); + Wsrep_schema_impl::store(frag_table, 2, -1); + Wsrep_schema_impl::make_key(frag_table, key, &key_map, 3); + + if ((error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, key_map))) + { + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + { + WSREP_WARN("Record not found in %s.%s: %d", + frag_table->s->db.str, + frag_table->s->table_name.str, + error); + } + Wsrep_schema_impl::finish_stmt(thd); + DBUG_RETURN(1); + } + + /* Copy the original record to frag_table->record[1] */ + store_record(frag_table, record[1]); + + /* Store seqno in frag_table->record[0] and update the row */ + Wsrep_schema_impl::store(frag_table, 2, ws_meta.seqno().get()); + if ((error= 
frag_table->file->ha_update_row(frag_table->record[1], + frag_table->record[0]))) { + WSREP_ERROR("Error updating record in %s.%s: %d", + frag_table->s->db.str, + frag_table->s->table_name.str, + error); + Wsrep_schema_impl::finish_stmt(thd); + DBUG_RETURN(1); + } + + int ret= Wsrep_schema_impl::end_index_scan(frag_table); + Wsrep_schema_impl::finish_stmt(thd); + DBUG_RETURN(ret); +} + +static int remove_fragment(THD* thd, + TABLE* frag_table, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + wsrep::seqno seqno) +{ + WSREP_DEBUG("remove_fragment(%llu) trx %llu, seqno %lld", + thd->thread_id, + transaction_id.get(), + seqno.get()); + int ret= 0; + int error; + uchar key[MAX_KEY_LENGTH]; + key_part_map key_map= 0; + + DBUG_ASSERT(server_id.is_undefined() == false); + DBUG_ASSERT(transaction_id.is_undefined() == false); + DBUG_ASSERT(seqno.is_undefined() == false); + + /* + Remove record with the given uuid, trx id, and seqno. + Using a complete key here avoids gap locks. 
+ */ + Wsrep_schema_impl::store(frag_table, 0, server_id); + Wsrep_schema_impl::store(frag_table, 1, transaction_id.get()); + Wsrep_schema_impl::store(frag_table, 2, seqno.get()); + Wsrep_schema_impl::make_key(frag_table, key, &key_map, 3); + + if ((error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, + key_map))) + { + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) + { + WSREP_DEBUG("Record not found in %s.%s:trx %llu, seqno %lld, error %d", + frag_table->s->db.str, + frag_table->s->table_name.str, + transaction_id.get(), + seqno.get(), + error); + } + ret= error; + } + else if (Wsrep_schema_impl::delete_row(frag_table)) + { + ret= 1; + } + + Wsrep_schema_impl::end_index_scan(frag_table); + return ret; +} + +int Wsrep_schema::remove_fragments(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + const std::vector<wsrep::seqno>& fragments) +{ + DBUG_ENTER("Wsrep_schema::remove_fragments"); + int ret= 0; + + WSREP_DEBUG("Removing %zu fragments", fragments.size()); + Wsrep_schema_impl::wsrep_off wsrep_off(thd); + Wsrep_schema_impl::binlog_off binlog_off(thd); + + /* + Open SR table for write. 
+ Adopted from Rpl_info_table_access::open_table() + */ + uint flags= (MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK | + MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY | + MYSQL_OPEN_IGNORE_FLUSH | + MYSQL_LOCK_IGNORE_TIMEOUT); + Query_tables_list query_tables_list_backup; + Open_tables_backup open_tables_backup; + thd->lex->reset_n_backup_query_tables_list(&query_tables_list_backup); + thd->reset_n_backup_open_tables_state(&open_tables_backup); + TABLE_LIST tables; + LEX_CSTRING schema_str= { wsrep_schema_str.c_str(), wsrep_schema_str.length() }; + LEX_CSTRING table_str= { sr_table_str.c_str(), sr_table_str.length() }; + tables.init_one_table(&schema_str, + &table_str, 0, TL_WRITE); + + if (!open_n_lock_single_table(thd, &tables, tables.lock_type, flags)) + { + WSREP_DEBUG("Failed to open SR table for access"); + ret= 1; + } + else + { + tables.table->use_all_columns(); + for (std::vector<wsrep::seqno>::const_iterator i= fragments.begin(); + i != fragments.end(); ++i) + { + if (remove_fragment(thd, + tables.table, + server_id, + transaction_id, *i)) + { + ret= 1; + break; + } + } + } + close_thread_tables(thd); + thd->restore_backup_open_tables_state(&open_tables_backup); + thd->lex->restore_backup_query_tables_list(&query_tables_list_backup); + + if (thd->wsrep_cs().mode() == wsrep::client_state::m_local && + !thd->in_multi_stmt_transaction_mode()) + { + /* + The ugly part: Locally executing autocommit statement is + committing and it has removed a fragment from stable storage. + Now calling finish_stmt() will call trans_commit_stmt(), which will + actually commit the transaction, what we really don't want + to do at this point. + + Doing nothing at this point seems to work ok, this block is + intentionally no-op and for documentation purposes only. 
+ */ + } + else + { + Wsrep_schema_impl::finish_stmt(thd); + } + + DBUG_RETURN(ret); +} + +int Wsrep_schema::replay_transaction(THD* thd, + Relay_log_info* rli, + const wsrep::ws_meta& ws_meta, + const std::vector<wsrep::seqno>& fragments) +{ + DBUG_ENTER("Wsrep_schema::replay_transaction"); + DBUG_ASSERT(!fragments.empty()); + + Wsrep_schema_impl::wsrep_off wsrep_off(thd); + Wsrep_schema_impl::binlog_off binlog_off(thd); + + int ret= 1; + int error; + TABLE* frag_table= 0; + uchar key[MAX_KEY_LENGTH]; + key_part_map key_map= 0; + + for (std::vector<wsrep::seqno>::const_iterator i= fragments.begin(); + i != fragments.end(); ++i) + { + Wsrep_schema_impl::init_stmt(thd); + if ((error= Wsrep_schema_impl::open_for_read(thd, sr_table_str.c_str(), &frag_table))) + { + WSREP_WARN("Could not open SR table for read: %d", error); + Wsrep_schema_impl::finish_stmt(thd); + DBUG_RETURN(1); + } + + Wsrep_schema_impl::store(frag_table, 0, ws_meta.server_id()); + Wsrep_schema_impl::store(frag_table, 1, ws_meta.transaction_id().get()); + Wsrep_schema_impl::store(frag_table, 2, i->get()); + Wsrep_schema_impl::make_key(frag_table, key, &key_map, 3); + + int error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, + key_map); + if (error) + { + WSREP_WARN("Failed to init streaming log table for index scan: %d", + error); + Wsrep_schema_impl::end_index_scan(frag_table); + ret= 1; + break; + } + + int flags; + Wsrep_schema_impl::scan(frag_table, 3, flags); + WSREP_DEBUG("replay_fragment(%llu): seqno: %lld flags: %x", + ws_meta.transaction_id().get(), + i->get(), + flags); + String buf; + frag_table->field[4]->val_str(&buf); + + Wsrep_schema_impl::end_index_scan(frag_table); + Wsrep_schema_impl::finish_stmt(thd); + ret= wsrep_apply_events(thd, rli, buf.c_ptr_safe(), buf.length()); + if (ret) + { + WSREP_WARN("Wsrep_schema::replay_transaction: failed to apply fragments"); + break; + } + Wsrep_schema_impl::init_stmt(thd); + + if ((error= Wsrep_schema_impl::open_for_write(thd, 
sr_table_str.c_str(), &frag_table))) + { + WSREP_WARN("Could not open SR table for write: %d", error); + Wsrep_schema_impl::finish_stmt(thd); + DBUG_RETURN(1); + } + error= Wsrep_schema_impl::init_for_index_scan(frag_table, + key, + key_map); + if (error) + { + WSREP_WARN("Failed to init streaming log table for index scan: %d", + error); + Wsrep_schema_impl::end_index_scan(frag_table); + ret= 1; + break; + } + + error= Wsrep_schema_impl::delete_row(frag_table); + if (error) + { + WSREP_WARN("Could not delete row from streaming log table: %d", error); + Wsrep_schema_impl::end_index_scan(frag_table); + ret= 1; + break; + } + Wsrep_schema_impl::end_index_scan(frag_table); + Wsrep_schema_impl::finish_stmt(thd); + } + + DBUG_RETURN(ret); +} + +int Wsrep_schema::recover_sr_transactions(THD *orig_thd) +{ + DBUG_ENTER("Wsrep_schema::recover_sr_transactions"); + THD storage_thd(true, true); + storage_thd.thread_stack= (orig_thd ? orig_thd->thread_stack : + (char*) &storage_thd); + TABLE* frag_table= 0; + TABLE* cluster_table= 0; + Wsrep_storage_service storage_service(&storage_thd); + Wsrep_schema_impl::binlog_off binlog_off(&storage_thd); + Wsrep_schema_impl::wsrep_off binglog_off(&storage_thd); + Wsrep_schema_impl::thd_context_switch thd_context_switch(orig_thd, + &storage_thd); + Wsrep_server_state& server_state(Wsrep_server_state::instance()); + + int ret= 1; + int error; + wsrep::id cluster_id; + + Wsrep_schema_impl::init_stmt(&storage_thd); + storage_thd.wsrep_skip_locking= FALSE; + /* + Open the table for reading and writing so that fragments without + valid seqno can be deleted. 
+ */ + if (Wsrep_schema_impl::open_for_write(&storage_thd, + cluster_table_str.c_str(), + &cluster_table) || + Wsrep_schema_impl::init_for_scan(cluster_table)) + { + Wsrep_schema_impl::finish_stmt(&storage_thd); + DBUG_RETURN(1); + } + + if ((error= Wsrep_schema_impl::next_record(cluster_table))) + { + Wsrep_schema_impl::end_scan(cluster_table); + Wsrep_schema_impl::finish_stmt(&storage_thd); + trans_commit(&storage_thd); + if (error == HA_ERR_END_OF_FILE) + { + WSREP_INFO("Cluster table is empty, not recovering transactions"); + DBUG_RETURN(0); + } + else + { + WSREP_ERROR("Failed to read cluster table: %d", error); + DBUG_RETURN(1); + } + } + + Wsrep_schema_impl::scan(cluster_table, 0, cluster_id); + Wsrep_schema_impl::end_scan(cluster_table); + Wsrep_schema_impl::finish_stmt(&storage_thd); + + std::ostringstream os; + os << cluster_id; + WSREP_INFO("Recovered cluster id %s", os.str().c_str()); + + storage_thd.wsrep_skip_locking= TRUE; + Wsrep_schema_impl::init_stmt(&storage_thd); + if (Wsrep_schema_impl::open_for_read(&storage_thd, sr_table_str.c_str(), &frag_table) || + Wsrep_schema_impl::init_for_scan(frag_table)) + { + WSREP_ERROR("Failed to open SR table for read"); + goto out; + } + + while (true) + { + if ((error= Wsrep_schema_impl::next_record(frag_table)) == 0) + { + wsrep::id server_id; + Wsrep_schema_impl::scan(frag_table, 0, server_id); + wsrep::client_id client_id; + unsigned long long transaction_id_ull; + Wsrep_schema_impl::scan(frag_table, 1, transaction_id_ull); + wsrep::transaction_id transaction_id(transaction_id_ull); + long long seqno_ll; + Wsrep_schema_impl::scan(frag_table, 2, seqno_ll); + wsrep::seqno seqno(seqno_ll); + + /* This is possible if the server crashes between inserting the + fragment into table and updating the fragment seqno after + certification. 
*/ + if (seqno.is_undefined()) + { + Wsrep_schema_impl::delete_row(frag_table); + continue; + } + + wsrep::gtid gtid(cluster_id, seqno); + int flags; + Wsrep_schema_impl::scan(frag_table, 3, flags); + String data_str; + + (void)frag_table->field[4]->val_str(&data_str); + wsrep::const_buffer data(data_str.c_ptr(), data_str.length()); + wsrep::ws_meta ws_meta(gtid, + wsrep::stid(server_id, + transaction_id, + client_id), + wsrep::seqno::undefined(), + flags); + + wsrep::high_priority_service* applier; + if (!(applier= server_state.find_streaming_applier(server_id, + transaction_id))) + { + DBUG_ASSERT(wsrep::starts_transaction(flags)); + THD* thd= new THD(true, true); + thd->thread_stack= (char*)&storage_thd; + + mysql_mutex_lock(&LOCK_thread_count); + thd->thread_id= next_thread_id(); + thd->real_id= pthread_self(); + mysql_mutex_unlock(&LOCK_thread_count); + + applier= new Wsrep_applier_service(thd); + server_state.start_streaming_applier(server_id, transaction_id, + applier); + applier->start_transaction(wsrep::ws_handle(transaction_id, 0), + ws_meta); + } + applier->store_globals(); + applier->apply_write_set(ws_meta, data); + applier->after_apply(); + storage_service.store_globals(); + } + else if (error == HA_ERR_END_OF_FILE) + { + ret= 0; + break; + } + else + { + WSREP_ERROR("SR table scan returned error %d", error); + break; + } + } + Wsrep_schema_impl::end_scan(frag_table); + Wsrep_schema_impl::finish_stmt(&storage_thd); + trans_commit(&storage_thd); +out: + DBUG_RETURN(ret); +} diff --git a/sql/wsrep_schema.h b/sql/wsrep_schema.h new file mode 100644 index 00000000000..fb5eaa8931f --- /dev/null +++ b/sql/wsrep_schema.h @@ -0,0 +1,161 @@ +/* Copyright (C) 2015-2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + + +#ifndef WSREP_SCHEMA_H +#define WSREP_SCHEMA_H + +/* wsrep-lib */ +#include "wsrep_types.h" + + +#include "mysqld.h" +#include "thr_lock.h" /* enum thr_lock_type */ +#include "wsrep_mysqld.h" + +#include <string> + +/* + Forward decls +*/ +class THD; +class Relay_log_info; +struct TABLE; +struct TABLE_LIST; +struct st_mysql_lex_string; +typedef struct st_mysql_lex_string LEX_STRING; + +/** Name of the table in `wsrep_schema_str` used for storing streaming +replication data. In an InnoDB full format, e.g. "database/tablename". */ +extern const char* wsrep_sr_table_name_full; + +class Wsrep_schema +{ + public: + + Wsrep_schema(); + ~Wsrep_schema(); + + /* + Initialize wsrep schema. Storage engines must be running before + calling this function. + */ + int init(); + + /* + Store wsrep view info into wsrep schema. + */ + int store_view(THD*, const Wsrep_view& view); + + /* + Restore view info from stable storage. + */ + Wsrep_view restore_view(THD* thd, const Wsrep_id& own_id) const; + + /* + Append transaction fragment to fragment storage. + Starts a trx using a THD from thd_pool, does not commit. + Should be followed by a call to update_frag_seqno(), or + release_SR_thd() if wsrep->certify() fails. + + NOTE(review): update_frag_seqno() and release_SR_thd() are not declared + in this class; this description looks out of date - confirm. + */ + THD* append_frag(const wsrep_trx_meta_t&, uint32_t, + const unsigned char*, size_t); + /** + Append transaction fragment to fragment storage. + Transaction must have been started for THD before this call. + In order to make changes durable, transaction must be committed + separately after this call. 
+ + @param thd THD object + @param server_id Wsrep server identifier + @param transaction_id Transaction identifier + @param seqno Sequence number stored with the fragment + @param flags Flags for the fragment + @param data Fragment data buffer + + @return Zero in case of success, non-zero on failure. + */ + int append_fragment(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + wsrep::seqno seqno, + int flags, + const wsrep::const_buffer& data); + /** + Update existing fragment meta data. The fragment must have been + inserted beforehand with append_fragment(). + + @param thd THD object + @param ws_meta Wsrep meta data + + @return Zero in case of success, non-zero on failure. + */ + int update_fragment_meta(THD* thd, + const wsrep::ws_meta& ws_meta); + + /** + Remove fragments from storage. This method must be called + inside active transaction. Fragment removal will be committed + once the transaction commits. + + @param thd Pointer to THD object + @param server_id Identifier of the running server + @param transaction_id Identifier of the current transaction + @param fragments Vector of fragment seqnos to be removed + + @return Zero in case of success, non-zero on failure. + */ + int remove_fragments(THD* thd, + const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + const std::vector<wsrep::seqno>& fragments); + + /** + Replay a transaction from stored fragments. The caller must have + started a transaction for a thd. + + @param thd Pointer to THD object + @param rli Relay log info used when applying the stored events + @param ws_meta Write set meta data for commit fragment. + @param fragments Vector of fragments to be replayed + + @return Zero on success, non-zero on failure. + */ + int replay_transaction(THD* thd, + Relay_log_info* rli, + const wsrep::ws_meta& ws_meta, + const std::vector<wsrep::seqno>& fragments); + + /** + Recover streaming transactions from SR table. + This method should be called after storage engines are initialized. + It will scan SR table and replay found streaming transactions. + + @param orig_thd The THD object of the calling thread. 
+ + @return Zero on success, non-zero on failure. + */ + int recover_sr_transactions(THD* orig_thd); + + /* + Close wsrep schema. + */ + void close(); + + private: + /* Non-copyable */ + Wsrep_schema(const Wsrep_schema&); + Wsrep_schema& operator=(const Wsrep_schema&); +}; + +extern Wsrep_schema* wsrep_schema; + +#endif /* !WSREP_SCHEMA_H */ diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc new file mode 100644 index 00000000000..7efff35f2b1 --- /dev/null +++ b/sql/wsrep_server_service.cc @@ -0,0 +1,318 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_global.h" +#include "wsrep_server_service.h" +#include "wsrep_server_state.h" +#include "wsrep_client_state.h" +#include "wsrep_client_service.h" +#include "wsrep_storage_service.h" +#include "wsrep_high_priority_service.h" + +#include "wsrep_sst.h" +#include "wsrep_xid.h" +#include "wsrep_mysqld.h" +#include "wsrep_schema.h" +#include "wsrep_utils.h" + +#include "log.h" /* sql_print_xxx() */ +#include "sql_class.h" /* system variables */ +#include "transaction.h" /* trans_xxx */ +#include "sql_base.h" /* close_thread_tables */ + +static void init_service_thd(THD* thd, char* thread_stack) /* Set up a newly created service THD: thread stack, id of the calling thread, start timers and the initial COM_SLEEP command state. */ +{ + thd->thread_stack= thread_stack; + thd->real_id= pthread_self(); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + thd->set_command(COM_SLEEP); + thd->reset_for_next_command(true); +} + +wsrep::storage_service* Wsrep_server_service::storage_service( + wsrep::client_service& client_service) +{ + Wsrep_client_service& cs= + static_cast<Wsrep_client_service&>(client_service); + THD* thd= new THD(next_thread_id(), true, true); /* heap allocated; release_storage_service() deletes the service and its m_thd */ + init_service_thd(thd, cs.m_thd->thread_stack); /* reuse the thread stack address of the calling THD */ + WSREP_DEBUG("Created storage service with thread id %llu", + thd->thread_id); + return new Wsrep_storage_service(thd); +} + +wsrep::storage_service* Wsrep_server_service::storage_service( + wsrep::high_priority_service& high_priority_service) +{ + Wsrep_high_priority_service& hps= + static_cast<Wsrep_high_priority_service&>(high_priority_service); + THD* thd= new THD(next_thread_id(), true, true); /* heap allocated; release_storage_service() deletes the service and its m_thd */ + init_service_thd(thd, hps.m_thd->thread_stack); /* reuse the thread stack address of the calling THD */ + WSREP_DEBUG("Created high priority storage service with thread id %llu", + thd->thread_id); + return new Wsrep_storage_service(thd); +} + +void Wsrep_server_service::release_storage_service( + 
wsrep::storage_service* storage_service) +{ + Wsrep_storage_service* ss= + static_cast<Wsrep_storage_service*>(storage_service); + THD* thd= ss->m_thd; /* fetch the THD first: the service object holding the pointer is deleted next */ + delete ss; + delete thd; +} + +wsrep::high_priority_service* +Wsrep_server_service::streaming_applier_service( + wsrep::client_service& orig_client_service) +{ + Wsrep_client_service& orig_cs= + static_cast<Wsrep_client_service&>(orig_client_service); + THD* thd= new THD(next_thread_id(), true, true); /* heap allocated; release_high_priority_service() deletes the service and its m_thd */ + init_service_thd(thd, orig_cs.m_thd->thread_stack); + WSREP_DEBUG("Created streaming applier service in local context with " + "thread id %llu", thd->thread_id); + return new Wsrep_applier_service(thd); +} + +wsrep::high_priority_service* +Wsrep_server_service::streaming_applier_service( + wsrep::high_priority_service& orig_high_priority_service) +{ + Wsrep_high_priority_service& + orig_hps(static_cast<Wsrep_high_priority_service&>(orig_high_priority_service)); + THD* thd= new THD(next_thread_id(), true, true); /* heap allocated; release_high_priority_service() deletes the service and its m_thd */ + init_service_thd(thd, orig_hps.m_thd->thread_stack); + WSREP_DEBUG("Created streaming applier service in high priority " + "context with thread id %llu", thd->thread_id); + return new Wsrep_applier_service(thd); +} + +void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_service* high_priority_service) +{ + Wsrep_high_priority_service* hps= + static_cast<Wsrep_high_priority_service*>(high_priority_service); + THD* thd= hps->m_thd; /* fetch the THD first: the service object holding the pointer is deleted next */ + delete hps; + delete thd; +} + +void Wsrep_server_service::background_rollback(wsrep::client_state& client_state) +{ + Wsrep_client_state& cs= static_cast<Wsrep_client_state&>(client_state); + wsrep_fire_rollbacker(cs.thd()); +} + +void Wsrep_server_service::bootstrap() +{ + wsrep::log_info() + << "Bootstrapping a new cluster, setting initial position to " + << wsrep::gtid::undefined(); + wsrep_set_SE_checkpoint(wsrep::gtid::undefined()); /* store the undefined GTID as the checkpoint position */ +} + +void Wsrep_server_service::log_message(enum wsrep::log::level level, + const char* message) /* Forward a wsrep-lib log message to the sql_print_*() function matching its level. */ +{ + switch (level) 
+ { + case wsrep::log::debug: /* debug messages go out at information level with a debug: prefix */ + sql_print_information("debug: %s", message); + break; + case wsrep::log::info: + sql_print_information("%s", message); + break; + case wsrep::log::warning: + sql_print_warning("%s", message); + break; + case wsrep::log::error: + sql_print_error("%s", message); + break; + } +} + +void Wsrep_server_service::log_view( + wsrep::high_priority_service* high_priority_service, + const wsrep::view& view) +{ + Wsrep_high_priority_service* applier= + static_cast<Wsrep_high_priority_service*>(high_priority_service); + /* Update global system variables */ + mysql_mutex_lock(&LOCK_global_system_variables); + if (wsrep_auto_increment_control && view.own_index() >= 0) + { + global_system_variables.auto_increment_offset= view.own_index() + 1; + global_system_variables.auto_increment_increment= view.members().size(); + wsrep_protocol_version= view.protocol_version(); /* NOTE(review): only updated when the enclosing condition holds - confirm this is intended */ + } + mysql_mutex_unlock(&LOCK_global_system_variables); + + /* Update wsrep status variables */ + mysql_mutex_lock(&LOCK_status); + wsrep_cluster_size= view.members().size(); + wsrep_local_index= view.own_index(); + std::ostringstream os; + os << view.state_id().id(); + wsrep_update_cluster_state_uuid(os.str().c_str()); + mysql_mutex_unlock(&LOCK_status); + wsrep_config_state->set(view); + + if (view.status() == wsrep::view::primary) + { + if (applier) + { + Wsrep_id id; /* default-constructed id passed as own_id to restore_view() */ + Wsrep_view prev_view= wsrep_schema->restore_view(applier->m_thd, id); + if (prev_view.state_id().id() != view.state_id().id()) + { + WSREP_DEBUG("New cluster UUID was generated, resetting position info"); + wsrep_set_SE_checkpoint(wsrep::gtid::undefined()); + } + + if (wsrep_debug) + { + std::ostringstream os; + os << "Storing cluster view:\n" << view; + WSREP_INFO("%s", os.str().c_str()); + DBUG_ASSERT(prev_view.state_id().id() != view.state_id().id() || + view.state_id().seqno() > prev_view.state_id().seqno()); /* NOTE(review): this assertion sits inside the wsrep_debug block, so it is only evaluated when wsrep_debug is set - confirm */ + } + + if (trans_begin(applier->m_thd, MYSQL_START_TRANS_OPT_READ_WRITE)) + { + WSREP_WARN("Failed to 
start transaction for store view"); + } + else + { + if (wsrep_schema->store_view(applier->m_thd, view)) + { + WSREP_WARN("Failed to store view"); + trans_rollback_stmt(applier->m_thd); + if (!trans_rollback(applier->m_thd)) + { + close_thread_tables(applier->m_thd); + } + } + else + { + if (trans_commit(applier->m_thd)) + { + WSREP_WARN("Failed to commit transaction for store view"); + } + } + applier->m_thd->mdl_context.release_transactional_locks(); + } + + wsrep_set_SE_checkpoint(view.state_id()); + DBUG_ASSERT(wsrep_get_SE_checkpoint().id() == view.state_id().id()); + } + else + { + WSREP_DEBUG("No applier in Wsrep_server_service::log_view(), " + "skipping write to wsrep_schema"); + } + } +} + +void Wsrep_server_service::recover_streaming_appliers(wsrep::client_service& cs) +{ + Wsrep_client_service& client_service= static_cast<Wsrep_client_service&>(cs); + wsrep_recover_sr_from_storage(client_service.m_thd); +} + +void Wsrep_server_service::recover_streaming_appliers( + wsrep::high_priority_service& hs) +{ + Wsrep_high_priority_service& high_priority_service= + static_cast<Wsrep_high_priority_service&>(hs); + wsrep_recover_sr_from_storage(high_priority_service.m_thd); +} + +wsrep::view Wsrep_server_service::get_view(wsrep::client_service& c, + const wsrep::id& own_id) +{ + Wsrep_client_service& cs(static_cast<Wsrep_client_service&>(c)); + wsrep::view v(wsrep_schema->restore_view(cs.m_thd, own_id)); + return v; +} + +wsrep::gtid Wsrep_server_service::get_position(wsrep::client_service&) +{ + return wsrep_get_SE_checkpoint(); +} + +void Wsrep_server_service::log_state_change( + enum Wsrep_server_state::state prev_state, + enum Wsrep_server_state::state current_state) +{ + WSREP_INFO("Server status change %s -> %s", + wsrep::to_c_string(prev_state), + wsrep::to_c_string(current_state)); + mysql_mutex_lock(&LOCK_status); + switch (current_state) + { + case Wsrep_server_state::s_synced: + wsrep_ready= TRUE; + WSREP_INFO("Synchronized with group, ready for 
connections"); + /* fall through */ + case Wsrep_server_state::s_joined: + case Wsrep_server_state::s_donor: + wsrep_cluster_status= "Primary"; + break; + case Wsrep_server_state::s_connected: + wsrep_cluster_status= "non-Primary"; + wsrep_ready= FALSE; + wsrep_connected= TRUE; + break; + case Wsrep_server_state::s_disconnected: + wsrep_ready= FALSE; + wsrep_connected= FALSE; + wsrep_cluster_status= "Disconnected"; + break; + default: + wsrep_ready= FALSE; + wsrep_cluster_status= "non-Primary"; + break; + } + mysql_mutex_unlock(&LOCK_status); + wsrep_config_state->set(current_state); +} + +bool Wsrep_server_service::sst_before_init() const +{ + return wsrep_before_SE(); +} + +std::string Wsrep_server_service::sst_request() +{ + return wsrep_sst_prepare(); +} + +int Wsrep_server_service::start_sst(const std::string& sst_request, + const wsrep::gtid& gtid, + bool bypass) +{ + return wsrep_sst_donate(sst_request, gtid, bypass); +} + +int Wsrep_server_service::wait_committing_transactions(int timeout) +{ + return wsrep_wait_committing_connections_close(timeout); +} + +void Wsrep_server_service::debug_sync(const char*) /* no-op in this implementation: the argument is ignored */ +{ +} diff --git a/sql/wsrep_server_service.h b/sql/wsrep_server_service.h new file mode 100644 index 00000000000..b8f1f009cde --- /dev/null +++ b/sql/wsrep_server_service.h @@ -0,0 +1,81 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_SERVER_SERVICE_H +#define WSREP_SERVER_SERVICE_H + +/* wsrep-lib */ +#include "wsrep/server_service.hpp" +#include "wsrep/exception.hpp" // not_implemented_error(), remove when finished +#include "wsrep/storage_service.hpp" + +class Wsrep_server_state; + + +/* wsrep::server_service interface implementation */ +class Wsrep_server_service : public wsrep::server_service +{ +public: + Wsrep_server_service(Wsrep_server_state& server_state) + : m_server_state(server_state) + { } + + wsrep::storage_service* storage_service(wsrep::client_service&); + + wsrep::storage_service* storage_service(wsrep::high_priority_service&); + + void release_storage_service(wsrep::storage_service*); + + wsrep::high_priority_service* + streaming_applier_service(wsrep::client_service&); + + wsrep::high_priority_service* + streaming_applier_service(wsrep::high_priority_service&); + + void release_high_priority_service(wsrep::high_priority_service*); + + void background_rollback(wsrep::client_state&); + + void bootstrap(); + void log_message(enum wsrep::log::level, const char*); + + void log_dummy_write_set(wsrep::client_state&, const wsrep::ws_meta&) + { throw wsrep::not_implemented_error(); } /* not implemented: always throws */ + + void log_view(wsrep::high_priority_service*, const wsrep::view&); + + void recover_streaming_appliers(wsrep::client_service&); + void recover_streaming_appliers(wsrep::high_priority_service&); + wsrep::view get_view(wsrep::client_service&, const wsrep::id& own_id); + + wsrep::gtid get_position(wsrep::client_service&); + + void log_state_change(enum wsrep::server_state::state, + enum wsrep::server_state::state); + + bool sst_before_init() const; + + std::string sst_request(); + int start_sst(const std::string&, const wsrep::gtid&, bool); + + int wait_committing_transactions(int); 
+ + void debug_sync(const char*); +private: + Wsrep_server_state& m_server_state; +}; + + +#endif /* WSREP_SERVER_SERVICE_H */ diff --git a/sql/wsrep_server_state.cc b/sql/wsrep_server_state.cc new file mode 100644 index 00000000000..4571201b07d --- /dev/null +++ b/sql/wsrep_server_state.cc @@ -0,0 +1,82 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_global.h" +#include "wsrep_api.h" +#include "wsrep_server_state.h" + +mysql_mutex_t LOCK_wsrep_server_state; +mysql_cond_t COND_wsrep_server_state; + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key key_LOCK_wsrep_server_state; +PSI_cond_key key_COND_wsrep_server_state; +#endif + +Wsrep_server_state::Wsrep_server_state(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version) + : wsrep::server_state(m_mutex, + m_cond, + m_service, + name, + incoming_address, + address, + working_dir, + initial_position, + max_protocol_version, + wsrep::server_state::rm_sync) + , m_mutex(LOCK_wsrep_server_state) + , m_cond(COND_wsrep_server_state) + , m_service(*this) /* NOTE(review): the base class initializer above receives m_mutex, m_cond and m_service before these members are constructed, since base classes are initialized first - presumably only references are stored; confirm */ +{ + +} + +void Wsrep_server_state::init_once(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + 
const wsrep::gtid& initial_position, + int max_protocol_version) +{ + if (m_instance == 0) /* create the singleton only on the first call; NOTE(review): this check is not synchronized here - presumably called once during startup, confirm */ + { + mysql_mutex_init(key_LOCK_wsrep_server_state, &LOCK_wsrep_server_state, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_server_state, &COND_wsrep_server_state, 0); + m_instance = new Wsrep_server_state(name, + incoming_address, + address, + working_dir, + initial_position, + max_protocol_version); + } +} + +void Wsrep_server_state::destroy() +{ + + if (m_instance) + { + delete m_instance; /* the instance is deleted before the global mutex and condition variable are destroyed */ + m_instance= 0; + mysql_mutex_destroy(&LOCK_wsrep_server_state); + mysql_cond_destroy(&COND_wsrep_server_state); + } +} diff --git a/sql/wsrep_server_state.h b/sql/wsrep_server_state.h new file mode 100644 index 00000000000..d0946498d56 --- /dev/null +++ b/sql/wsrep_server_state.h @@ -0,0 +1,66 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_SERVER_STATE_H +#define WSREP_SERVER_STATE_H + +/* wsrep-lib */ +#include "wsrep/server_state.hpp" +#include "wsrep/provider.hpp" + +/* implementation */ +#include "wsrep_server_service.h" +#include "wsrep_mutex.h" +#include "wsrep_condition_variable.h" + +class Wsrep_server_state : public wsrep::server_state +{ +public: + static void init_once(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version); + static void destroy(); + static Wsrep_server_state& instance() + { + return *m_instance; + } + + static wsrep::provider& get_provider() + { + return instance().provider(); + } + + static bool has_capability(int capability) + { + return (get_provider().capabilities() & capability); + } +private: + Wsrep_server_state(const std::string& name, + const std::string& incoming_address, + const std::string& address, + const std::string& working_dir, + const wsrep::gtid& initial_position, + int max_protocol_version); + Wsrep_mutex m_mutex; + Wsrep_condition_variable m_cond; + Wsrep_server_service m_service; + + static Wsrep_server_state* m_instance; +}; + +#endif // WSREP_SERVER_STATE_H diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 0a2424fa069..4e3a7072629 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1,4 +1,4 @@ -/* Copyright 2008-2015 Codership Oy <http://www.codership.com> +/* Copyright 2008-2017 Codership Oy <http://www.codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,16 +35,16 @@ static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 + sizeof(WSREP_SST_OPT_CONF) + 
sizeof(WSREP_SST_OPT_CONF_SUFFIX) + - sizeof(WSREP_SST_OPT_CONF_EXTRA)] = {0}; + sizeof(WSREP_SST_OPT_CONF_EXTRA)]= {0}; -const char* wsrep_sst_method = WSREP_SST_DEFAULT; -const char* wsrep_sst_receive_address = WSREP_SST_ADDRESS_AUTO; -const char* wsrep_sst_donor = ""; -const char* wsrep_sst_auth = NULL; +const char* wsrep_sst_method = WSREP_SST_DEFAULT; +const char* wsrep_sst_receive_address= WSREP_SST_ADDRESS_AUTO; +const char* wsrep_sst_donor = ""; +const char* wsrep_sst_auth = NULL; // container for real auth string -static const char* sst_auth_real = NULL; -my_bool wsrep_sst_donor_rejects_queries = FALSE; +static const char* sst_auth_real = NULL; +my_bool wsrep_sst_donor_rejects_queries= FALSE; bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var) { @@ -65,7 +65,7 @@ bool wsrep_sst_method_update (sys_var *self, THD* thd, enum_var_type type) return 0; } -static const char* data_home_dir = NULL; +static const char* data_home_dir= NULL; void wsrep_set_data_home_dir(const char *data_dir) { @@ -139,7 +139,7 @@ static bool sst_auth_real_set (const char* value) { // set sst_auth_real if (sst_auth_real) { my_free((void *) sst_auth_real); } - sst_auth_real = v; + sst_auth_real= v; // mask wsrep_sst_auth if (strlen(sst_auth_real)) @@ -180,6 +180,7 @@ bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type) return 0; } + bool wsrep_before_SE() { return (wsrep_provider != NULL @@ -188,111 +189,29 @@ bool wsrep_before_SE() && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP)); } -static bool sst_complete = false; -static bool sst_needed = false; - -#define WSREP_EXTEND_TIMEOUT_INTERVAL 30 -#define WSREP_TIMEDWAIT_SECONDS 10 - -void wsrep_sst_grab () -{ - WSREP_INFO("wsrep_sst_grab()"); - if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); - sst_complete = false; - mysql_mutex_unlock (&LOCK_wsrep_sst); -} - -// Wait for end of SST -bool wsrep_sst_wait () -{ - double total_wtime = 0; - - if (mysql_mutex_lock (&LOCK_wsrep_sst)) - abort(); - - 
WSREP_INFO("Waiting for SST to complete."); - - while (!sst_complete) - { - struct timespec wtime; - set_timespec(wtime, WSREP_TIMEDWAIT_SECONDS); - time_t start_time = time(NULL); - mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime); - time_t end_time = time(NULL); - - if (!sst_complete) - { - total_wtime += difftime(end_time, start_time); - WSREP_DEBUG("Waiting for SST to complete. current seqno: %" PRId64 " waited %f secs.", local_seqno, total_wtime); - service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, - "WSREP state transfer ongoing, current seqno: %ld waited %f secs", local_seqno, total_wtime); - } - } - - if (local_seqno >= 0) - { - WSREP_INFO("SST complete, seqno: %lld", (long long) local_seqno); - } - else - { - WSREP_ERROR("SST failed: %d (%s)", - int(-local_seqno), strerror(-local_seqno)); - } - - mysql_mutex_unlock (&LOCK_wsrep_sst); - - return (local_seqno >= 0); -} - // Signal end of SST -void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid, - wsrep_seqno_t sst_seqno, - bool needed) +static void wsrep_sst_complete (THD* thd, + int const rcode) { - if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); - if (!sst_complete) - { - sst_complete = true; - sst_needed = needed; - local_uuid = *sst_uuid; - local_seqno = sst_seqno; - mysql_cond_signal (&COND_wsrep_sst); - } - else - { - /* This can happen when called from wsrep_synced_cb(). - At the moment there is no way to check there - if main thread is still waiting for signal, - so wsrep_sst_complete() is called from there - each time wsrep_ready changes from FALSE -> TRUE. - */ - WSREP_DEBUG("Nobody is waiting for SST."); - } - mysql_mutex_unlock (&LOCK_wsrep_sst); + Wsrep_client_service client_service(thd, thd->wsrep_cs()); + Wsrep_server_state::instance().sst_received(client_service, rcode); } -/* + /* If wsrep provider is loaded, inform that the new state snapshot has been received. Also update the local checkpoint. 
- @param wsrep [IN] wsrep handle + @param thd [IN] @param uuid [IN] Initial state UUID @param seqno [IN] Initial state sequence number @param state [IN] Always NULL, also ignored by wsrep provider (?) @param state_len [IN] Always 0, also ignored by wsrep provider (?) - @param implicit [IN] Whether invoked implicitly due to SST - (true) or explicitly because if change - in wsrep_start_position by user (false). - @return false Success - true Error - */ -bool wsrep_sst_received (wsrep_t* const wsrep, - const wsrep_uuid_t& uuid, - const wsrep_seqno_t seqno, - const void* const state, - const size_t state_len, - const bool implicit) +void wsrep_sst_received (THD* thd, + const wsrep_uuid_t& uuid, + wsrep_seqno_t const seqno, + const void* const state, + size_t const state_len) { /* To keep track of whether the local uuid:seqno should be updated. Also, note @@ -300,81 +219,40 @@ bool wsrep_sst_received (wsrep_t* const wsrep, OK from wsrep provider. By doing so, the values remain consistent across the server & wsrep provider. */ - bool do_update= false; - - // Get the locally stored uuid:seqno. - if (wsrep_get_SE_checkpoint(local_uuid, local_seqno)) - { - return true; - } - - if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) || - local_seqno < seqno || seqno < 0) - { - do_update= true; - } - else if (local_seqno > seqno) - { - WSREP_WARN("SST position can't be set in past. Requested: %lld, Current: " - " %lld.", (long long)seqno, (long long)local_seqno); /* - If we are here because of SET command, simply return true (error) instead of - aborting. + TODO: Handle backwards compatibility. WSREP API v25 does not have + wsrep schema. */ - if (implicit) - { - WSREP_WARN("Can't continue."); - unireg_abort(1); - } - else - { - return true; + /* + Logical SST methods (mysqldump etc) don't update InnoDB sys header. + Reset the SE checkpoint before recovering view in order to avoid + sanity check failure. 
+ */ + wsrep::gtid const sst_gtid(wsrep::id(uuid.data, sizeof(uuid.data)), + wsrep::seqno(seqno)); + + if (!wsrep_before_SE()) { + wsrep_set_SE_checkpoint(wsrep::gtid::undefined()); + wsrep_set_SE_checkpoint(sst_gtid); } - } + wsrep_verify_SE_checkpoint(uuid, seqno); -#ifdef GTID_SUPPORT - wsrep_init_sidno(uuid); -#endif /* GTID_SUPPORT */ - - if (wsrep) - { - int const rcode(seqno < 0 ? seqno : 0); - wsrep_gtid_t const state_id= {uuid, - (rcode ? WSREP_SEQNO_UNDEFINED : seqno)}; - - wsrep_status_t ret= wsrep->sst_received(wsrep, &state_id, state, - state_len, rcode); - - if (ret != WSREP_OK) - { - return true; + /* + Both wsrep_init_SR() and wsrep_recover_view() may use + wsrep thread pool. Restore original thd context before returning. + */ + if (thd) { + thd->store_globals(); + } + else { + my_pthread_setspecific_ptr(THR_THD, NULL); } - } - // Now is the good time to update the local state and checkpoint. - if (do_update) - { - if (wsrep_set_SE_checkpoint(uuid, seqno)) + if (WSREP_ON) { - return true; + int const rcode(seqno < 0 ? 
seqno : 0); + wsrep_sst_complete(thd,rcode); } - - local_uuid= uuid; - local_seqno= seqno; - } - - return false; -} - -// Let applier threads to continue -bool wsrep_sst_continue () -{ - if (sst_needed) - { - WSREP_INFO("Signalling provider to continue."); - return wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0, true); - } - return false; } struct sst_thread_arg @@ -404,11 +282,11 @@ struct sst_thread_arg static int sst_scan_uuid_seqno (const char* str, wsrep_uuid_t* uuid, wsrep_seqno_t* seqno) { - int offt = wsrep_uuid_scan (str, strlen(str), uuid); + int offt= wsrep_uuid_scan (str, strlen(str), uuid); errno= 0; /* Reset the errno */ if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt]) { - *seqno = strtoll (str + offt + 1, NULL, 10); + *seqno= strtoll (str + offt + 1, NULL, 10); if (*seqno != LLONG_MAX || errno != ERANGE) { return 0; @@ -416,7 +294,7 @@ static int sst_scan_uuid_seqno (const char* str, } WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str); - return EINVAL; + return -EINVAL; } // get rid of trailing \n @@ -426,8 +304,8 @@ static char* my_fgets (char* buf, size_t buf_len, FILE* stream) if (ret) { - size_t len = strlen(ret); - if (len > 0 && ret[len - 1] == '\n') ret[len - 1] = '\0'; + size_t len= strlen(ret); + if (len > 0 && ret[len - 1] == '\n') ret[len - 1]= '\0'; } return ret; @@ -482,9 +360,10 @@ static void* sst_joiner_thread (void* a) int err= 1; { - const char magic[] = "ready"; - const size_t magic_len = sizeof(magic) - 1; - const size_t out_len = 512; + THD* thd; + const char magic[]= "ready"; + const size_t magic_len= sizeof(magic) - 1; + const size_t out_len= 512; char out[out_len]; WSREP_INFO("Running: '%s'", arg->cmd); @@ -501,29 +380,31 @@ static void* sst_joiner_thread (void* a) WSREP_ERROR("Failed to read '%s <addr>' from: %s\n\tRead: '%s'", magic, arg->cmd, tmp); proc.wait(); - if (proc.error()) err = proc.error(); + if (proc.error()) err= proc.error(); } else { - err = 0; + err= 0; } } else { - 
err = proc.error(); + err= proc.error(); WSREP_ERROR("Failed to execute: %s : %d (%s)", arg->cmd, err, strerror(err)); } - // signal sst_prepare thread with ret code, - // it will go on sending SST request + /* + signal sst_prepare thread with ret code, + it will go on sending SST request + */ mysql_mutex_lock (&arg->lock); if (!err) { - arg->ret_str = strdup (out + magic_len + 1); - if (!arg->ret_str) err = ENOMEM; + arg->ret_str= strdup (out + magic_len + 1); + if (!arg->ret_str) err= ENOMEM; } - arg->err = -err; + arg->err= -err; mysql_cond_signal (&arg->cond); mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. @@ -531,11 +412,11 @@ static void* sst_joiner_thread (void* a) * initializer thread to ensure single thread of * shutdown. */ - wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; - wsrep_seqno_t ret_seqno = WSREP_SEQNO_UNDEFINED; + wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // in case of successfull receiver start, wait for SST completion/end - char* tmp = my_fgets (out, out_len, proc.pipe()); + char* tmp= my_fgets (out, out_len, proc.pipe()); proc.wait(); err= EINVAL; @@ -544,7 +425,7 @@ static void* sst_joiner_thread (void* a) { WSREP_ERROR("Failed to read uuid:seqno and wsrep_gtid_domain_id from " "joiner script."); - if (proc.error()) err = proc.error(); + if (proc.error()) err= proc.error(); } else { @@ -552,7 +433,14 @@ static void* sst_joiner_thread (void* a) const char *pos= strchr(out, ' '); if (!pos) { - // There is no wsrep_gtid_domain_id (some older version SST script?). + + if (wsrep_gtid_mode) + { + // There is no wsrep_gtid_domain_id (some older version SST script?). + WSREP_WARN("Did not find domain ID from SST script output '%s'. 
" + "Domain ID must be set manually to keep binlog consistent", + out); + } err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); } else { @@ -588,14 +476,59 @@ static void* sst_joiner_thread (void* a) err: + wsrep::gtid ret_gtid; + if (err) { - ret_uuid= WSREP_UUID_UNDEFINED; - ret_seqno= -err; + ret_gtid= wsrep::gtid::undefined(); + } + else + { + ret_gtid= wsrep::gtid(wsrep::id(ret_uuid.data, sizeof(ret_uuid.data)), + wsrep::seqno(ret_seqno)); } - // Tell initializer thread that SST is complete - wsrep_sst_complete (&ret_uuid, ret_seqno, true); + /* + Tell initializer thread that SST is complete + For that initialize a THD + */ + if (my_thread_init()) + { + WSREP_ERROR("my_thread_init() failed, can't signal end of SST. " + "Aborting."); + unireg_abort(1); + } + + thd= new THD(next_thread_id()); + + if (!thd) + { + WSREP_ERROR("Failed to allocate THD to restore view from local state, " + "can't signal end of SST. Aborting."); + unireg_abort(1); + } + + thd->thread_stack= (char*) &thd; + thd->security_ctx->skip_grants(); + thd->system_thread= SYSTEM_THREAD_GENERIC; + thd->real_id= pthread_self(); + + thd->store_globals(); + + /* */ + thd->variables.wsrep_on = 0; + /* No binlogging */ + thd->variables.sql_log_bin = 0; + thd->variables.option_bits &= ~OPTION_BIN_LOG; + /* No general log */ + thd->variables.option_bits |= OPTION_LOG_OFF; + /* Read committed isolation to avoid gap locking */ + thd->variables.tx_isolation= ISO_READ_COMMITTED; + + wsrep_sst_complete (thd, -err); + + delete thd; + my_thread_end(); } return NULL; @@ -694,7 +627,7 @@ static ssize_t sst_prepare_other (const char* method, " %s " WSREP_SST_OPT_PARENT " '%d'" " %s '%s'" - " %s '%s'", + " %s '%s'", method, addr_in, mysql_real_data_home, wsrep_defaults_file, (int)getpid(), binlog_opt, binlog_opt_val, @@ -734,7 +667,7 @@ static ssize_t sst_prepare_other (const char* method, pthread_t tmp; sst_thread_arg arg(cmd_str(), env()); mysql_mutex_lock (&arg.lock); - ret = pthread_create (&tmp, NULL, 
sst_joiner_thread, &arg); + ret= pthread_create (&tmp, NULL, sst_joiner_thread, &arg); if (ret) { WSREP_ERROR("sst_prepare_other(): pthread_create() failed: %d (%s)", @@ -746,11 +679,11 @@ static ssize_t sst_prepare_other (const char* method, *addr_out= arg.ret_str; if (!arg.err) - ret = strlen(*addr_out); + ret= strlen(*addr_out); else { assert (arg.err < 0); - ret = arg.err; + ret= arg.err; } pthread_detach (tmp); @@ -764,12 +697,12 @@ extern uint mysqld_port; static ssize_t sst_prepare_mysqldump (const char* addr_in, const char** addr_out) { - ssize_t ret = strlen (addr_in); + ssize_t ret= strlen (addr_in); if (!strrchr(addr_in, ':')) { - ssize_t s = ret + 7; - char* tmp = (char*) malloc (s); + ssize_t s= ret + 7; + char* tmp= (char*) malloc (s); if (tmp) { @@ -780,7 +713,7 @@ static ssize_t sst_prepare_mysqldump (const char* addr_in, *addr_out= tmp; return ret; } - if (ret > 0) /* buffer too short */ ret = -EMSGSIZE; + if (ret > 0) /* buffer too short */ ret= -EMSGSIZE; free (tmp); } else { @@ -797,32 +730,22 @@ static ssize_t sst_prepare_mysqldump (const char* addr_in, return ret; } -static bool SE_initialized = false; - -ssize_t wsrep_sst_prepare (void** msg) +std::string wsrep_sst_prepare() { + const ssize_t ip_max= 256; + char ip_buf[ip_max]; const char* addr_in= NULL; const char* addr_out= NULL; const char* method; if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP)) { - ssize_t ret = strlen(WSREP_STATE_TRANSFER_TRIVIAL) + 1; - *msg = strdup(WSREP_STATE_TRANSFER_TRIVIAL); - if (!msg) - { - WSREP_ERROR("Could not allocate %zd bytes for state request", ret); - unireg_abort(1); - } - return ret; + return WSREP_STATE_TRANSFER_TRIVIAL; } /* Figure out SST receive address. Common for all SST methods. 
*/ - char ip_buf[256]; - const ssize_t ip_max= sizeof(ip_buf); - // Attempt 1: wsrep_sst_receive_address if (wsrep_sst_receive_address && strcmp (wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO)) @@ -839,7 +762,7 @@ ssize_t wsrep_sst_prepare (void** msg) { WSREP_ERROR("Could not parse wsrep_node_address : %s", wsrep_node_address); - unireg_abort(1); + throw wsrep::runtime_error("Failed to prepare for SST. Unrecoverable"); } memcpy(ip_buf, addr.get_address(), addr.get_address_len()); addr_in= ip_buf; @@ -857,7 +780,7 @@ ssize_t wsrep_sst_prepare (void** msg) { WSREP_ERROR("Failed to guess address to accept state transfer. " "wsrep_sst_receive_address must be set manually."); - unireg_abort(1); + throw wsrep::runtime_error("Could not prepare state transfer request"); } } @@ -866,12 +789,16 @@ ssize_t wsrep_sst_prepare (void** msg) if (!strcmp(method, WSREP_SST_MYSQLDUMP)) { addr_len= sst_prepare_mysqldump (addr_in, &addr_out); - if (addr_len < 0) unireg_abort(1); + if (addr_len < 0) + { + throw wsrep::runtime_error("Could not prepare mysqldimp address"); + } } else { /*! A heuristic workaround until we learn how to stop and start engines */ - if (SE_initialized) + if (Wsrep_server_state::instance().is_initialized() && + Wsrep_server_state::instance().state() == Wsrep_server_state::s_joiner) { if (!strcmp(method, WSREP_SST_XTRABACKUP) || !strcmp(method, WSREP_SST_XTRABACKUPV2)) @@ -890,8 +817,7 @@ ssize_t wsrep_sst_prepare (void** msg) "if other means of state transfer are unavailable. " "In that case you will need to restart the server.", method); - *msg = 0; - return 0; + return ""; } addr_len = sst_prepare_other (method, sst_auth_real, @@ -900,37 +826,28 @@ ssize_t wsrep_sst_prepare (void** msg) { WSREP_ERROR("Failed to prepare for '%s' SST. Unrecoverable.", method); - unireg_abort(1); + throw wsrep::runtime_error("Failed to prepare for SST. 
Unrecoverable"); } } - size_t const method_len(strlen(method)); - size_t const msg_len (method_len + addr_len + 2 /* + auth_len + 1*/); + std::string ret; + ret += method; + ret.push_back('\0'); + ret += addr_out; - *msg = malloc (msg_len); - if (NULL != *msg) { - char* const method_ptr(reinterpret_cast<char*>(*msg)); - strcpy (method_ptr, method); - char* const addr_ptr(method_ptr + method_len + 1); - strcpy (addr_ptr, addr_out); - - WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr); - } - else { - WSREP_ERROR("Failed to allocate SST request of size %zu. Can't continue.", - msg_len); - unireg_abort(1); - } + const char* method_ptr(ret.data()); + const char* addr_ptr(ret.data() + strlen(method_ptr) + 1); + WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr); if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out); - return msg_len; + return ret; } // helper method for donors static int sst_run_shell (const char* cmd_str, char** env, int max_tries) { - int ret = 0; + int ret= 0; for (int tries=1; tries <= max_tries; tries++) { @@ -941,7 +858,7 @@ static int sst_run_shell (const char* cmd_str, char** env, int max_tries) proc.wait(); } - if ((ret = proc.error())) + if ((ret= proc.error())) { WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)", tries, max_tries, proc.cmd(), ret, strerror(ret)); @@ -959,15 +876,12 @@ static int sst_run_shell (const char* cmd_str, char** env, int max_tries) static void sst_reject_queries(my_bool close_conn) { - wsrep_ready_set (FALSE); // this will be resotred when donor becomes synced - WSREP_INFO("Rejecting client queries for the duration of SST."); - if (TRUE == close_conn) wsrep_close_client_connections(FALSE); + WSREP_INFO("Rejecting client queries for the duration of SST."); + if (TRUE == close_conn) wsrep_close_client_connections(FALSE); } static int sst_donate_mysqldump (const char* addr, - const wsrep_uuid_t* uuid, - const char* uuid_str, - wsrep_seqno_t seqno, + const wsrep::gtid& gtid, bool 
bypass, char** env) // carries auth info { @@ -990,23 +904,31 @@ static int sst_donate_mysqldump (const char* addr, return -ENOMEM; } + /* + we enable new client connections so that mysqldump donation can connect in, + but we reject local connections from modifying data during SST, to keep + data intact + */ if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE); make_wsrep_defaults_file(); + std::ostringstream uuid_oss; + uuid_oss << gtid.id(); int ret= snprintf (cmd_str(), cmd_len, "wsrep_sst_mysqldump " WSREP_SST_OPT_ADDR " '%s' " - WSREP_SST_OPT_PORT " '%d' " + WSREP_SST_OPT_PORT " '%u' " WSREP_SST_OPT_LPORT " '%u' " WSREP_SST_OPT_SOCKET " '%s' " " %s " WSREP_SST_OPT_GTID " '%s:%lld' " WSREP_SST_OPT_GTID_DOMAIN_ID " '%d'" "%s", - addr, port, mysqld_port, mysqld_unix_port, - wsrep_defaults_file, uuid_str, - (long long)seqno, wsrep_gtid_domain_id, + addr, port, mysqld_port, mysqld_unix_port, + wsrep_defaults_file, + uuid_oss.str().c_str(), gtid.seqno().get(), + wsrep_gtid_domain_id, bypass ? " " WSREP_SST_OPT_BYPASS : ""); if (ret < 0 || ret >= cmd_len) @@ -1019,16 +941,17 @@ static int sst_donate_mysqldump (const char* addr, ret= sst_run_shell (cmd_str(), env, 3); - wsrep_gtid_t const state_id = { *uuid, (ret ? WSREP_SEQNO_UNDEFINED : seqno)}; - - wsrep->sst_sent (wsrep, &state_id, ret); + wsrep::gtid sst_sent_gtid(ret == 0 ? + gtid : + wsrep::gtid(gtid.id(), + wsrep::seqno::undefined())); + Wsrep_server_state::instance().sst_sent(sst_sent_gtid, ret); return ret; } wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; - /* Create a file under data directory. 
*/ @@ -1077,7 +1000,6 @@ static int sst_create_file(const char *name, const char *content) return err; } - static int run_sql_command(THD *thd, const char *query) { thd->set_query((char *)query, strlen(query)); @@ -1123,9 +1045,9 @@ static int sst_flush_tables(THD* thd) { /* Do not use non-supported parser character sets */ WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); - thd->variables.character_set_client = &my_charset_latin1; + thd->variables.character_set_client= &my_charset_latin1; WSREP_WARN("For SST temporally setting character set to : %s", - my_charset_latin1.csname); + my_charset_latin1.csname); } if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK")) @@ -1146,7 +1068,7 @@ static int sst_flush_tables(THD* thd) } } - thd->variables.character_set_client = current_charset; + thd->variables.character_set_client= current_charset; if (err) { @@ -1164,7 +1086,6 @@ static int sst_flush_tables(THD* thd) else { WSREP_INFO("Tables flushed."); - /* Tables have been flushed. Create a file with cluster state ID and wsrep_gtid_domain_id. 
@@ -1173,6 +1094,41 @@ static int sst_flush_tables(THD* thd) snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno, wsrep_gtid_domain_id); err= sst_create_file(flush_success, content); + + const char base_name[]= "tables_flushed"; + ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2; + char *real_name= (char*) malloc(full_len); + sprintf(real_name, "%s/%s", mysql_real_data_home, base_name); + char *tmp_name= (char*) malloc(full_len + 4); + sprintf(tmp_name, "%s.tmp", real_name); + + FILE* file= fopen(tmp_name, "w+"); + if (0 == file) + { + err= errno; + WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err)); + } + else + { + Wsrep_server_state& server_state= Wsrep_server_state::instance(); + std::ostringstream uuid_oss; + + uuid_oss << server_state.current_view().state_id().id(); + + fprintf(file, "%s:%lld %u\n", + uuid_oss.str().c_str(), server_state.pause_seqno().get(), + wsrep_gtid_domain_id); + fsync(fileno(file)); + fclose(file); + if (rename(tmp_name, real_name) == -1) + { + err= errno; + WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", + tmp_name, real_name, err,strerror(err)); + } + } + free(real_name); + free(tmp_name); } return err; @@ -1181,19 +1137,19 @@ static int sst_flush_tables(THD* thd) static void sst_disallow_writes (THD* thd, bool yes) { - char query_str[64] = { 0, }; - ssize_t const query_max = sizeof(query_str) - 1; + char query_str[64]= { 0, }; + ssize_t const query_max= sizeof(query_str) - 1; CHARSET_INFO *current_charset; - current_charset = thd->variables.character_set_client; + current_charset= thd->variables.character_set_client; if (!is_supported_parser_charset(current_charset)) { /* Do not use non-supported parser character sets */ WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); - thd->variables.character_set_client = &my_charset_latin1; + thd->variables.character_set_client= 
&my_charset_latin1; WSREP_WARN("For SST temporally setting character set to : %s", - my_charset_latin1.csname); + my_charset_latin1.csname); } snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", @@ -1203,7 +1159,7 @@ static void sst_disallow_writes (THD* thd, bool yes) { WSREP_ERROR("Failed to disallow InnoDB writes"); } - thd->variables.character_set_client = current_charset; + thd->variables.character_set_client= current_charset; } static void* sst_donor_thread (void* a) @@ -1226,11 +1182,11 @@ static void* sst_donor_thread (void* a) // operate with wsrep_ready == OFF wsp::process proc(arg->cmd, "r", arg->env); - err= proc.error(); + err= -proc.error(); /* Inform server about SST script startup and release TO isolation */ mysql_mutex_lock (&arg->lock); - arg->err = -err; + arg->err= -err; mysql_cond_signal (&arg->cond); mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. @@ -1289,6 +1245,7 @@ wait_signal: else { WSREP_WARN("Received unknown signal: '%s'", out); + proc.wait(); } } else @@ -1296,7 +1253,7 @@ wait_signal: WSREP_ERROR("Failed to read from: %s", proc.cmd()); proc.wait(); } - if (!err && proc.error()) err= proc.error(); + if (!err && proc.error()) err= -proc.error(); } else { @@ -1315,24 +1272,20 @@ wait_signal: thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); } - // signal to donor that SST is over - struct wsrep_gtid const state_id = { - ret_uuid, err ? WSREP_SEQNO_UNDEFINED : ret_seqno - }; - wsrep->sst_sent (wsrep, &state_id, -err); + wsrep::gtid gtid(wsrep::id(ret_uuid.data, sizeof(ret_uuid.data)), + wsrep::seqno(err ? 
wsrep::seqno::undefined() : + wsrep::seqno(ret_seqno))); + Wsrep_server_state::instance().sst_sent(gtid, err); proc.wait(); return NULL; } - - -static int sst_donate_other (const char* method, - const char* addr, - const char* uuid, - wsrep_seqno_t seqno, - bool bypass, - char** env) // carries auth info +static int sst_donate_other (const char* method, + const char* addr, + const wsrep::gtid& gtid, + bool bypass, + char** env) // carries auth info { int const cmd_len= 4096; wsp::string cmd_str(cmd_len); @@ -1357,6 +1310,8 @@ static int sst_donate_other (const char* method, make_wsrep_defaults_file(); + std::ostringstream uuid_oss; + uuid_oss << gtid.id(); ret= snprintf (cmd_str(), cmd_len, "wsrep_sst_%s " WSREP_SST_OPT_ROLE " 'donor' " @@ -1371,7 +1326,7 @@ static int sst_donate_other (const char* method, method, addr, mysqld_unix_port, mysql_real_data_home, wsrep_defaults_file, binlog_opt, binlog_opt_val, - uuid, (long long) seqno, wsrep_gtid_domain_id, + uuid_oss.str().c_str(), gtid.seqno().get(), wsrep_gtid_domain_id, bypass ? " " WSREP_SST_OPT_BYPASS : ""); my_free(binlog_opt_val); @@ -1386,7 +1341,7 @@ static int sst_donate_other (const char* method, pthread_t tmp; sst_thread_arg arg(cmd_str(), env); mysql_mutex_lock (&arg.lock); - ret = pthread_create (&tmp, NULL, sst_donor_thread, &arg); + ret= pthread_create (&tmp, NULL, sst_donor_thread, &arg); if (ret) { WSREP_ERROR("sst_donate_other(): pthread_create() failed: %d (%s)", @@ -1399,23 +1354,18 @@ static int sst_donate_other (const char* method, return arg.err; } -wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, - const void* msg, size_t msg_len, - const wsrep_gtid_t* current_gtid, - const char* state, size_t state_len, - bool bypass) +int wsrep_sst_donate(const std::string& msg, + const wsrep::gtid& current_gtid, + const bool bypass) { /* This will be reset when sync callback is called. * Should we set wsrep_ready to FALSE here too? 
*/ - wsrep_config_state->set(WSREP_MEMBER_DONOR); + wsrep_config_state->set(wsrep::server_state::s_donor); - const char* method = (char*)msg; - size_t method_len = strlen (method); - const char* data = method + method_len + 1; - - char uuid_str[37]; - wsrep_uuid_print (&current_gtid->uuid, uuid_str, sizeof(uuid_str)); + const char* method= msg.data(); + size_t method_len= strlen (method); + const char* data= method + method_len + 1; wsp::env env(NULL); if (env.error()) @@ -1443,54 +1393,13 @@ wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, if (!strcmp (WSREP_SST_MYSQLDUMP, method)) { - ret = sst_donate_mysqldump(data, &current_gtid->uuid, uuid_str, - current_gtid->seqno, bypass, env()); + ret= sst_donate_mysqldump(data, current_gtid, bypass, env()); } else { - ret = sst_donate_other(method, data, uuid_str, - current_gtid->seqno, bypass, env()); + ret= sst_donate_other(method, data, current_gtid, bypass, env()); } - return (ret >= 0 ? WSREP_CB_SUCCESS : WSREP_CB_FAILURE); -} - -void wsrep_SE_init_grab() -{ - if (mysql_mutex_lock (&LOCK_wsrep_sst_init)) abort(); -} - -void wsrep_SE_init_wait() -{ - double total_wtime=0; - - while (SE_initialized == false) - { - struct timespec wtime; - set_timespec(wtime, WSREP_TIMEDWAIT_SECONDS); - time_t start_time = time(NULL); - mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime); - time_t end_time = time(NULL); - - if (!SE_initialized) - { - total_wtime += difftime(end_time, start_time); - WSREP_DEBUG("Waiting for SST to complete. 
current seqno: %" PRId64 " waited %f secs.", local_seqno, total_wtime); - service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, - "WSREP state transfer ongoing, current seqno: %ld waited %f secs", local_seqno, total_wtime); - } - } - - mysql_mutex_unlock (&LOCK_wsrep_sst_init); -} - -void wsrep_SE_init_done() -{ - mysql_cond_signal (&COND_wsrep_sst_init); - mysql_mutex_unlock (&LOCK_wsrep_sst_init); -} - -void wsrep_SE_initialized() -{ - SE_initialized = true; + return (ret >= 0 ? 0 : 1); } + diff --git a/sql/wsrep_sst.h b/sql/wsrep_sst.h index 29724a00797..46059a7f436 100644 --- a/sql/wsrep_sst.h +++ b/sql/wsrep_sst.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 Codership Oy <info@codership.com> +/* Copyright (C) 2013-2018 Codership Oy <info@codership.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,14 +13,14 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include <my_config.h> - #ifndef WSREP_SST_H #define WSREP_SST_H -#ifdef WITH_WSREP +#include <my_config.h> -#include <mysql.h> // my_bool +#include "wsrep/gtid.hpp" +#include <my_global.h> +#include <string> #define WSREP_SST_OPT_ROLE "--role" #define WSREP_SST_OPT_ADDR "--address" @@ -77,11 +77,29 @@ extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */ extern void wsrep_SE_init_done(); /*! signal that SE init is complte */ extern void wsrep_SE_initialized(); /*! mark SE initialization complete */ +/** + Return a string containing the state transfer request string. + Note that the string may contain a '\0' in the middle. +*/ +std::string wsrep_sst_prepare(); + +/** + Donate a SST. + + @param request SST request string received from the joiner. Note that + the string may contain a '\0' in the middle. + @param gtid Current position of the donor + @param bypass If true, full SST is not needed. 
Joiner needs to be + notified that it can continue starting from gtid. + */ +int wsrep_sst_donate(const std::string& request, + const wsrep::gtid& gtid, + bool bypass); + #else #define wsrep_SE_initialized() do { } while(0) #define wsrep_SE_init_grab() do { } while(0) #define wsrep_SE_init_done() do { } while(0) #define wsrep_sst_continue() (0) -#endif /* WITH_WSREP */ #endif /* WSREP_SST_H */ diff --git a/sql/wsrep_storage_service.cc b/sql/wsrep_storage_service.cc new file mode 100644 index 00000000000..5a15f22ab57 --- /dev/null +++ b/sql/wsrep_storage_service.cc @@ -0,0 +1,238 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_global.h" +#include "wsrep_storage_service.h" +#include "wsrep_trans_observer.h" /* wsrep_open() */ +#include "wsrep_schema.h" +#include "wsrep_binlog.h" + +#include "sql_class.h" +#include "mysqld.h" /* next_query_id() */ +#include "slave.h" /* opt_log_slave_updates() */ +#include "transaction.h" /* trans_commit(), trans_rollback() */ + +/* + Temporarily enable wsrep on thd + */ +class Wsrep_on +{ +public: + Wsrep_on(THD* thd) + : m_thd(thd) + , m_wsrep_on(thd->variables.wsrep_on) + { + thd->variables.wsrep_on= TRUE; + } + ~Wsrep_on() + { + m_thd->variables.wsrep_on= m_wsrep_on; + } +private: + THD* m_thd; + my_bool m_wsrep_on; +}; + +Wsrep_storage_service::Wsrep_storage_service(THD* thd) + : wsrep::storage_service() + , wsrep::high_priority_context(thd->wsrep_cs()) + , m_thd(thd) +{ + thd->security_ctx->skip_grants(); + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + + /* No binlogging */ + + /* No general log */ + thd->variables.option_bits |= OPTION_LOG_OFF; + + /* Read committed isolation to avoid gap locking */ + thd->variables.tx_isolation = ISO_READ_COMMITTED; + + /* Keep wsrep on to enter commit ordering hooks */ + thd->variables.wsrep_on= 1; + thd->wsrep_skip_locking= true; + + wsrep_open(thd); + wsrep_before_command(thd); +} + +Wsrep_storage_service::~Wsrep_storage_service() +{ + wsrep_after_command_ignore_result(m_thd); + wsrep_close(m_thd); + m_thd->wsrep_skip_locking= false; +} + +int Wsrep_storage_service::start_transaction(const wsrep::ws_handle& ws_handle) +{ + DBUG_ENTER("Wsrep_storage_service::start_transaction"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::start_transcation(%llu, %p)", + m_thd->thread_id, m_thd)); + m_thd->set_wsrep_next_trx_id(ws_handle.transaction_id().get()); + 
DBUG_RETURN(m_thd->wsrep_cs().start_transaction( + wsrep::transaction_id(m_thd->wsrep_next_trx_id())) || + trans_begin(m_thd, MYSQL_START_TRANS_OPT_READ_WRITE)); +} + +void Wsrep_storage_service::adopt_transaction(const wsrep::transaction& transaction) +{ + DBUG_ENTER("Wsrep_Storage_server::adopt_transaction"); + DBUG_ASSERT(m_thd == current_thd); + m_thd->wsrep_cs().adopt_transaction(transaction); + trans_begin(m_thd, MYSQL_START_TRANS_OPT_READ_WRITE); + DBUG_VOID_RETURN; +} + +int Wsrep_storage_service::append_fragment(const wsrep::id& server_id, + wsrep::transaction_id transaction_id, + int flags, + const wsrep::const_buffer& data) +{ + DBUG_ENTER("Wsrep_storage_service::append_fragment"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::append_fragment(%llu, %p)", + m_thd->thread_id, m_thd)); + int ret= wsrep_schema->append_fragment(m_thd, + server_id, + transaction_id, + wsrep::seqno(-1), + flags, + data); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::update_fragment_meta(const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_storage_service::update_fragment_meta"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::update_fragment_meta(%llu, %p)", + m_thd->thread_id, m_thd)); + int ret= wsrep_schema->update_fragment_meta(m_thd, ws_meta); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::remove_fragments() +{ + DBUG_ENTER("Wsrep_storage_service::remove_fragments"); + DBUG_ASSERT(m_thd == current_thd); + + int ret= wsrep_schema->remove_fragments(m_thd, + m_thd->wsrep_trx().server_id(), + m_thd->wsrep_trx().id(), + m_thd->wsrep_sr().fragments()); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::commit(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_storage_service::commit"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::commit(%llu, %p)", + m_thd->thread_id, m_thd)); + WSREP_DEBUG("Storage service commit: 
%llu, %lld", + ws_meta.transaction_id().get(), ws_meta.seqno().get()); + int ret= 0; + const bool do_binlog_commit= (opt_log_slave_updates && wsrep_gtid_mode); + const bool is_ordered= !ws_meta.seqno().is_undefined(); + /* + Write skip event into binlog if gtid_mode is on. This is to + maintain gtid continuity. + */ + if (do_binlog_commit && is_ordered) + { + ret= wsrep_write_skip_event(m_thd); + } + + if (!ret && is_ordered) + { + ret= m_thd->wsrep_cs().prepare_for_ordering(ws_handle, + ws_meta, true); + } + + if (!ret) + { + if (!do_binlog_commit && is_ordered) + { + ret= wsrep_before_commit(m_thd, true); + } + ret= ret || trans_commit(m_thd); + if (!do_binlog_commit && is_ordered) + { + if (opt_log_slave_updates) + { + ret= ret || wsrep_ordered_commit(m_thd, true, wsrep_apply_error()); + } + ret= ret || wsrep_after_commit(m_thd, true); + } + } + + if (!is_ordered) + { + /* Wsrep commit was not ordered so it does not go through commit time + hooks and remains active. Roll it back to make cleanup happen + in after_applying() call. */ + m_thd->wsrep_cs().before_rollback(); + m_thd->wsrep_cs().after_rollback(); + } + else if (ret) + { + /* Commit failed, this probably means that the parent SR transaction + was BF aborted. Roll back out of order, the parent + transaction will release commit order after it has rolled back. 
*/ + m_thd->wsrep_cs().prepare_for_ordering(wsrep::ws_handle(), + wsrep::ws_meta(), + false); + trans_rollback(m_thd); + } + m_thd->wsrep_cs().after_applying(); + m_thd->mdl_context.release_transactional_locks(); + DBUG_RETURN(ret); +} + +int Wsrep_storage_service::rollback(const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + DBUG_ENTER("Wsrep_storage_service::rollback"); + DBUG_ASSERT(m_thd == current_thd); + DBUG_PRINT("info", ("Wsrep_storage_service::rollback(%llu, %p)", + m_thd->thread_id, m_thd)); + int ret= (m_thd->wsrep_cs().prepare_for_ordering( + ws_handle, ws_meta, false) || + trans_rollback(m_thd)); + m_thd->wsrep_cs().after_applying(); + m_thd->mdl_context.release_transactional_locks(); + DBUG_RETURN(ret); +} + +void Wsrep_storage_service::store_globals() +{ + DBUG_ENTER("Wsrep_storage_service::store_globals"); + DBUG_PRINT("info", ("Wsrep_storage_service::store_globals(%llu, %p)", + m_thd->thread_id, m_thd)); + m_thd->store_globals(); + DBUG_VOID_RETURN; +} + +void Wsrep_storage_service::reset_globals() +{ + DBUG_ENTER("Wsrep_storage_service::reset_globals"); + DBUG_PRINT("info", ("Wsrep_storage_service::reset_globals(%llu, %p)", + m_thd->thread_id, m_thd)); + m_thd->reset_globals(); + DBUG_VOID_RETURN; +} diff --git a/sql/wsrep_storage_service.h b/sql/wsrep_storage_service.h new file mode 100644 index 00000000000..6208300930f --- /dev/null +++ b/sql/wsrep_storage_service.h @@ -0,0 +1,48 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef WSREP_STORAGE_SERVICE_H +#define WSREP_STORAGE_SERVICE_H + +#include "wsrep/storage_service.hpp" +#include "wsrep/client_state.hpp" + +class THD; +class Wsrep_server_service; +class Wsrep_storage_service : + public wsrep::storage_service, + public wsrep::high_priority_context +{ +public: + Wsrep_storage_service(THD*); + ~Wsrep_storage_service(); + int start_transaction(const wsrep::ws_handle&); + void adopt_transaction(const wsrep::transaction&); + int append_fragment(const wsrep::id&, + wsrep::transaction_id, + int flags, + const wsrep::const_buffer&); + int update_fragment_meta(const wsrep::ws_meta&); + int remove_fragments(); + int commit(const wsrep::ws_handle&, const wsrep::ws_meta&); + int rollback(const wsrep::ws_handle&, const wsrep::ws_meta&); + void store_globals(); + void reset_globals(); +private: + friend class Wsrep_server_service; + THD* m_thd; +}; + +#endif /* WSREP_STORAGE_SERVICE_H */ diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index ce6d9688cb3..4f9915fa05f 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -15,412 +15,82 @@ #include "mariadb.h" #include "wsrep_thd.h" +#include "wsrep_trans_observer.h" +#include "wsrep_high_priority_service.h" +#include "wsrep_storage_service.h" #include "transaction.h" #include "rpl_rli.h" #include "log_event.h" #include "sql_parse.h" -//#include "global_threads.h" // LOCK_thread_count, etc. 
#include "sql_base.h" // close_thread_tables() #include "mysqld.h" // start_wsrep_THD(); - -#include "slave.h" // opt_log_slave_updates -#include "rpl_filter.h" +#include "wsrep_applier.h" // start_wsrep_THD(); +#include "mysql/service_wsrep.h" +#include "debug_sync.h" +#include "slave.h" #include "rpl_rli.h" #include "rpl_mi.h" -#if (__LP64__) -static volatile int64 wsrep_bf_aborts_counter(0); -#define WSREP_ATOMIC_LOAD_LONG my_atomic_load64 -#define WSREP_ATOMIC_ADD_LONG my_atomic_add64 -#else -static volatile int32 wsrep_bf_aborts_counter(0); -#define WSREP_ATOMIC_LOAD_LONG my_atomic_load32 -#define WSREP_ATOMIC_ADD_LONG my_atomic_add32 -#endif +static Wsrep_thd_queue* wsrep_rollback_queue= 0; +static Wsrep_thd_queue* wsrep_post_rollback_queue= 0; +static Atomic_counter<uint64_t> wsrep_bf_aborts_counter; + int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff, enum enum_var_type scope) { - wsrep_local_bf_aborts = WSREP_ATOMIC_LOAD_LONG(&wsrep_bf_aborts_counter); - var->type = SHOW_LONGLONG; - var->value = (char*)&wsrep_local_bf_aborts; + wsrep_local_bf_aborts= wsrep_bf_aborts_counter; + var->type= SHOW_LONGLONG; + var->value= (char*)&wsrep_local_bf_aborts; return 0; } -/* must have (&thd->LOCK_thd_data) */ -void wsrep_client_rollback(THD *thd) -{ - WSREP_DEBUG("client rollback due to BF abort for (%lld), query: %s", - (longlong) thd->thread_id, thd->query()); - - WSREP_ATOMIC_ADD_LONG(&wsrep_bf_aborts_counter, 1); - - thd->wsrep_conflict_state= ABORTING; - mysql_mutex_unlock(&thd->LOCK_thd_data); - trans_rollback(thd); - - if (thd->locked_tables_mode && thd->lock) - { - WSREP_DEBUG("unlocking tables for BF abort (%lld)", - (longlong) thd->thread_id); - thd->locked_tables_list.unlock_locked_tables(thd); - thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); - } - - if (thd->global_read_lock.is_acquired()) - { - WSREP_DEBUG("unlocking GRL for BF abort (%lld)", - (longlong) thd->thread_id); - thd->global_read_lock.unlock_global_read_lock(thd); - } - - /* 
Release transactional metadata locks. */ - thd->mdl_context.release_transactional_locks(); - - /* release explicit MDL locks */ - thd->mdl_context.release_explicit_locks(); - - if (thd->get_binlog_table_maps()) - { - WSREP_DEBUG("clearing binlog table map for BF abort (%lld)", - (longlong) thd->thread_id); - thd->clear_binlog_table_maps(); - } - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->wsrep_conflict_state= ABORTED; -} - -#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1 -#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2 - -static rpl_group_info* wsrep_relay_group_init(const char* log_fname) -{ - Relay_log_info* rli= new Relay_log_info(false); - - if (!rli->relay_log.description_event_for_exec) - { - rli->relay_log.description_event_for_exec= - new Format_description_log_event(4); - } - - static LEX_CSTRING connection_name= { STRING_WITH_LEN("wsrep") }; - - /* - Master_info's constructor initializes rpl_filter by either an already - constructed Rpl_filter object from global 'rpl_filters' list if the - specified connection name is same, or it constructs a new Rpl_filter - object and adds it to rpl_filters. This object is later destructed by - Mater_info's destructor by looking it up based on connection name in - rpl_filters list. - - However, since all Master_info objects created here would share same - connection name ("wsrep"), destruction of any of the existing Master_info - objects (in wsrep_return_from_bf_mode()) would free rpl_filter referenced - by any/all existing Master_info objects. - - In order to avoid that, we have added a check in Master_info's destructor - to not free the "wsrep" rpl_filter. It will eventually be freed by - free_all_rpl_filters() when server terminates. 
- */ - rli->mi = new Master_info(&connection_name, false); - - struct rpl_group_info *rgi= new rpl_group_info(rli); - rgi->thd= rli->sql_driver_thd= current_thd; - - if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) - { - rgi->deferred_events= new Deferred_log_events(rli); - } - - return rgi; -} - -static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow) +static void wsrep_replication_process(THD *thd, + void* arg __attribute__((unused))) { - shadow->options = thd->variables.option_bits; - shadow->server_status = thd->server_status; - shadow->wsrep_exec_mode = thd->wsrep_exec_mode; - shadow->vio = thd->net.vio; - - // Disable general logging on applier threads - thd->variables.option_bits |= OPTION_LOG_OFF; - // Enable binlogging if opt_log_slave_updates is set - if (opt_log_slave_updates) - thd->variables.option_bits|= OPTION_BIN_LOG; - else - thd->variables.option_bits&= ~(OPTION_BIN_LOG); + DBUG_ENTER("wsrep_replication_process"); - if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init("wsrep_relay"); + Wsrep_applier_service applier_service(thd); /* thd->system_thread_info.rpl_sql_info isn't initialized. 
*/ thd->system_thread_info.rpl_sql_info= new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter); - thd->wsrep_exec_mode= REPL_RECV; - thd->net.vio= 0; - thd->clear_error(); - - shadow->tx_isolation = thd->variables.tx_isolation; - thd->variables.tx_isolation = ISO_READ_COMMITTED; - thd->tx_isolation = ISO_READ_COMMITTED; - - shadow->db = thd->db.str; - shadow->db_length = thd->db.length; - shadow->user_time = thd->user_time; - shadow->row_count_func= thd->get_row_count_func(); - thd->reset_db(&null_clex_str); -} + WSREP_INFO("Starting applier thread %llu", thd->thread_id); + enum wsrep::provider::status + ret= Wsrep_server_state::get_provider().run_applier(&applier_service); -static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow) -{ - LEX_CSTRING db= {shadow->db, shadow->db_length }; - thd->variables.option_bits = shadow->options; - thd->server_status = shadow->server_status; - thd->wsrep_exec_mode = shadow->wsrep_exec_mode; - thd->net.vio = shadow->vio; - thd->variables.tx_isolation = shadow->tx_isolation; - thd->user_time = shadow->user_time; - thd->reset_db(&db); + WSREP_INFO("Applier thread exiting %d", ret); + mysql_mutex_lock(&LOCK_thread_count); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); delete thd->system_thread_info.rpl_sql_info; delete thd->wsrep_rgi->rli->mi; delete thd->wsrep_rgi->rli; - + thd->wsrep_rgi->cleanup_after_session(); delete thd->wsrep_rgi; - thd->wsrep_rgi = NULL; - thd->set_row_count_func(shadow->row_count_func); -} - -void wsrep_replay_transaction(THD *thd) -{ - DBUG_ENTER("wsrep_replay_transaction"); - /* checking if BF trx must be replayed */ - if (thd->wsrep_conflict_state== MUST_REPLAY) { - DBUG_ASSERT(wsrep_thd_trx_seqno(thd)); - if (thd->wsrep_exec_mode!= REPL_RECV) { - if (thd->get_stmt_da()->is_sent()) - { - WSREP_ERROR("replay issue, thd has reported status already"); - } - - - /* - PS reprepare observer should have been removed 
already. - open_table() will fail if we have dangling observer here. - */ - DBUG_ASSERT(thd->m_reprepare_observer == NULL); - - struct da_shadow - { - enum Diagnostics_area::enum_diagnostics_status status; - ulonglong affected_rows; - ulonglong last_insert_id; - char message[MYSQL_ERRMSG_SIZE]; - }; - struct da_shadow da_status; - da_status.status= thd->get_stmt_da()->status(); - if (da_status.status == Diagnostics_area::DA_OK) - { - da_status.affected_rows= thd->get_stmt_da()->affected_rows(); - da_status.last_insert_id= thd->get_stmt_da()->last_insert_id(); - strmake(da_status.message, - thd->get_stmt_da()->message(), - sizeof(da_status.message)-1); - } - - thd->get_stmt_da()->reset_diagnostics_area(); - - thd->wsrep_conflict_state= REPLAYING; - mysql_mutex_unlock(&thd->LOCK_thd_data); + thd->wsrep_rgi= NULL; - thd->reset_for_next_command(); - thd->reset_killed(); - close_thread_tables(thd); - if (thd->locked_tables_mode && thd->lock) - { - WSREP_DEBUG("releasing table lock for replaying (%lld)", - (longlong) thd->thread_id); - thd->locked_tables_list.unlock_locked_tables(thd); - thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); - } - thd->mdl_context.release_transactional_locks(); - /* - Replaying will call MYSQL_START_STATEMENT when handling - BEGIN Query_log_event so end statement must be called before - replaying. - */ - MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); - thd->m_statement_psi= NULL; - thd->m_digest= NULL; - thd_proc_info(thd, "WSREP replaying trx"); - WSREP_DEBUG("replay trx: %s %lld", - thd->query() ? 
thd->query() : "void", - (long long)wsrep_thd_trx_seqno(thd)); - struct wsrep_thd_shadow shadow; - wsrep_prepare_bf_thd(thd, &shadow); - - /* From trans_begin() */ - thd->variables.option_bits|= OPTION_BEGIN; - thd->server_status|= SERVER_STATUS_IN_TRANS; - - int rcode = wsrep->replay_trx(wsrep, - &thd->wsrep_ws_handle, - (void *)thd); - - wsrep_return_from_bf_mode(thd, &shadow); - if (thd->wsrep_conflict_state!= REPLAYING) - WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state ); - - mysql_mutex_lock(&thd->LOCK_thd_data); - - switch (rcode) - { - case WSREP_OK: - thd->wsrep_conflict_state= NO_CONFLICT; - wsrep->post_commit(wsrep, &thd->wsrep_ws_handle); - WSREP_DEBUG("trx_replay successful for: %lld %lld", - (longlong) thd->thread_id, (longlong) thd->real_id); - if (thd->get_stmt_da()->is_sent()) - { - WSREP_WARN("replay ok, thd has reported status"); - } - else if (thd->get_stmt_da()->is_set()) - { - if (thd->get_stmt_da()->status() != Diagnostics_area::DA_OK && - thd->get_stmt_da()->status() != Diagnostics_area::DA_OK_BULK) - { - WSREP_WARN("replay ok, thd has error status %d", - thd->get_stmt_da()->status()); - } - } - else - { - if (da_status.status == Diagnostics_area::DA_OK) - { - my_ok(thd, - da_status.affected_rows, - da_status.last_insert_id, - da_status.message); - } - else - { - my_ok(thd); - } - } - break; - case WSREP_TRX_FAIL: - if (thd->get_stmt_da()->is_sent()) - { - WSREP_ERROR("replay failed, thd has reported status"); - } - else - { - WSREP_DEBUG("replay failed, rolling back"); - } - thd->wsrep_conflict_state= ABORTED; - wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle); - break; - default: - WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s", - rcode, thd->get_db(), - thd->query() ? 
thd->query() : "void"); - /* we're now in inconsistent state, must abort */ - - /* http://bazaar.launchpad.net/~codership/codership-mysql/5.6/revision/3962#sql/wsrep_thd.cc */ - mysql_mutex_unlock(&thd->LOCK_thd_data); - - unireg_abort(1); - break; - } - - wsrep_cleanup_transaction(thd); - - mysql_mutex_lock(&LOCK_wsrep_replaying); - wsrep_replaying--; - WSREP_DEBUG("replaying decreased: %d, thd: %lld", - wsrep_replaying, (longlong) thd->thread_id); - mysql_cond_broadcast(&COND_wsrep_replaying); - mysql_mutex_unlock(&LOCK_wsrep_replaying); - } - } - DBUG_VOID_RETURN; -} - -static void wsrep_replication_process(THD *thd) -{ - int rcode; - DBUG_ENTER("wsrep_replication_process"); - - struct wsrep_thd_shadow shadow; - wsrep_prepare_bf_thd(thd, &shadow); - - /* From trans_begin() */ - thd->variables.option_bits|= OPTION_BEGIN; - thd->server_status|= SERVER_STATUS_IN_TRANS; - - rcode = wsrep->recv(wsrep, (void *)thd); - DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode)); - - WSREP_INFO("applier thread exiting (code:%d)", rcode); - - switch (rcode) { - case WSREP_OK: - case WSREP_NOT_IMPLEMENTED: - case WSREP_CONN_FAIL: - /* provider does not support slave operations / disconnected from group, - * just close applier thread */ - break; - case WSREP_NODE_FAIL: - /* data inconsistency => SST is needed */ - /* Note: we cannot just blindly restart replication here, - * SST might require server restart if storage engines must be - * initialized after SST */ - WSREP_ERROR("node consistency compromised, aborting"); - wsrep_kill_mysql(thd); - break; - case WSREP_WARNING: - case WSREP_TRX_FAIL: - case WSREP_TRX_MISSING: - /* these suggests a bug in provider code */ - WSREP_WARN("bad return from recv() call: %d", rcode); - /* Shut down this node. */ - /* fall through */ - case WSREP_FATAL: - /* Cluster connectivity is lost. - * - * If applier was killed on purpose (KILL_CONNECTION), we - * avoid mysql shutdown. 
This is because the killer will then handle - * shutdown processing (or replication restarting) - */ - if (thd->killed != KILL_CONNECTION) - { - wsrep_kill_mysql(thd); - } - break; - } - - mysql_mutex_lock(&LOCK_thread_count); - wsrep_close_applier(thd); - mysql_cond_broadcast(&COND_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); if(thd->has_thd_temporary_tables()) { WSREP_WARN("Applier %lld has temporary tables at exit.", thd->thread_id); } - wsrep_return_from_bf_mode(thd, &shadow); DBUG_VOID_RETURN; } -static bool create_wsrep_THD(wsrep_thd_processor_fun processor) +static bool create_wsrep_THD(Wsrep_thd_args* args) { ulong old_wsrep_running_threads= wsrep_running_threads; pthread_t unused; mysql_mutex_lock(&LOCK_thread_count); + bool res= pthread_create(&unused, &connection_attrib, start_wsrep_THD, - (void*)processor); + args); /* if starting a thread on server startup, wait until this thread's THD is fully initialized (otherwise a THD initialization code might @@ -435,244 +105,291 @@ static bool create_wsrep_THD(wsrep_thd_processor_fun processor) void wsrep_create_appliers(long threads) { - if (!wsrep_connected) + /* Don't start slave threads if wsrep-provider or wsrep-cluster-address + is not set. 
+ */ + if (!WSREP_PROVIDER_EXISTS) + { + return; + } + + if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0) { - /* see wsrep_replication_start() for the logic */ - if (wsrep_cluster_address && strlen(wsrep_cluster_address) && - wsrep_provider && strcasecmp(wsrep_provider, "none")) - { - WSREP_ERROR("Trying to launch slave threads before creating " - "connection at '%s'", wsrep_cluster_address); - assert(0); - } return; } long wsrep_threads=0; - while (wsrep_threads++ < threads) { - if (create_wsrep_THD(wsrep_replication_process)) + + while (wsrep_threads++ < threads) + { + Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_replication_process, 0)); + if (create_wsrep_THD(args)) + { WSREP_WARN("Can't create thread to manage wsrep replication"); + } } } -static void wsrep_rollback_process(THD *thd) +static void wsrep_rollback_process(THD *rollbacker, + void *arg __attribute__((unused))) { DBUG_ENTER("wsrep_rollback_process"); - mysql_mutex_lock(&LOCK_wsrep_rollback); - wsrep_aborting_thd= NULL; - - while (thd->killed == NOT_KILLED) { - thd_proc_info(thd, "WSREP aborter idle"); - thd->mysys_var->current_mutex= &LOCK_wsrep_rollback; - thd->mysys_var->current_cond= &COND_wsrep_rollback; + THD* thd= NULL; + DBUG_ASSERT(!wsrep_rollback_queue); + wsrep_rollback_queue= new Wsrep_thd_queue(rollbacker); - mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback); + thd_proc_info(rollbacker, "wsrep aborter idle"); + while ((thd= wsrep_rollback_queue->pop_front()) != NULL) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + wsrep::client_state& cs(thd->wsrep_cs()); + const wsrep::transaction& tx(cs.transaction()); + if (tx.state() == wsrep::transaction::s_aborted) + { + WSREP_DEBUG("rollbacker thd already aborted: %llu state: %d", + (long long)thd->real_id, + tx.state()); - WSREP_DEBUG("WSREP rollback thread wakes for signal"); + mysql_mutex_unlock(&thd->LOCK_thd_data); + continue; + } + mysql_mutex_unlock(&thd->LOCK_thd_data); - mysql_mutex_lock(&thd->mysys_var->mutex); - 
thd_proc_info(thd, "WSREP aborter active"); - thd->mysys_var->current_mutex= 0; - thd->mysys_var->current_cond= 0; - mysql_mutex_unlock(&thd->mysys_var->mutex); + thd_proc_info(rollbacker, "wsrep aborter active"); - /* check for false alarms */ - if (!wsrep_aborting_thd) + wsrep::transaction_id transaction_id(thd->wsrep_trx().id()); + if (thd->wsrep_trx().is_streaming() && + thd->wsrep_trx().bf_aborted_in_total_order()) { - WSREP_DEBUG("WSREP rollback thread has empty abort queue"); - } - /* process all entries in the queue */ - while (wsrep_aborting_thd) { - THD *aborting; - wsrep_aborting_thd_t next = wsrep_aborting_thd->next; - aborting = wsrep_aborting_thd->aborting_thd; - my_free(wsrep_aborting_thd); - wsrep_aborting_thd= next; - /* - * must release mutex, appliers my want to add more - * aborting thds in our work queue, while we rollback - */ - mysql_mutex_unlock(&LOCK_wsrep_rollback); - - mysql_mutex_lock(&aborting->LOCK_thd_data); - if (aborting->wsrep_conflict_state== ABORTED) + thd->store_globals(); + thd->wsrep_cs().store_globals(); + if (thd->wsrep_cs().mode() == wsrep::client_state::m_high_priority) { - WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d", - (long long)aborting->real_id, - aborting->wsrep_conflict_state); - - mysql_mutex_unlock(&aborting->LOCK_thd_data); - mysql_mutex_lock(&LOCK_wsrep_rollback); - continue; + DBUG_ASSERT(thd->wsrep_applier_service); + thd->wsrep_applier_service->rollback(wsrep::ws_handle(), + wsrep::ws_meta()); + thd->wsrep_applier_service->after_apply(); + /* Will free THD */ + Wsrep_server_state::instance().server_service(). 
+ release_high_priority_service(thd->wsrep_applier_service); } - aborting->wsrep_conflict_state= ABORTING; - - mysql_mutex_unlock(&aborting->LOCK_thd_data); - - set_current_thd(aborting); - aborting->store_globals(); - - mysql_mutex_lock(&aborting->LOCK_thd_data); - wsrep_client_rollback(aborting); - WSREP_DEBUG("WSREP rollbacker aborted thd: (%lld %lld)", - (longlong) aborting->thread_id, - (longlong) aborting->real_id); - mysql_mutex_unlock(&aborting->LOCK_thd_data); - - set_current_thd(thd); + else + { + mysql_mutex_lock(&thd->LOCK_thd_data); + /* prepare THD for rollback processing */ + thd->reset_for_next_command(true); + thd->lex->sql_command= SQLCOM_ROLLBACK; + mysql_mutex_unlock(&thd->LOCK_thd_data); + /* Perform a client rollback, restore globals and signal + the victim only when all the resources have been + released */ + thd->wsrep_cs().client_service().bf_rollback(); + thd->reset_globals(); + thd->wsrep_cs().sync_rollback_complete(); + } + } + else if (wsrep_thd_is_applying(thd)) + { + WSREP_DEBUG("rollbacker aborting SR thd: (%lld %llu)", + thd->thread_id, (long long)thd->real_id); + DBUG_ASSERT(thd->wsrep_cs().mode() == Wsrep_client_state::m_high_priority); + /* Must be streaming and must have been removed from the + server state streaming appliers map. */ + DBUG_ASSERT(thd->wsrep_trx().is_streaming()); + DBUG_ASSERT(!Wsrep_server_state::instance().find_streaming_applier( + thd->wsrep_trx().server_id(), + thd->wsrep_trx().id())); + DBUG_ASSERT(thd->wsrep_applier_service); + + /* Fragment removal should happen before rollback to make + the transaction non-observable in SR table after the rollback + completes. For correctness the order does not matter here, + but currently it is mandated by checks in some MTR tests. 
*/ + Wsrep_storage_service* storage_service= + static_cast<Wsrep_storage_service*>( + Wsrep_server_state::instance().server_service().storage_service( + *thd->wsrep_applier_service)); + storage_service->store_globals(); + storage_service->adopt_transaction(thd->wsrep_trx()); + storage_service->remove_fragments(); + storage_service->commit(wsrep::ws_handle(transaction_id, 0), + wsrep::ws_meta()); + Wsrep_server_state::instance().server_service().release_storage_service(storage_service); thd->store_globals(); + thd->wsrep_cs().store_globals(); + thd->wsrep_applier_service->rollback(wsrep::ws_handle(), + wsrep::ws_meta()); + thd->wsrep_applier_service->after_apply(); + /* Will free THD */ + Wsrep_server_state::instance().server_service() + .release_high_priority_service(thd->wsrep_applier_service); - mysql_mutex_lock(&LOCK_wsrep_rollback); } + else + { + if (thd->wsrep_trx().is_streaming()) + { + Wsrep_storage_service* storage_service= + static_cast<Wsrep_storage_service*>( + Wsrep_server_state::instance().server_service(). + storage_service(thd->wsrep_cs().client_service())); + + storage_service->store_globals(); + storage_service->adopt_transaction(thd->wsrep_trx()); + storage_service->remove_fragments(); + storage_service->commit(wsrep::ws_handle(transaction_id, 0), + wsrep::ws_meta()); + Wsrep_server_state::instance().server_service(). 
+ release_storage_service(storage_service); + } + thd->store_globals(); + thd->wsrep_cs().store_globals(); + mysql_mutex_lock(&thd->LOCK_thd_data); + /* prepare THD for rollback processing */ + thd->reset_for_next_command(); + thd->lex->sql_command= SQLCOM_ROLLBACK; + mysql_mutex_unlock(&thd->LOCK_thd_data); + /* Perform a client rollback, restore globals and signal + the victim only when all the resources have been + released */ + thd->wsrep_cs().client_service().bf_rollback(); + thd->reset_globals(); + thd->wsrep_cs().sync_rollback_complete(); + WSREP_DEBUG("rollbacker aborted thd: (%llu %llu)", + thd->thread_id, (long long)thd->real_id); + } + + thd_proc_info(rollbacker, "wsrep aborter idle"); } + + delete wsrep_rollback_queue; + wsrep_rollback_queue= NULL; - mysql_mutex_unlock(&LOCK_wsrep_rollback); sql_print_information("WSREP: rollbacker thread exiting"); + DBUG_ASSERT(rollbacker->killed != NOT_KILLED); DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting")); DBUG_VOID_RETURN; } -void wsrep_create_rollbacker() +static void wsrep_post_rollback_process(THD *post_rollbacker, + void *arg __attribute__((unused))) { - if (wsrep_provider && strcasecmp(wsrep_provider, "none")) - { - /* create rollbacker */ - if (create_wsrep_THD(wsrep_rollback_process)) - WSREP_WARN("Can't create thread to manage wsrep rollback"); - } -} + DBUG_ENTER("wsrep_post_rollback_process"); + THD* thd= NULL; -void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe) -{ - if (thd_ptr) - { - THD* thd = (THD*)thd_ptr; - thd->wsrep_PA_safe = safe; - } -} + DBUG_ASSERT(!wsrep_post_rollback_queue); + wsrep_post_rollback_queue= new Wsrep_thd_queue(post_rollbacker); -enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd, my_bool sync) -{ - enum wsrep_conflict_state state = NO_CONFLICT; - if (thd) + while ((thd= wsrep_post_rollback_queue->pop_front()) != NULL) { - if (sync) mysql_mutex_lock(&thd->LOCK_thd_data); - - state = thd->wsrep_conflict_state; - if (sync) 
mysql_mutex_unlock(&thd->LOCK_thd_data); + thd->store_globals(); + wsrep::client_state& cs(thd->wsrep_cs()); + mysql_mutex_lock(&thd->LOCK_thd_data); + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_aborting); + WSREP_DEBUG("post rollbacker calling post rollback for thd %llu, conf %s", + thd->thread_id, wsrep_thd_transaction_state_str(thd)); + + cs.after_rollback(); + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_aborted); + mysql_mutex_unlock(&thd->LOCK_thd_data); } - return state; -} -my_bool wsrep_thd_is_wsrep(THD *thd) -{ - my_bool status = FALSE; - if (thd) - { - status = (WSREP(thd) && WSREP_PROVIDER_EXISTS); - } - return status; + delete wsrep_post_rollback_queue; + wsrep_post_rollback_queue= NULL; + + DBUG_ASSERT(post_rollbacker->killed != NOT_KILLED); + DBUG_PRINT("wsrep",("wsrep post rollbacker thread exiting")); + DBUG_VOID_RETURN; } -my_bool wsrep_thd_is_BF(THD *thd, my_bool sync) +void wsrep_create_rollbacker() { - my_bool status = FALSE; - if (thd) + if (wsrep_provider && strcasecmp(wsrep_provider, "none")) { - // THD can be BF only if provider exists - if (wsrep_thd_is_wsrep(thd)) - { - if (sync) - mysql_mutex_lock(&thd->LOCK_thd_data); + Wsrep_thd_args* args= new Wsrep_thd_args(wsrep_rollback_process, 0); - status = ((thd->wsrep_exec_mode == REPL_RECV) || - (thd->wsrep_exec_mode == TOTAL_ORDER)); - if (sync) - mysql_mutex_unlock(&thd->LOCK_thd_data); - } - } - return status; -} + /* create rollbacker */ + if (create_wsrep_THD(args)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); -extern "C" -my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync) -{ - bool status = FALSE; - if (thd_ptr) - { - THD* thd = (THD*)thd_ptr; - if (sync) mysql_mutex_lock(&thd->LOCK_thd_data); - - status = ((thd->wsrep_exec_mode == REPL_RECV) || - (thd->wsrep_exec_mode == TOTAL_ORDER) || - (thd->wsrep_exec_mode == LOCAL_COMMIT)); - if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data); - } - return status; + /* create 
post_rollbacker */ + args= new Wsrep_thd_args(wsrep_post_rollback_process, 0); + if (create_wsrep_THD(args)) + WSREP_WARN("Can't create thread to manage wsrep post rollback"); + } } -extern "C" -my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) +/* + Start async rollback process + + Asserts thd->LOCK_thd_data ownership + */ +void wsrep_fire_rollbacker(THD *thd) { - bool status = FALSE; - if (thd_ptr) + DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_aborting); + DBUG_PRINT("wsrep",("enqueuing trx abort for %llu", thd->thread_id)); + WSREP_DEBUG("enqueuing trx abort for (%llu)", thd->thread_id); + if (wsrep_rollback_queue->push_back(thd)) { - THD* thd = (THD*)thd_ptr; - if (sync) mysql_mutex_lock(&thd->LOCK_thd_data); - - status = (thd->wsrep_exec_mode == LOCAL_STATE); - if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data); + WSREP_WARN("duplicate thd %llu for rollbacker", + thd->thread_id); } - return status; } + int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) { - THD *victim_thd = (THD *) victim_thd_ptr; - THD *bf_thd = (THD *) bf_thd_ptr; DBUG_ENTER("wsrep_abort_thd"); - + THD *victim_thd= (THD *) victim_thd_ptr; + THD *bf_thd= (THD *) bf_thd_ptr; + mysql_mutex_lock(&victim_thd->LOCK_thd_data); if ( (WSREP(bf_thd) || ( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && - bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && - victim_thd) + wsrep_thd_is_toi(bf_thd)) ) && + victim_thd && + !wsrep_thd_is_aborting(victim_thd)) { - if ((victim_thd->wsrep_conflict_state == MUST_ABORT) || - (victim_thd->wsrep_conflict_state == ABORTED) || - (victim_thd->wsrep_conflict_state == ABORTING)) - { - WSREP_DEBUG("wsrep_abort_thd called by %llu with victim %llu already " - "aborted. Ignoring.", - (bf_thd) ? (long long)bf_thd->real_id : 0, - (long long)victim_thd->real_id); - DBUG_RETURN(1); - } - - WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? 
- (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); - ha_abort_transaction(bf_thd, victim_thd, signal); + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? + (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + ha_abort_transaction(bf_thd, victim_thd, signal); + mysql_mutex_lock(&victim_thd->LOCK_thd_data); } else { WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); } - + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); DBUG_RETURN(1); } -extern "C" -int wsrep_thd_in_locking_session(void *thd_ptr) +bool wsrep_bf_abort(const THD* bf_thd, THD* victim_thd) { - if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) { - return 1; + WSREP_LOG_THD((THD*)bf_thd, "BF aborter before"); + WSREP_LOG_THD(victim_thd, "victim before"); + wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno()); + + if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) + { + WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction"); + wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id()); } - return 0; -} -bool wsrep_thd_has_explicit_locks(THD *thd) -{ - assert(thd); - return thd->mdl_context.has_explicit_locks(); + bool ret; + if (wsrep_thd_is_toi(bf_thd)) + { + ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno); + } + else + { + ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); + } + if (ret) + { + wsrep_bf_aborts_counter++; + } + return ret; } + diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index 5900668f3fb..3114e02e1b8 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -13,42 +13,220 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#include <my_config.h> - #ifndef WSREP_THD_H #define WSREP_THD_H -#ifdef WITH_WSREP +#include <my_config.h> +#include "mysql/service_wsrep.h" +#include "wsrep/client_state.hpp" #include "sql_class.h" +#include "wsrep_utils.h" +#include <deque> +class Wsrep_thd_queue +{ +public: + Wsrep_thd_queue(THD* t) : thd(t) + { + mysql_mutex_init(key_LOCK_wsrep_thd_queue, + &LOCK_wsrep_thd_queue, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_thd_queue, &COND_wsrep_thd_queue, NULL); + } + ~Wsrep_thd_queue() + { + mysql_mutex_destroy(&LOCK_wsrep_thd_queue); + mysql_cond_destroy(&COND_wsrep_thd_queue); + } + bool push_back(THD* thd) + { + DBUG_ASSERT(thd); + wsp::auto_lock lock(&LOCK_wsrep_thd_queue); + std::deque<THD*>::iterator it = queue.begin(); + while (it != queue.end()) + { + if (*it == thd) + { + return true; + } + it++; + } + queue.push_back(thd); + mysql_cond_signal(&COND_wsrep_thd_queue); + return false; + } + THD* pop_front() + { + wsp::auto_lock lock(&LOCK_wsrep_thd_queue); + while (queue.empty()) + { + if (thd->killed != NOT_KILLED) + return NULL; + + thd->mysys_var->current_mutex= &LOCK_wsrep_thd_queue; + thd->mysys_var->current_cond= &COND_wsrep_thd_queue; + + mysql_cond_wait(&COND_wsrep_thd_queue, &LOCK_wsrep_thd_queue); + + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + } + THD* ret= queue.front(); + queue.pop_front(); + return ret; + } +private: + THD* thd; + std::deque<THD*> queue; + mysql_mutex_t LOCK_wsrep_thd_queue; + mysql_cond_t COND_wsrep_thd_queue; +}; + +void wsrep_prepare_bf_thd(THD*, struct wsrep_thd_shadow*); +void wsrep_return_from_bf_mode(THD*, struct wsrep_thd_shadow*); int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff, enum enum_var_type scope); -void wsrep_client_rollback(THD *thd); +void wsrep_client_rollback(THD *thd, bool rollbacker = false); void wsrep_replay_transaction(THD *thd); void wsrep_create_appliers(long threads); void wsrep_create_rollbacker(); +bool wsrep_bf_abort(const THD*, 
THD*); int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal); - -/* - PA = Parallel Applying (on the slave side) -*/ extern void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe); -extern my_bool wsrep_thd_is_BF(THD *thd, my_bool sync); -extern my_bool wsrep_thd_is_wsrep(void *thd_ptr); +THD* wsrep_start_SR_THD(char *thread_stack); +void wsrep_end_SR_THD(THD* thd); + +/** + Helper functions to override error status + + In many contexts it is desirable to mask the original error status + set for THD or it is necessary to change OK status to error. + This function implements the common logic for most + of the cases. + + Rules: + * If the diagnostics area has OK or EOF status, override it unconditionally + * If the error is either ER_ERROR_DURING_COMMIT or ER_LOCK_DEADLOCK + it is usually the correct error status to be returned to client, + so don't override those by default + */ + +static inline void wsrep_override_error(THD *thd, uint error) +{ + DBUG_ASSERT(error != ER_ERROR_DURING_COMMIT); + Diagnostics_area *da= thd->get_stmt_da(); + if (da->is_ok() || + da->is_eof() || + !da->is_set() || + (da->is_error() && + da->sql_errno() != error && + da->sql_errno() != ER_ERROR_DURING_COMMIT && + da->sql_errno() != ER_LOCK_DEADLOCK)) + { + da->reset_diagnostics_area(); + my_error(error, MYF(0)); + } +} + +/** + Override error with additional wsrep status.
+ */ +static inline void wsrep_override_error(THD *thd, uint error, + enum wsrep::provider::status status) +{ + Diagnostics_area *da= thd->get_stmt_da(); + if (da->is_ok() || + !da->is_set() || + (da->is_error() && + da->sql_errno() != error && + da->sql_errno() != ER_ERROR_DURING_COMMIT && + da->sql_errno() != ER_LOCK_DEADLOCK)) + { + da->reset_diagnostics_area(); + my_error(error, MYF(0), status); + } +} + +static inline void wsrep_override_error(THD* thd, + wsrep::client_error ce, + enum wsrep::provider::status status) +{ + DBUG_ASSERT(ce != wsrep::e_success); + switch (ce) + { + case wsrep::e_error_during_commit: + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, status); + break; + case wsrep::e_deadlock_error: + wsrep_override_error(thd, ER_LOCK_DEADLOCK); + break; + case wsrep::e_interrupted_error: + wsrep_override_error(thd, ER_QUERY_INTERRUPTED); + break; + case wsrep::e_size_exceeded_error: + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, status); + break; + case wsrep::e_append_fragment_error: + /* TODO: Figure out better error number */ + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, status); + break; + case wsrep::e_not_supported_error: + wsrep_override_error(thd, ER_NOT_SUPPORTED_YET); + break; + case wsrep::e_timeout_error: + wsrep_override_error(thd, ER_LOCK_WAIT_TIMEOUT); + break; + default: + wsrep_override_error(thd, ER_UNKNOWN_ERROR); + break; + } +} -enum wsrep_conflict_state wsrep_thd_conflict_state(void *thd_ptr, my_bool sync); -extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync); -extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync); -extern "C" int wsrep_thd_in_locking_session(void *thd_ptr); +/** + Helper function to log THD wsrep context. 
-#else /* WITH_WSREP */ + @param thd Pointer to THD + @param message Optional message + @param function Function where the call was made from + */ +static inline void wsrep_log_thd(THD *thd, + const char *message, + const char *function) +{ + WSREP_DEBUG("%s %s\n" + " thd: %llu thd_ptr: %p client_mode: %s client_state: %s trx_state: %s\n" + " next_trx_id: %lld trx_id: %lld seqno: %lld\n" + " is_streaming: %d fragments: %zu\n" + " sql_errno: %u message: %s\n" +#define WSREP_THD_LOG_QUERIES +#ifdef WSREP_THD_LOG_QUERIES + " command: %d query: %.72s" +#endif /* WSREP_OBSERVER_LOG_QUERIES */ + , + function, + message ? message : "", + thd->thread_id, + thd, + wsrep_thd_client_mode_str(thd), + wsrep_thd_client_state_str(thd), + wsrep_thd_transaction_state_str(thd), + (long long)thd->wsrep_next_trx_id(), + (long long)thd->wsrep_trx_id(), + (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_trx().is_streaming(), + thd->wsrep_sr().fragments().size(), + (thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->sql_errno() : 0), + (thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->message() : "") +#ifdef WSREP_THD_LOG_QUERIES + , thd->lex->sql_command, + WSREP_QUERY(thd) +#endif /* WSREP_OBSERVER_LOG_QUERIES */ + ); +} -#define wsrep_thd_is_BF(T, S) (0) -#define wsrep_abort_thd(X,Y,Z) do { } while(0) -#define wsrep_create_appliers(T) do { } while(0) +#define WSREP_LOG_THD(thd_, message_) wsrep_log_thd(thd_, message_, __FUNCTION__) -#endif #endif /* WSREP_THD_H */ diff --git a/sql/wsrep_trans_observer.h b/sql/wsrep_trans_observer.h new file mode 100644 index 00000000000..a3acc9e78fb --- /dev/null +++ b/sql/wsrep_trans_observer.h @@ -0,0 +1,423 @@ +/* Copyright 2016 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef WSREP_TRANS_OBSERVER_H +#define WSREP_TRANS_OBSERVER_H + +#include "my_global.h" +#include "mysql/service_wsrep.h" +#include "wsrep_applier.h" /* wsrep_apply_error */ +#include "wsrep_xid.h" +#include "wsrep_thd.h" + +#include "my_dbug.h" + +class THD; + +/* + Return true if THD has active wsrep transaction. + */ +static inline bool wsrep_is_active(THD* thd) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none && + thd->wsrep_cs().transaction().active()); +} + +/* + Return true if THD is either committing a transaction or statement + is autocommit. + */ +static inline bool wsrep_is_real(THD* thd, bool all) +{ + return (all || thd->transaction.all.ha_list == 0); +} + +/* + Check if a transaction has generated changes. + */ +static inline bool wsrep_has_changes(THD* thd, my_bool all) +{ + return (thd->wsrep_trx().is_empty() == false); +} + +/* + Check if an active transaction has been BF aborted. 
+ */ +static inline bool wsrep_is_bf_aborted(THD* thd) +{ + return (thd->wsrep_trx().active() && thd->wsrep_trx().bf_aborted()); +} + +static inline int wsrep_check_pk(THD* thd) +{ + if (!wsrep_certify_nonPK) + { + for (TABLE* table= thd->open_tables; table != NULL; table= table->next) + { + if (table->key_info == NULL || table->s->primary_key == MAX_KEY) + { + WSREP_DEBUG("No primary key found for table %s.%s", + table->s->db.str, table->s->table_name.str); + wsrep_override_error(thd, ER_LOCK_DEADLOCK); + return 1; + } + } + } + return 0; +} + +static inline bool wsrep_streaming_enabled(THD* thd) +{ + return (thd->wsrep_sr().fragment_size() > 0); +} + + +static inline int wsrep_start_transaction(THD* thd, wsrep_trx_id_t trx_id) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none ? + thd->wsrep_cs().start_transaction(wsrep::transaction_id(trx_id)) : + 0); +} + +/**/ +static inline int wsrep_start_trx_if_not_started(THD* thd) +{ + int ret= 0; + DBUG_ASSERT(thd->wsrep_next_trx_id() != WSREP_UNDEFINED_TRX_ID); + DBUG_ASSERT(thd->wsrep_cs().mode() == Wsrep_client_state::m_local); + if (thd->wsrep_trx().active() == false) + { + ret= wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + return ret; +} + +/* + Called after each row operation. + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_after_row(THD* thd, bool) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none && + wsrep_thd_is_local(thd)) + { + if (wsrep_check_pk(thd)) + { + return 1; + } + else if (wsrep_streaming_enabled(thd)) + { + return thd->wsrep_cs().after_row(); + } + } + return 0; +} + +/* + Helper method to determine whether commit time hooks + should be run for the transaction. + */ +static inline bool wsrep_run_commit_hook(THD* thd, bool all) +{ + return (wsrep_is_real(thd, all) && wsrep_is_active(thd) && + (wsrep_thd_is_applying(thd) || wsrep_has_changes(thd, all))); +} + +/* + Called before the transaction is prepared.
+ + Return zero on success, non-zero on failure. + */ +static inline int wsrep_before_prepare(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_before_prepare"); + WSREP_DEBUG("wsrep_before_prepare: %d", wsrep_is_real(thd, all)); + int ret= 0; + if (wsrep_run_commit_hook(thd, all)) + { + if ((ret= thd->wsrep_cs().before_prepare()) == 0) + { + DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined()); + wsrep_xid_init(&thd->wsrep_xid, + thd->wsrep_trx().ws_meta().gtid()); + } + } + DBUG_RETURN(ret); +} + +/* + Called after the transaction has been prepared. + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_after_prepare(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_after_prepare"); + WSREP_DEBUG("wsrep_after_prepare: %d", wsrep_is_real(thd, all)); + int ret= (wsrep_run_commit_hook(thd, all) ? + thd->wsrep_cs().after_prepare() : 0); + DBUG_ASSERT(ret == 0 || thd->wsrep_cs().current_error() || + thd->wsrep_cs().transaction().state() == wsrep::transaction::s_must_replay); + DBUG_RETURN(ret); +} + + +/* + Called before the transaction is committed. + + This function must be called from both client and + applier contexts before commit. + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_before_commit(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_before_commit"); + WSREP_DEBUG("wsrep_before_commit: %d, %lld", + wsrep_is_real(thd, all), + (long long)wsrep_thd_trx_seqno(thd)); + int ret= 0; + if (wsrep_run_commit_hook(thd, all)) + { + if ((ret= thd->wsrep_cs().before_commit()) == 0) + { + DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined()); + wsrep_xid_init(&thd->wsrep_xid, + thd->wsrep_trx().ws_meta().gtid()); + } + } + DBUG_RETURN(ret); +} + +/* + Called after the transaction has been ordered for commit. + + This function must be called from both client and + applier contexts after the commit has been ordered.
+ + @param thd Pointer to THD + @param all + @param err Error buffer in case of applying error + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_ordered_commit(THD* thd, + bool all, + const wsrep_apply_error&) +{ + DBUG_ENTER("wsrep_ordered_commit"); + WSREP_DEBUG("wsrep_ordered_commit: %d", wsrep_is_real(thd, all)); + DBUG_RETURN(wsrep_run_commit_hook(thd, all) ? + thd->wsrep_cs().ordered_commit() : 0); +} + +/* + Called after the transaction has been committed. + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_after_commit(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_after_commit"); + WSREP_DEBUG("wsrep_after_commit: %d, %d, %lld, %d", + wsrep_is_real(thd, all), + wsrep_is_active(thd), + (long long)wsrep_thd_trx_seqno(thd), + wsrep_has_changes(thd, all)); + if (wsrep_run_commit_hook(thd, all)) + { + DBUG_RETURN((wsrep_ordered_commit_if_no_binlog(thd, all) || + (thd->wsrep_xid.null(), + thd->wsrep_cs().after_commit()))); + } + DBUG_RETURN(0); +} + +/* + Called before the transaction is rolled back. + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_before_rollback(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_before_rollback"); + int ret= 0; + if (wsrep_is_active(thd)) + { + if (!all && thd->in_active_multi_stmt_transaction() && + thd->wsrep_trx().is_streaming() && + !wsrep_stmt_rollback_is_safe(thd)) + { + /* Non-safe statement rollback during SR multi statement + transaction. Self abort the transaction, the actual rollback + and error handling will be done in after statement phase. */ + wsrep_thd_self_abort(thd); + ret= 0; + } + else if (wsrep_is_real(thd, all) && + thd->wsrep_trx().state() != wsrep::transaction::s_aborted) + { + /* Real transaction rolling back and wsrep abort not completed + yet */ + /* Reset XID so that it does not trigger writing serialization + history in InnoDB. This needs to be avoided because rollback + may happen out of order and replay may follow.
*/ + thd->wsrep_xid.null(); + ret= thd->wsrep_cs().before_rollback(); + } + } + DBUG_RETURN(ret); +} + +/* + Called after the transaction has been rolled back. + + Return zero on success, non-zero on failure. + */ +static inline int wsrep_after_rollback(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_after_rollback"); + DBUG_RETURN((wsrep_is_real(thd, all) && wsrep_is_active(thd) && + thd->wsrep_cs().transaction().state() != + wsrep::transaction::s_aborted) ? + thd->wsrep_cs().after_rollback() : 0); +} + +static inline int wsrep_before_statement(THD* thd) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none ? + thd->wsrep_cs().before_statement() : 0); +} + +static inline +int wsrep_after_statement(THD* thd) +{ + DBUG_ENTER("wsrep_after_statement"); + DBUG_RETURN(thd->wsrep_cs().state() != wsrep::client_state::s_none ? + thd->wsrep_cs().after_statement() : 0); +} + +static inline void wsrep_after_apply(THD* thd) +{ + DBUG_ASSERT(wsrep_thd_is_applying(thd)); + WSREP_DEBUG("wsrep_after_apply %lld", thd->thread_id); + thd->wsrep_cs().after_applying(); +} + +static inline void wsrep_open(THD* thd) +{ + DBUG_ENTER("wsrep_open"); + if (wsrep_on(thd)) + { + thd->wsrep_cs().open(wsrep::client_id(thd->thread_id)); + thd->wsrep_cs().debug_log_level(wsrep_debug); + if (!thd->wsrep_applier && thd->variables.wsrep_trx_fragment_size) + { + thd->wsrep_cs().enable_streaming( + wsrep_fragment_unit(thd->variables.wsrep_trx_fragment_unit), + size_t(thd->variables.wsrep_trx_fragment_size)); + } + } + DBUG_VOID_RETURN; +} + +static inline void wsrep_close(THD* thd) +{ + DBUG_ENTER("wsrep_close"); + if (thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + thd->wsrep_cs().close(); + } + DBUG_VOID_RETURN; +} + +static inline int wsrep_before_command(THD* thd) +{ + return (thd->wsrep_cs().state() != wsrep::client_state::s_none ? + thd->wsrep_cs().before_command() : 0); +} +/* + Called after each command. + + Return zero on success, non-zero on failure.
+*/ +static inline void wsrep_after_command_before_result(THD* thd) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + thd->wsrep_cs().after_command_before_result(); + } +} + +static inline void wsrep_after_command_after_result(THD* thd) +{ + if (thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + thd->wsrep_cs().after_command_after_result(); + } +} + +static inline void wsrep_after_command_ignore_result(THD* thd) +{ + wsrep_after_command_before_result(thd); + DBUG_ASSERT(!thd->wsrep_cs().current_error()); + wsrep_after_command_after_result(thd); +} + +static inline enum wsrep::client_error wsrep_current_error(THD* thd) +{ + return thd->wsrep_cs().current_error(); +} + +static inline enum wsrep::provider::status +wsrep_current_error_status(THD* thd) +{ + return thd->wsrep_cs().current_error_status(); +} + + +/* + Commit an empty transaction. + + If the transaction is real and the wsrep transaction is still active, + the transaction did not generate any rows or keys and is committed + as empty. Here the wsrep transaction is rolled back and after statement + step is performed to leave the wsrep transaction in the state as it + never existed. 
+*/ +static inline void wsrep_commit_empty(THD* thd, bool all) +{ + DBUG_ENTER("wsrep_commit_empty"); + WSREP_DEBUG("wsrep_commit_empty(%llu)", thd->thread_id); + if (wsrep_is_real(thd, all) && + wsrep_thd_is_local(thd) && + thd->wsrep_trx().active() && + thd->wsrep_trx().state() != wsrep::transaction::s_committed) + { + bool have_error= wsrep_current_error(thd); + int ret= wsrep_before_rollback(thd, all) || + wsrep_after_rollback(thd, all) || + wsrep_after_statement(thd); + DBUG_ASSERT(have_error || !wsrep_current_error(thd)); + if (ret) + { + WSREP_DEBUG("wsrep_commit_empty failed: %d", wsrep_current_error(thd)); + } + } + DBUG_VOID_RETURN; +} + +#endif /* WSREP_TRANS_OBSERVER */ diff --git a/sql/wsrep_types.h b/sql/wsrep_types.h new file mode 100644 index 00000000000..9da00e305a7 --- /dev/null +++ b/sql/wsrep_types.h @@ -0,0 +1,29 @@ +/* Copyright 2018 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Wsrep typedefs to better conform to coding style. 
+ */ +#ifndef WSREP_TYPES_H +#define WSREP_TYPES_H + +#include "wsrep/seqno.hpp" +#include "wsrep/view.hpp" + +typedef wsrep::id Wsrep_id; +typedef wsrep::seqno Wsrep_seqno; +typedef wsrep::view Wsrep_view; + +#endif /* WSREP_TYPES_H */ diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc index 3c341e222b3..8db0f7be99a 100644 --- a/sql/wsrep_utils.cc +++ b/sql/wsrep_utils.cc @@ -21,6 +21,8 @@ #endif #include "mariadb.h" +#include "my_global.h" +#include "wsrep_api.h" #include "wsrep_utils.h" #include "wsrep_mysqld.h" @@ -47,7 +49,7 @@ static wsp::string wsrep_PATH; void wsrep_prepend_PATH (const char* path) { - int count = 0; + int count= 0; while (environ[count]) { @@ -72,7 +74,7 @@ wsrep_prepend_PATH (const char* path) old_path + strlen("PATH=")); wsrep_PATH.set (new_path); - environ[count] = new_path; + environ[count]= new_path; } else { @@ -93,28 +95,28 @@ namespace wsp bool env::ctor_common(char** e) { - env_ = static_cast<char**>(malloc((len_ + 1) * sizeof(char*))); + env_= static_cast<char**>(malloc((len_ + 1) * sizeof(char*))); if (env_) { for (size_t i(0); i < len_; ++i) { assert(e[i]); // caller should make sure about len_ - env_[i] = strdup(e[i]); + env_[i]= strdup(e[i]); if (!env_[i]) { - errno_ = errno; + errno_= errno; WSREP_ERROR("Failed to allocate env. var: %s", e[i]); return true; } } - env_[len_] = NULL; + env_[len_]= NULL; return false; } else { - errno_ = errno; + errno_= errno; WSREP_ERROR("Failed to allocate env. 
var vector of length: %zu", len_); return true; } @@ -128,15 +130,15 @@ env::dtor() /* don't need to go beyond the first NULL */ for (size_t i(0); env_[i] != NULL; ++i) { free(env_[i]); } free(env_); - env_ = NULL; + env_= NULL; } - len_ = 0; + len_= 0; } env::env(char** e) : len_(0), env_(NULL), errno_(0) { - if (!e) { e = environ; } + if (!e) { e= environ; } /* count the size of the vector */ while (e[len_]) { ++len_; } @@ -154,21 +156,21 @@ env::~env() { dtor(); } int env::append(const char* val) { - char** tmp = static_cast<char**>(realloc(env_, (len_ + 2)*sizeof(char*))); + char** tmp= static_cast<char**>(realloc(env_, (len_ + 2)*sizeof(char*))); if (tmp) { - env_ = tmp; - env_[len_] = strdup(val); + env_= tmp; + env_[len_]= strdup(val); if (env_[len_]) { ++len_; - env_[len_] = NULL; + env_[len_]= NULL; } - else errno_ = errno; + else errno_= errno; } - else errno_ = errno; + else errno_= errno; return errno_; } @@ -189,7 +191,7 @@ process::process (const char* cmd, const char* type, char** env) if (0 == str_) { WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd)); - err_ = ENOMEM; + err_= ENOMEM; return; } @@ -205,12 +207,12 @@ process::process (const char* cmd, const char* type, char** env) return; } - if (NULL == env) { env = environ; } // default to global environment + if (NULL == env) { env= environ; } // default to global environment - int pipe_fds[2] = { -1, }; + int pipe_fds[2]= { -1, }; if (::pipe(pipe_fds)) { - err_ = errno; + err_= errno; WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_)); return; } @@ -220,16 +222,16 @@ process::process (const char* cmd, const char* type, char** env) int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ); int const close_fd (parent_end == PIPE_READ ? 
STDOUT_FD : STDIN_FD); - char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL }; + char* const pargv[4]= { strdup("sh"), strdup("-c"), strdup(str_), NULL }; if (!(pargv[0] && pargv[1] && pargv[2])) { - err_ = ENOMEM; + err_= ENOMEM; WSREP_ERROR ("Failed to allocate pargv[] array."); goto cleanup_pipe; } posix_spawnattr_t attr; - err_ = posix_spawnattr_init (&attr); + err_= posix_spawnattr_init (&attr); if (err_) { WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)", @@ -239,7 +241,7 @@ process::process (const char* cmd, const char* type, char** env) /* make sure that no signlas are masked in child process */ sigset_t sigmask_empty; sigemptyset(&sigmask_empty); - err_ = posix_spawnattr_setsigmask(&attr, &sigmask_empty); + err_= posix_spawnattr_setsigmask(&attr, &sigmask_empty); if (err_) { WSREP_ERROR ("posix_spawnattr_setsigmask() failed: %d (%s)", @@ -255,7 +257,7 @@ process::process (const char* cmd, const char* type, char** env) sigaddset(&default_signals, SIGPIPE); sigaddset(&default_signals, SIGTERM); sigaddset(&default_signals, SIGCHLD); - err_ = posix_spawnattr_setsigdefault(&attr, &default_signals); + err_= posix_spawnattr_setsigdefault(&attr, &default_signals); if (err_) { WSREP_ERROR ("posix_spawnattr_setsigdefault() failed: %d (%s)", @@ -263,7 +265,7 @@ process::process (const char* cmd, const char* type, char** env) goto cleanup_attr; } - err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF | + err_= posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_USEVFORK); if (err_) @@ -274,7 +276,7 @@ process::process (const char* cmd, const char* type, char** env) } posix_spawn_file_actions_t fact; - err_ = posix_spawn_file_actions_init (&fact); + err_= posix_spawn_file_actions_init (&fact); if (err_) { WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)", @@ -283,7 +285,7 @@ process::process (const char* cmd, const char* type, char** env) } // close child's stdout|stdin depending 
on what we returning - err_ = posix_spawn_file_actions_addclose (&fact, close_fd); + err_= posix_spawn_file_actions_addclose (&fact, close_fd); if (err_) { WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)", @@ -292,7 +294,7 @@ process::process (const char* cmd, const char* type, char** env) } // substitute our pipe descriptor in place of the closed one - err_ = posix_spawn_file_actions_adddup2 (&fact, + err_= posix_spawn_file_actions_adddup2 (&fact, pipe_fds[child_end], close_fd); if (err_) { @@ -301,30 +303,30 @@ process::process (const char* cmd, const char* type, char** env) goto cleanup_fact; } - err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, env); + err_= posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, env); if (err_) { WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)", pargv[2], err_, strerror(err_)); - pid_ = 0; // just to make sure it was not messed up in the call + pid_= 0; // just to make sure it was not messed up in the call goto cleanup_fact; } - io_ = fdopen (pipe_fds[parent_end], type); + io_= fdopen (pipe_fds[parent_end], type); if (io_) { - pipe_fds[parent_end] = -1; // skip close on cleanup + pipe_fds[parent_end]= -1; // skip close on cleanup } else { - err_ = errno; + err_= errno; WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_)); } cleanup_fact: int err; // to preserve err_ code - err = posix_spawn_file_actions_destroy (&fact); + err= posix_spawn_file_actions_destroy (&fact); if (err) { WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n", @@ -332,7 +334,7 @@ cleanup_fact: } cleanup_attr: - err = posix_spawnattr_destroy (&attr); + err= posix_spawnattr_destroy (&attr); if (err) { WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)", @@ -360,7 +362,7 @@ process::~process () if (fclose (io_) == -1) { - err_ = errno; + err_= errno; WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_)); } } @@ -376,34 +378,34 @@ process::wait () int status; if (-1 == waitpid(pid_, &status, 0)) { - 
err_ = errno; assert (err_); + err_= errno; assert (err_); WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)", str_, (long)pid_, err_, strerror (err_)); } else { // command completed, check exit status if (WIFEXITED (status)) { - err_ = WEXITSTATUS (status); + err_= WEXITSTATUS (status); } else { // command didn't complete with exit() WSREP_ERROR("Process was aborted."); - err_ = errno ? errno : ECHILD; + err_= errno ? errno : ECHILD; } if (err_) { switch (err_) /* Translate error codes to more meaningful */ { - case 126: err_ = EACCES; break; /* Permission denied */ - case 127: err_ = ENOENT; break; /* No such file or directory */ - case 143: err_ = EINTR; break; /* Subprocess killed */ + case 126: err_= EACCES; break; /* Permission denied */ + case 127: err_= ENOENT; break; /* No such file or directory */ + case 143: err_= EINTR; break; /* Subprocess killed */ } WSREP_ERROR("Process completed with error: %s: %d (%s)", str_, err_, strerror(err_)); } - pid_ = 0; + pid_= 0; if (io_) fclose (io_); - io_ = NULL; + io_= NULL; } } else { @@ -421,7 +423,7 @@ thd::thd (my_bool won) : init(), ptr(new THD(0)) ptr->thread_stack= (char*) &ptr; ptr->store_globals(); ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog - ptr->variables.wsrep_on = won; + ptr->variables.wsrep_on= won; ptr->security_ctx->master_access= ~(ulong)0; lex_start(ptr); } @@ -441,7 +443,7 @@ thd::~thd () /* Returns INADDR_NONE, INADDR_ANY, INADDR_LOOPBACK or something else */ unsigned int wsrep_check_ip (const char* const addr, bool *is_ipv6) { - unsigned int ret = INADDR_NONE; + unsigned int ret= INADDR_NONE; struct addrinfo *res, hints; memset (&hints, 0, sizeof(hints)); @@ -451,7 +453,7 @@ unsigned int wsrep_check_ip (const char* const addr, bool *is_ipv6) *is_ipv6= false; - int gai_ret = getaddrinfo(addr, NULL, &hints, &res); + int gai_ret= getaddrinfo(addr, NULL, &hints, &res); if (0 == gai_ret) { if (AF_INET == res->ai_family) /* IPv4 */ @@ -488,7 +490,9 @@ size_t 
wsrep_guess_ip (char* buf, size_t buf_len) size_t ret= 0; // Attempt 1: Try to get the IP from bind-address. - if (my_bind_addr_str && my_bind_addr_str[0] != '\0') + // Skip if empty or bind-address=* + if (my_bind_addr_str && my_bind_addr_str[0] != '\0' && + strcmp(my_bind_addr_str, "*") != 0) { bool unused; unsigned int const ip_type= wsrep_check_ip(my_bind_addr_str, &unused); @@ -539,7 +543,7 @@ size_t wsrep_guess_ip (char* buf, size_t buf_len) if (getifaddrs(&ifaddr) == 0) { - for (ifa= ifaddr; ifa != NULL; ifa = ifa->ifa_next) + for (ifa= ifaddr; ifa != NULL; ifa= ifa->ifa_next) { if (!ifa->ifa_addr) continue; diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h index 277cea9dc31..147da8e7c52 100644 --- a/sql/wsrep_utils.h +++ b/sql/wsrep_utils.h @@ -21,6 +21,27 @@ unsigned int wsrep_check_ip (const char* const addr, bool *is_ipv6); size_t wsrep_guess_ip (char* buf, size_t buf_len); +namespace wsp { +class node_status +{ +public: + node_status() : status(wsrep::server_state::s_disconnected) {} + void set(enum wsrep::server_state::state new_status, + const wsrep::view* view= 0) + { + if (status != new_status || 0 != view) + { + wsrep_notify_status(new_status, view); + status= new_status; + } + } + enum wsrep::server_state::state get() const { return status; } +private: + enum wsrep::server_state::state status; +}; +} /* namespace wsp */ + +extern wsp::node_status local_status; /* returns the length of the host part of the address string */ size_t wsrep_host_len(const char* addr, size_t addr_len); @@ -173,52 +194,37 @@ private: class Config_state { public: - Config_state() : view_(), status_(WSREP_MEMBER_UNDEFINED) + Config_state() : view_(), status_(wsrep::server_state::s_disconnected) {} - void set(wsrep_member_status_t status, const wsrep_view_info_t* view) + void set(const wsrep::view& view) { - wsrep_notify_status(status, view); + wsrep_notify_status(status_, &view); lock(); - - status_= status; - view_= *view; - member_info_.clear(); - - wsrep_member_info_t 
memb; - for(int i= 0; i < view->memb_num; i ++) - { - memb= view->members[i]; - member_info_.append_val(memb); - } - + view_= view; unlock(); } - void set(wsrep_member_status_t status) + void set(enum wsrep::server_state::state status) { - wsrep_notify_status(status, 0); + wsrep_notify_status(status); + lock(); status_= status; unlock(); } - wsrep_view_info_t get_view_info() const + const wsrep::view& get_view_info() const { return view_; } - wsrep_member_status_t get_status() const + enum wsrep::server_state::state get_status() const { return status_; } - Dynamic_array<wsrep_member_info_t> * get_member_info() - { - return &member_info_; - } - int lock() { return mysql_mutex_lock(&LOCK_wsrep_config_state); @@ -230,9 +236,8 @@ public: } private: - wsrep_view_info_t view_; - wsrep_member_status_t status_; - Dynamic_array<wsrep_member_info_t> member_info_; + wsrep::view view_; + enum wsrep::server_state::state status_; }; } /* namespace wsp */ @@ -308,12 +313,23 @@ public: string() : string_(0) {} explicit string(size_t s) : string_(static_cast<char*>(malloc(s))) {} char* operator()() { return string_; } - void set(char* str) { if (string_) free (string_); string_ = str; } + void set(char* str) { if (string_) free (string_); string_= str; } ~string() { set (0); } private: char* string_; }; +/* scope level lock */ +class auto_lock +{ +public: + auto_lock(mysql_mutex_t* m) : m_(m) { mysql_mutex_lock(m_); } + ~auto_lock() { mysql_mutex_unlock(m_); } +private: + mysql_mutex_t& operator =(mysql_mutex_t&); + mysql_mutex_t* const m_; +}; + #ifdef REMOVED class lock { @@ -323,7 +339,7 @@ public: lock (pthread_mutex_t* mtx) : mtx_(mtx) { - int err = pthread_mutex_lock (mtx_); + int err= pthread_mutex_lock (mtx_); if (err) { @@ -334,7 +350,7 @@ public: virtual ~lock () { - int err = pthread_mutex_unlock (mtx_); + int err= pthread_mutex_unlock (mtx_); if (err) { diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc index 1471ad91a96..8a0968639c2 100644 --- a/sql/wsrep_var.cc +++ 
b/sql/wsrep_var.cc @@ -28,8 +28,6 @@ ulong wsrep_reject_queries; -static long wsrep_prev_slave_threads = wsrep_slave_threads; - int wsrep_init_vars() { wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME)); @@ -53,7 +51,7 @@ bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) { if (var_type == OPT_GLOBAL) { // FIXME: this variable probably should be changed only per session - thd->variables.wsrep_on = global_system_variables.wsrep_on; + thd->variables.wsrep_on= global_system_variables.wsrep_on; } return false; @@ -68,8 +66,8 @@ bool wsrep_on_check(sys_var *self, THD* thd, set_var* var) if (new_wsrep_on && innodb_hton_ptr && innodb_lock_schedule_algorithm != 0) { my_message(ER_WRONG_ARGUMENTS, " WSREP (galera) can't be enabled " - "if innodb_lock_schedule_algorithm=VATS. Please configure" - " innodb_lock_schedule_algorithm=FCFS and restart.", MYF(0)); + "if innodb_lock_schedule_algorithm=VATS. Please configure" + " innodb_lock_schedule_algorithm=FCFS and restart.", MYF(0)); return true; } return false; @@ -77,10 +75,6 @@ bool wsrep_on_check(sys_var *self, THD* thd, set_var* var) bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type) { - // global setting should not affect session setting. - // if (var_type == OPT_GLOBAL) { - // thd->variables.wsrep_causal_reads = global_system_variables.wsrep_causal_reads; - // } if (thd->variables.wsrep_causal_reads) { thd->variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; } else { @@ -99,15 +93,11 @@ bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type) bool wsrep_sync_wait_update (sys_var* self, THD* thd, enum_var_type var_type) { - // global setting should not affect session setting. 
- // if (var_type == OPT_GLOBAL) { - // thd->variables.wsrep_sync_wait = global_system_variables.wsrep_sync_wait; - // } - thd->variables.wsrep_causal_reads = thd->variables.wsrep_sync_wait & + thd->variables.wsrep_causal_reads= thd->variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ; // update global settings too - global_system_variables.wsrep_causal_reads = global_system_variables.wsrep_sync_wait & + global_system_variables.wsrep_causal_reads= global_system_variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ; return false; @@ -129,7 +119,7 @@ bool wsrep_start_position_verify (const char* start_str) ssize_t uuid_len; // Check whether it has minimum acceptable length. - start_len = strlen (start_str); + start_len= strlen (start_str); if (start_len < 34) return true; @@ -137,7 +127,7 @@ bool wsrep_start_position_verify (const char* start_str) Parse the input to check whether UUID length is acceptable and seqno has been provided. */ - uuid_len = wsrep_uuid_scan (start_str, start_len, &uuid); + uuid_len= wsrep_uuid_scan (start_str, start_len, &uuid); if (uuid_len < 0 || (start_len - uuid_len) < 2) return true; @@ -157,19 +147,18 @@ bool wsrep_start_position_verify (const char* start_str) static -bool wsrep_set_local_position(const char* const value, size_t length, - bool const sst) +bool wsrep_set_local_position(THD* thd, const char* const value, + size_t length, bool const sst) { wsrep_uuid_t uuid; - size_t const uuid_len = wsrep_uuid_scan(value, length, &uuid); - wsrep_seqno_t const seqno = strtoll(value + uuid_len + 1, NULL, 10); + size_t const uuid_len= wsrep_uuid_scan(value, length, &uuid); + wsrep_seqno_t const seqno= strtoll(value + uuid_len + 1, NULL, 10); if (sst) { - return wsrep_sst_received (wsrep, uuid, seqno, NULL, 0, false); + wsrep_sst_received (thd, uuid, seqno, NULL, 0); } else { - // initialization - local_uuid = uuid; - local_seqno = seqno; + local_uuid= uuid; + local_seqno= seqno; } return false; } @@ -194,7 +183,7 @@ bool 
wsrep_start_position_check (sys_var *self, THD* thd, set_var* var) As part of further verification, we try to update the value and catch errors (if any). */ - if (wsrep_set_local_position(var->save_result.string_value.str, + if (wsrep_set_local_position(thd, var->save_result.string_value.str, var->save_result.string_value.length, true)) { @@ -226,7 +215,7 @@ bool wsrep_start_position_init (const char* val) return true; } - if (wsrep_set_local_position (val, strlen(val), false)) + if (wsrep_set_local_position (NULL, val, strlen(val), false)) { WSREP_ERROR("Failed to set initial wsep_start_position: %s", val); return true; @@ -263,25 +252,23 @@ end: static bool refresh_provider_options() { - DBUG_ASSERT(wsrep); - WSREP_DEBUG("refresh_provider_options: %s", (wsrep_provider_options) ? wsrep_provider_options : "null"); - char* opts= wsrep->options_get(wsrep); - if (opts) + + try { - wsrep_provider_options_init(opts); + std::string opts= Wsrep_server_state::instance().provider().options(); + wsrep_provider_options_init(opts.c_str()); get_provider_option_value(wsrep_provider_options, (char*)"repl.max_ws_size", &wsrep_max_ws_size); - free(opts); + return false; } - else + catch (...) 
{ WSREP_ERROR("Failed to get provider options"); return true; } - return false; } static int wsrep_provider_verify (const char* provider_str) @@ -332,8 +319,6 @@ bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) { bool rcode= false; - bool wsrep_on_saved= thd->variables.wsrep_on; - thd->variables.wsrep_on= false; WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider); @@ -346,7 +331,12 @@ bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) */ mysql_mutex_unlock(&LOCK_global_system_variables); wsrep_stop_replication(thd); - mysql_mutex_lock(&LOCK_global_system_variables); + + /* provider status variables are allocated in provider library + and need to freed here, otherwise a dangling reference to + wsrep_status_vars would remain in THD + */ + wsrep_free_status(thd); if (wsrep_inited == 1) wsrep_deinit(false); @@ -357,17 +347,17 @@ bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) if (wsrep_init()) { my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp, my_error, "wsrep_init failed"); - rcode = true; + rcode= true; } free(tmp); // we sure don't want to use old address with new provider wsrep_cluster_address_init(NULL); wsrep_provider_options_init(NULL); + if (!rcode) + refresh_provider_options(); - thd->variables.wsrep_on= wsrep_on_saved; - - refresh_provider_options(); + mysql_mutex_lock(&LOCK_global_system_variables); return rcode; } @@ -385,12 +375,12 @@ void wsrep_provider_init (const char* value) } if (wsrep_provider) my_free((void *)wsrep_provider); - wsrep_provider = my_strdup(value, MYF(0)); + wsrep_provider= my_strdup(value, MYF(0)); } bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) { - if (wsrep == NULL) + if (!WSREP_ON) { my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) not started", MYF(0)); return true; @@ -400,9 +390,9 @@ bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type) 
{ - DBUG_ASSERT(wsrep); - wsrep_status_t ret= wsrep->options_set(wsrep, wsrep_provider_options); - if (ret != WSREP_OK) + enum wsrep::provider::status ret= + Wsrep_server_state::instance().provider().options(wsrep_provider_options); + if (ret) { WSREP_ERROR("Set options returned %d", ret); refresh_provider_options(); @@ -415,7 +405,7 @@ void wsrep_provider_options_init(const char* value) { if (wsrep_provider_options && wsrep_provider_options != value) my_free((void *)wsrep_provider_options); - wsrep_provider_options = (value) ? my_strdup(value, MYF(0)) : NULL; + wsrep_provider_options= (value) ? my_strdup(value, MYF(0)) : NULL; } bool wsrep_reject_queries_update(sys_var *self, THD* thd, enum_var_type type) @@ -469,18 +459,12 @@ bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var) bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) { - bool wsrep_on_saved; - - /* Do not proceed if wsrep provider is not loaded. */ - if (!wsrep) + if (!Wsrep_server_state::instance().is_provider_loaded()) { - WSREP_INFO("wsrep provider is not loaded, can't re(start) replication."); + WSREP_INFO("WSREP (galera) provider is not loaded, can't re(start) replication."); return false; } - wsrep_on_saved= thd->variables.wsrep_on; - thd->variables.wsrep_on= false; - /* stop replication is heavy operation, and includes closing all client connections. Closing clients may need to get LOCK_global_system_variables at least in MariaDB. @@ -491,13 +475,6 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) mysql_mutex_unlock(&LOCK_global_system_variables); wsrep_stop_replication(thd); - /* - Unlock and lock LOCK_wsrep_slave_threads to maintain lock order & avoid - any potential deadlock. 
- */ - mysql_mutex_unlock(&LOCK_wsrep_slave_threads); - mysql_mutex_lock(&LOCK_global_system_variables); - mysql_mutex_lock(&LOCK_wsrep_slave_threads); if (wsrep_start_replication()) { @@ -505,7 +482,13 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) wsrep_create_appliers(wsrep_slave_threads); } - thd->variables.wsrep_on= wsrep_on_saved; + /* locking order to be enforced is: + 1. LOCK_global_system_variables + 2. LOCK_wsrep_slave_threads + */ + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + mysql_mutex_lock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_wsrep_slave_threads); return false; } @@ -590,15 +573,14 @@ void wsrep_node_address_init (const char* value) if (wsrep_node_address && strcmp(wsrep_node_address, value)) my_free ((void*)wsrep_node_address); - wsrep_node_address = (value) ? my_strdup(value, MYF(0)) : NULL; + wsrep_node_address= (value) ? my_strdup(value, MYF(0)) : NULL; } static void wsrep_slave_count_change_update () { - wsrep_slave_count_change = (wsrep_slave_threads - wsrep_prev_slave_threads); + wsrep_slave_count_change= (wsrep_slave_threads - wsrep_running_threads + 2); WSREP_DEBUG("Change on slave threads: New %lu old %lu difference %d", - wsrep_slave_threads, wsrep_prev_slave_threads, wsrep_slave_count_change); - wsrep_prev_slave_threads = wsrep_slave_threads; + wsrep_slave_threads, wsrep_running_threads, wsrep_slave_count_change); } bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) @@ -607,14 +589,14 @@ bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) if (wsrep_slave_count_change > 0) { wsrep_create_appliers(wsrep_slave_count_change); - wsrep_slave_count_change = 0; + wsrep_slave_count_change= 0; } return false; } bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var) { - if (wsrep == NULL) + if (!WSREP_ON) { my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) not started", MYF(0)); return true; @@ -639,17 +621,17 @@ bool 
wsrep_desync_check (sys_var *self, THD* thd, set_var* var) } return false; } - wsrep_status_t ret(WSREP_WARNING); + int ret= 1; if (new_wsrep_desync) { - ret = wsrep->desync (wsrep); - if (ret != WSREP_OK) { - WSREP_WARN ("SET desync failed %d for schema: %s, query: %s", - ret, thd->get_db(), thd->query()); + ret= Wsrep_server_state::instance().provider().desync(); + if (ret) { + WSREP_WARN ("SET desync failed %d for schema: %s, query: %s", ret, + thd->db.str, WSREP_QUERY(thd)); my_error (ER_CANNOT_USER, MYF(0), "'desync'", thd->query()); return true; } } else { - ret = wsrep->resync (wsrep); + ret= Wsrep_server_state::instance().provider().resync(); if (ret != WSREP_OK) { WSREP_WARN ("SET resync failed %d for schema: %s, query: %s", ret, thd->get_db(), thd->query()); @@ -662,13 +644,70 @@ bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var) bool wsrep_desync_update (sys_var *self, THD* thd, enum_var_type type) { - DBUG_ASSERT(wsrep); + return false; +} + +bool wsrep_trx_fragment_size_check (sys_var *self, THD* thd, set_var* var) +{ + if (var->value == NULL) { + return false; + } + + const ulong new_trx_fragment_size= var->value->val_uint(); + + if (!WSREP(thd) && new_trx_fragment_size > 0) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_size' to a value other than " + "0 because wsrep is switched off."); + return true; + } + + if (new_trx_fragment_size > 0 && !wsrep_provider_is_SR_capable()) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "Cannot set 'wsrep_trx_fragment_size' to a value other than " + "0 because the wsrep_provider does not support streaming " + "replication."); + return true; + } + + return false; +} + +bool wsrep_trx_fragment_size_update(sys_var* self, THD *thd, enum_var_type) +{ + WSREP_DEBUG("wsrep_trx_fragment_size_update: %llu", + thd->variables.wsrep_trx_fragment_size); + if (thd->variables.wsrep_trx_fragment_size) + { + return 
thd->wsrep_cs().enable_streaming( + wsrep_fragment_unit(thd->variables.wsrep_trx_fragment_unit), + size_t(thd->variables.wsrep_trx_fragment_size)); + } + else + { + thd->wsrep_cs().disable_streaming(); + return false; + } +} + +bool wsrep_trx_fragment_unit_update(sys_var* self, THD *thd, enum_var_type) +{ + WSREP_DEBUG("wsrep_trx_fragment_unit_update: %lu", + thd->variables.wsrep_trx_fragment_unit); + if (thd->variables.wsrep_trx_fragment_size) + { + return thd->wsrep_cs().enable_streaming( + wsrep_fragment_unit(thd->variables.wsrep_trx_fragment_unit), + size_t(thd->variables.wsrep_trx_fragment_size)); + } return false; } bool wsrep_max_ws_size_check(sys_var *self, THD* thd, set_var* var) { - if (wsrep == NULL) + if (!WSREP_ON) { my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) not started", MYF(0)); return true; @@ -676,36 +715,35 @@ bool wsrep_max_ws_size_check(sys_var *self, THD* thd, set_var* var) return false; } -bool wsrep_max_ws_size_update (sys_var *self, THD *thd, enum_var_type) +bool wsrep_max_ws_size_update(sys_var *self, THD *thd, enum_var_type) { - DBUG_ASSERT(wsrep); - char max_ws_size_opt[128]; my_snprintf(max_ws_size_opt, sizeof(max_ws_size_opt), - "repl.max_ws_size=%lu", wsrep_max_ws_size); - wsrep_status_t ret= wsrep->options_set(wsrep, max_ws_size_opt); - if (ret != WSREP_OK) + "repl.max_ws_size=%d", wsrep_max_ws_size); + enum wsrep::provider::status ret= Wsrep_server_state::instance().provider().options(max_ws_size_opt); + if (ret) { WSREP_ERROR("Set options returned %d", ret); - refresh_provider_options(); return true; } return refresh_provider_options(); } +#if UNUSED /* eaec266eb16c (Sergei Golubchik 2014-09-28) */ static SHOW_VAR wsrep_status_vars[]= { {"connected", (char*) &wsrep_connected, SHOW_BOOL}, - {"ready", (char*) &wsrep_ready, SHOW_BOOL}, + {"ready", (char*) &wsrep_show_ready, SHOW_FUNC}, {"cluster_state_uuid",(char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, {"cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, 
{"cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, {"cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH}, {"local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, - {"local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_SIMPLE_FUNC}, + {"local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_FUNC}, {"provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, {"provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, {"provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"wsrep_provider_capabilities", (char*) &wsrep_provider_capabilities, SHOW_CHAR_PTR}, {"thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH} }; @@ -713,49 +751,90 @@ static int show_var_cmp(const void *var1, const void *var2) { return strcasecmp(((SHOW_VAR*)var1)->name, ((SHOW_VAR*)var2)->name); } +#endif /* UNUSED */ +/* + * Status variables stuff below + */ +static inline void +wsrep_assign_to_mysql (SHOW_VAR* mysql, wsrep_stats_var* wsrep_var) +{ + mysql->name= wsrep_var->name; + switch (wsrep_var->type) { + case WSREP_VAR_INT64: + mysql->value= (char*) &wsrep_var->value._int64; + mysql->type= SHOW_LONGLONG; + break; + case WSREP_VAR_STRING: + mysql->value= (char*) &wsrep_var->value._string; + mysql->type= SHOW_CHAR_PTR; + break; + case WSREP_VAR_DOUBLE: + mysql->value= (char*) &wsrep_var->value._double; + mysql->type= SHOW_DOUBLE; + break; + } +} -int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff, - enum enum_var_type scope) +#if DYNAMIC +// somehow this mysql status thing works only with statically allocated arrays. 
+static SHOW_VAR* mysql_status_vars= NULL; +static int mysql_status_len= -1; +#else +static SHOW_VAR mysql_status_vars[512 + 1]; +static const int mysql_status_len= 512; +#endif + +static void export_wsrep_status_to_mysql(THD* thd) { - uint i, maxi= SHOW_VAR_FUNC_BUFF_SIZE / sizeof(*var) - 1; - SHOW_VAR *v= (SHOW_VAR *)buff; + int wsrep_status_len, i; - var->type= SHOW_ARRAY; - var->value= buff; + thd->wsrep_status_vars= Wsrep_server_state::instance().status(); - for (i=0; i < array_elements(wsrep_status_vars); i++) - *v++= wsrep_status_vars[i]; + wsrep_status_len= thd->wsrep_status_vars.size(); - DBUG_ASSERT(i < maxi); +#if DYNAMIC + if (wsrep_status_len != mysql_status_len) { + void* tmp= realloc (mysql_status_vars, + (wsrep_status_len + 1) * sizeof(SHOW_VAR)); + if (!tmp) { - if (wsrep != NULL) - { - wsrep_stats_var* stats= wsrep->stats_get(wsrep); - for (wsrep_stats_var *sv= stats; - i < maxi && sv && sv->name; i++, - sv++, v++) - { - v->name = thd->strdup(sv->name); - switch (sv->type) { - case WSREP_VAR_INT64: - v->value = (char*)thd->memdup(&sv->value._integer64, sizeof(longlong)); - v->type = SHOW_LONGLONG; - break; - case WSREP_VAR_STRING: - v->value = thd->strdup(sv->value._string); - v->type = SHOW_CHAR; - break; - case WSREP_VAR_DOUBLE: - v->value = (char*)thd->memdup(&sv->value._double, sizeof(double)); - v->type = SHOW_DOUBLE; - break; - } + sql_print_error ("Out of memory for wsrep status variables." 
+ "Number of variables: %d", wsrep_status_len); + return; } - wsrep->stats_free(wsrep, stats); + + mysql_status_len= wsrep_status_len; + mysql_status_vars= (SHOW_VAR*)tmp; + } + /* @TODO: fix this: */ +#else + if (mysql_status_len < wsrep_status_len) wsrep_status_len= mysql_status_len; +#endif + + for (i= 0; i < wsrep_status_len; i++) + { + mysql_status_vars[i].name= (char*)thd->wsrep_status_vars[i].name().c_str(); + mysql_status_vars[i].value= (char*)thd->wsrep_status_vars[i].value().c_str(); + mysql_status_vars[i].type= SHOW_CHAR; } - my_qsort(buff, i, sizeof(*v), show_var_cmp); + mysql_status_vars[wsrep_status_len].name = NullS; + mysql_status_vars[wsrep_status_len].value = NullS; + mysql_status_vars[wsrep_status_len].type = SHOW_LONG; +} - v->name= 0; // terminator +int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff) +{ + if (WSREP_ON) + { + export_wsrep_status_to_mysql(thd); + var->type= SHOW_ARRAY; + var->value= (char *) &mysql_status_vars; + } return 0; } + +void wsrep_free_status (THD* thd) +{ + thd->wsrep_status_vars.clear(); +} diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h index 7d3ff50f1d2..b732fb48b38 100644 --- a/sql/wsrep_var.h +++ b/sql/wsrep_var.h @@ -13,11 +13,11 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#include <my_config.h> - #ifndef WSREP_VAR_H #define WSREP_VAR_H +#include <my_config.h> + #ifdef WITH_WSREP #define WSREP_CLUSTER_NAME "my_wsrep_cluster" @@ -90,13 +90,18 @@ extern bool wsrep_slave_threads_update UPDATE_ARGS; extern bool wsrep_desync_check CHECK_ARGS; extern bool wsrep_desync_update UPDATE_ARGS; +extern bool wsrep_trx_fragment_size_check CHECK_ARGS; +extern bool wsrep_trx_fragment_size_update UPDATE_ARGS; + +extern bool wsrep_trx_fragment_unit_update UPDATE_ARGS; + extern bool wsrep_max_ws_size_check CHECK_ARGS; extern bool wsrep_max_ws_size_update UPDATE_ARGS; + extern bool wsrep_reject_queries_update UPDATE_ARGS; #else /* WITH_WSREP */ -#define WSREP_NONE #define wsrep_provider_init(X) #define wsrep_init_vars() (0) #define wsrep_start_position_init(X) diff --git a/sql/wsrep_xid.cc b/sql/wsrep_xid.cc index 2834100568a..a1c454d9d65 100644 --- a/sql/wsrep_xid.cc +++ b/sql/wsrep_xid.cc @@ -21,6 +21,9 @@ #include "sql_class.h" #include "wsrep_mysqld.h" // for logging macros +#include <mysql/service_wsrep.h> + +#include <algorithm> /* std::sort() */ /* * WSREPXid */ @@ -34,20 +37,22 @@ #define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t)) #define WSREP_XID_GTRID_LEN (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t)) -void wsrep_xid_init(XID* xid, const wsrep_uuid_t& uuid, wsrep_seqno_t seqno) +void wsrep_xid_init(XID* xid, const wsrep::gtid& wsgtid) { xid->formatID= 1; xid->gtrid_length= WSREP_XID_GTRID_LEN; xid->bqual_length= 0; memset(xid->data, 0, sizeof(xid->data)); memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN); - xid->data[WSREP_XID_VERSION_OFFSET] = WSREP_XID_VERSION_2; - memcpy(xid->data + WSREP_XID_UUID_OFFSET, &uuid, sizeof(wsrep_uuid_t)); - int8store(xid->data + WSREP_XID_SEQNO_OFFSET,seqno); + xid->data[WSREP_XID_VERSION_OFFSET]= WSREP_XID_VERSION_2; + memcpy(xid->data + WSREP_XID_UUID_OFFSET, wsgtid.id().data(),sizeof(wsrep::id)); + int8store(xid->data + WSREP_XID_SEQNO_OFFSET, wsgtid.seqno().get()); } 
-int wsrep_is_wsrep_xid(const XID* xid) +extern "C" +int wsrep_is_wsrep_xid(const void* xid_ptr) { + const XID* xid= static_cast<const XID*>(xid_ptr); return (xid->formatID == 1 && xid->gtrid_length == WSREP_XID_GTRID_LEN && xid->bqual_length == 0 && @@ -56,33 +61,36 @@ int wsrep_is_wsrep_xid(const XID* xid) xid->data[WSREP_XID_VERSION_OFFSET] == WSREP_XID_VERSION_2)); } -const wsrep_uuid_t* wsrep_xid_uuid(const XID& xid) +const unsigned char* wsrep_xid_uuid(const xid_t* xid) { - if (wsrep_is_wsrep_xid(&xid)) - return reinterpret_cast<const wsrep_uuid_t*>(xid.data - + WSREP_XID_UUID_OFFSET); + DBUG_ASSERT(xid); + static wsrep::id const undefined; + if (wsrep_is_wsrep_xid(xid)) + return reinterpret_cast<const unsigned char*> + (xid->data + WSREP_XID_UUID_OFFSET); else - return &WSREP_UUID_UNDEFINED; + return static_cast<const unsigned char*>(wsrep::id::undefined().data()); } -const unsigned char* wsrep_xid_uuid(const xid_t* xid) +const wsrep::id& wsrep_xid_uuid(const XID& xid) { - DBUG_ASSERT(xid); - return wsrep_xid_uuid(*xid)->data; + compile_time_assert(sizeof(wsrep::id) == sizeof(wsrep_uuid_t)); + return *reinterpret_cast<const wsrep::id*>(wsrep_xid_uuid(&xid)); } -wsrep_seqno_t wsrep_xid_seqno(const XID& xid) +long long wsrep_xid_seqno(const xid_t* xid) { - wsrep_seqno_t ret= WSREP_SEQNO_UNDEFINED; - if (wsrep_is_wsrep_xid(&xid)) + DBUG_ASSERT(xid); + long long ret= wsrep::seqno::undefined().get(); + if (wsrep_is_wsrep_xid(xid)) { - switch (xid.data[WSREP_XID_VERSION_OFFSET]) + switch (xid->data[WSREP_XID_VERSION_OFFSET]) { case WSREP_XID_VERSION_1: - memcpy(&ret, xid.data + WSREP_XID_SEQNO_OFFSET, sizeof ret); + memcpy(&ret, xid->data + WSREP_XID_SEQNO_OFFSET, sizeof ret); break; case WSREP_XID_VERSION_2: - ret= sint8korr(xid.data + WSREP_XID_SEQNO_OFFSET); + ret= sint8korr(xid->data + WSREP_XID_SEQNO_OFFSET); break; default: break; @@ -91,10 +99,9 @@ wsrep_seqno_t wsrep_xid_seqno(const XID& xid) return ret; } -long long wsrep_xid_seqno(const xid_t* xid) 
+wsrep::seqno wsrep_xid_seqno(const XID& xid) { - DBUG_ASSERT(xid); - return wsrep_xid_seqno(*xid); + return wsrep::seqno(wsrep_xid_seqno(&xid)); } static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) @@ -104,11 +111,11 @@ static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) if (hton->set_checkpoint) { - const wsrep_uuid_t* uuid(wsrep_xid_uuid(*xid)); - char uuid_str[40] = {0, }; - wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + const unsigned char* uuid= wsrep_xid_uuid(xid); + char uuid_str[40]= {0, }; + wsrep_uuid_print((const wsrep_uuid_t*)uuid, uuid_str, sizeof(uuid_str)); WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld", - uuid_str, (long long)wsrep_xid_seqno(*xid)); + uuid_str, (long long)wsrep_xid_seqno(xid)); hton->set_checkpoint(hton, xid); } return FALSE; @@ -120,10 +127,10 @@ bool wsrep_set_SE_checkpoint(XID& xid) &xid); } -bool wsrep_set_SE_checkpoint(const wsrep_uuid_t& uuid, wsrep_seqno_t seqno) +bool wsrep_set_SE_checkpoint(const wsrep::gtid& wsgtid) { XID xid; - wsrep_xid_init(&xid, uuid, seqno); + wsrep_xid_init(&xid, wsgtid); return wsrep_set_SE_checkpoint(xid); } @@ -135,11 +142,12 @@ static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) if (hton->get_checkpoint) { hton->get_checkpoint(hton, xid); - const wsrep_uuid_t* uuid(wsrep_xid_uuid(*xid)); - char uuid_str[40] = {0, }; - wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + wsrep_uuid_t uuid; + memcpy(&uuid, wsrep_xid_uuid(xid), sizeof(uuid)); + char uuid_str[40]= {0, }; + wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld", - uuid_str, (long long)wsrep_xid_seqno(*xid)); + uuid_str, (long long)wsrep_xid_seqno(xid)); } return FALSE; } @@ -150,32 +158,58 @@ bool wsrep_get_SE_checkpoint(XID& xid) &xid); } -bool wsrep_get_SE_checkpoint(wsrep_uuid_t& uuid, wsrep_seqno_t& seqno) +wsrep::gtid wsrep_get_SE_checkpoint() { - uuid= WSREP_UUID_UNDEFINED; - seqno= 
WSREP_SEQNO_UNDEFINED; - XID xid; xid.null(); if (wsrep_get_SE_checkpoint(xid)) { - return true; + return wsrep::gtid(); } if (xid.is_null()) { - return false; + return wsrep::gtid(); } if (!wsrep_is_wsrep_xid(&xid)) { WSREP_WARN("Read non-wsrep XID from storage engines."); - return false; + return wsrep::gtid(); } - uuid= *wsrep_xid_uuid(xid); - seqno= wsrep_xid_seqno(xid); + return wsrep::gtid(wsrep_xid_uuid(xid),wsrep_xid_seqno(xid)); +} + +/* + Sort order for XIDs. Wsrep XIDs are sorted according to + seqno in ascending order. Non-wsrep XIDs are considered + equal among themselves and greater than with respect + to wsrep XIDs. + */ +struct Wsrep_xid_cmp +{ + bool operator()(const XID& left, const XID& right) const + { + const bool left_is_wsrep= wsrep_is_wsrep_xid(&left); + const bool right_is_wsrep= wsrep_is_wsrep_xid(&right); + if (left_is_wsrep && right_is_wsrep) + { + return (wsrep_xid_seqno(&left) < wsrep_xid_seqno(&right)); + } + else if (left_is_wsrep) + { + return true; + } + else + { + return false; + } + } +}; - return false; +void wsrep_sort_xid_array(XID *array, int len) +{ + std::sort(array, array + len, Wsrep_xid_cmp()); } diff --git a/sql/wsrep_xid.h b/sql/wsrep_xid.h index 5b33a904de1..e41f6fba420 100644 --- a/sql/wsrep_xid.h +++ b/sql/wsrep_xid.h @@ -20,17 +20,19 @@ #ifdef WITH_WSREP -#include "../wsrep/wsrep_api.h" +#include "wsrep/gtid.hpp" #include "handler.h" // XID typedef -void wsrep_xid_init(xid_t*, const wsrep_uuid_t&, wsrep_seqno_t); -const wsrep_uuid_t* wsrep_xid_uuid(const XID&); -wsrep_seqno_t wsrep_xid_seqno(const XID&); +void wsrep_xid_init(xid_t*, const wsrep::gtid&); +const wsrep::id& wsrep_xid_uuid(const XID&); +wsrep::seqno wsrep_xid_seqno(const XID&); +wsrep::gtid wsrep_get_SE_checkpoint(); +bool wsrep_set_SE_checkpoint(const wsrep::gtid& gtid); //void wsrep_get_SE_checkpoint(XID&); /* uncomment if needed */ -bool wsrep_get_SE_checkpoint(wsrep_uuid_t&, wsrep_seqno_t&); //void wsrep_set_SE_checkpoint(XID&); /* uncomment if 
needed */ -bool wsrep_set_SE_checkpoint(const wsrep_uuid_t&, wsrep_seqno_t); + +void wsrep_sort_xid_array(XID *array, int len); #endif /* WITH_WSREP */ #endif /* WSREP_UTILS_H */ |