summaryrefslogtreecommitdiff
path: root/storage/innobase/trx
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/trx')
-rw-r--r--storage/innobase/trx/trx0i_s.cc160
-rw-r--r--storage/innobase/trx/trx0purge.cc1080
-rw-r--r--storage/innobase/trx/trx0rec.cc658
-rw-r--r--storage/innobase/trx/trx0roll.cc547
-rw-r--r--storage/innobase/trx/trx0rseg.cc714
-rw-r--r--storage/innobase/trx/trx0sys.cc818
-rw-r--r--storage/innobase/trx/trx0trx.cc1562
-rw-r--r--storage/innobase/trx/trx0undo.cc1259
8 files changed, 2674 insertions, 4124 deletions
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index 6dba53dee6b..a39fb5d2e95 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -44,8 +44,8 @@ Created July 17, 2007 Vasil Dimov
#include "sync0rw.h"
#include "sync0sync.h"
#include "trx0sys.h"
-
-#include <sql_class.h>
+#include "que0que.h"
+#include "trx0purge.h"
/** Initial number of rows in the table cache */
#define TABLE_CACHE_INITIAL_ROWSNUM 1024
@@ -161,10 +161,10 @@ struct trx_i_s_cache_t {
ha_storage_t* storage; /*!< storage for external volatile
data that may become unavailable
when we release
- lock_sys->mutex or trx_sys->mutex */
+ lock_sys.mutex or trx_sys.mutex */
ulint mem_allocd; /*!< the amount of memory
allocated with mem_alloc*() */
- ibool is_truncated; /*!< this is TRUE if the memory
+ bool is_truncated; /*!< this is true if the memory
limit was hit and thus the data
in the cache is truncated */
};
@@ -526,9 +526,9 @@ thd_done:
row->trx_tables_locked = lock_number_of_tables_locked(&trx->lock);
- /* These are protected by both trx->mutex or lock_sys->mutex,
- or just lock_sys->mutex. For reading, it suffices to hold
- lock_sys->mutex. */
+ /* These are protected by both trx->mutex or lock_sys.mutex,
+ or just lock_sys.mutex. For reading, it suffices to hold
+ lock_sys.mutex. */
row->trx_lock_structs = UT_LIST_GET_LEN(trx->lock.trx_locks);
@@ -1222,102 +1222,64 @@ trx_i_s_cache_clear(
ha_storage_empty(&cache->storage);
}
-/*******************************************************************//**
-Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
-table cache buffer. Cache must be locked for write. */
-static
-void
-fetch_data_into_cache_low(
-/*======================*/
- trx_i_s_cache_t* cache, /*!< in/out: cache */
- bool read_write, /*!< in: only read-write
- transactions */
- trx_ut_list_t* trx_list) /*!< in: trx list */
-{
- const trx_t* trx;
- bool rw_trx_list = trx_list == &trx_sys->rw_trx_list;
-
- ut_ad(rw_trx_list || trx_list == &trx_sys->mysql_trx_list);
-
- /* Iterate over the transaction list and add each one
- to innodb_trx's cache. We also add all locks that are relevant
- to each transaction into innodb_locks' and innodb_lock_waits'
- caches. */
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- trx =
- (rw_trx_list
- ? UT_LIST_GET_NEXT(trx_list, trx)
- : UT_LIST_GET_NEXT(mysql_trx_list, trx))) {
-
- i_s_trx_row_t* trx_row;
- i_s_locks_row_t* requested_lock_row;
- /* Note: Read only transactions that modify temporary
- tables an have a transaction ID */
- if (!trx_is_started(trx)
- || (!rw_trx_list && trx->id != 0 && !trx->read_only)) {
+/**
+ Add transactions to innodb_trx's cache.
- continue;
- }
-
- assert_trx_nonlocking_or_in_list(trx);
-
- ut_ad(trx->in_rw_trx_list == rw_trx_list);
-
- if (!add_trx_relevant_locks_to_cache(cache, trx,
- &requested_lock_row)) {
-
- cache->is_truncated = TRUE;
- return;
- }
-
- trx_row = reinterpret_cast<i_s_trx_row_t*>(
- table_cache_create_empty_row(
- &cache->innodb_trx, cache));
-
- /* memory could not be allocated */
- if (trx_row == NULL) {
-
- cache->is_truncated = TRUE;
- return;
- }
+ We also add all locks that are relevant to each transaction into
+ innodb_locks' and innodb_lock_waits' caches.
+*/
- if (!fill_trx_row(trx_row, trx, requested_lock_row, cache)) {
-
- /* memory could not be allocated */
- --cache->innodb_trx.rows_used;
- cache->is_truncated = TRUE;
- return;
- }
- }
-}
-
-/*******************************************************************//**
-Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
-table cache buffer. Cache must be locked for write. */
-static
-void
-fetch_data_into_cache(
-/*==================*/
- trx_i_s_cache_t* cache) /*!< in/out: cache */
+static void fetch_data_into_cache_low(trx_i_s_cache_t *cache, const trx_t *trx)
{
- ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
-
- trx_i_s_cache_clear(cache);
+ i_s_locks_row_t *requested_lock_row;
+
+ assert_trx_nonlocking_or_in_list(trx);
+
+ if (add_trx_relevant_locks_to_cache(cache, trx, &requested_lock_row))
+ {
+ if (i_s_trx_row_t *trx_row= reinterpret_cast<i_s_trx_row_t*>(
+ table_cache_create_empty_row(&cache->innodb_trx, cache)))
+ {
+ if (fill_trx_row(trx_row, trx, requested_lock_row, cache))
+ return;
+ --cache->innodb_trx.rows_used;
+ }
+ }
+
+ /* memory could not be allocated */
+ cache->is_truncated= true;
+}
- /* Capture the state of the read-write transactions. This includes
- internal transactions too. They are not on mysql_trx_list */
- fetch_data_into_cache_low(cache, true, &trx_sys->rw_trx_list);
- /* Capture the state of the read-only active transactions */
- fetch_data_into_cache_low(cache, false, &trx_sys->mysql_trx_list);
+/**
+ Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
+ table cache buffer. Cache must be locked for write.
+*/
- cache->is_truncated = FALSE;
+static void fetch_data_into_cache(trx_i_s_cache_t *cache)
+{
+ ut_ad(lock_mutex_own());
+ trx_i_s_cache_clear(cache);
+
+ /* Capture the state of transactions */
+ mutex_enter(&trx_sys.mutex);
+ for (const trx_t *trx= UT_LIST_GET_FIRST(trx_sys.trx_list);
+ trx != NULL;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ if (trx_is_started(trx) && trx != purge_sys.query->trx)
+ {
+ fetch_data_into_cache_low(cache, trx);
+ if (cache->is_truncated)
+ break;
+ }
+ }
+ mutex_exit(&trx_sys.mutex);
+ cache->is_truncated= false;
}
+
/*******************************************************************//**
Update the transactions cache if it has not been read for some time.
Called from handler/i_s.cc.
@@ -1335,13 +1297,7 @@ trx_i_s_possibly_fetch_data_into_cache(
/* We need to read trx_sys and record/table lock queues */
lock_mutex_enter();
-
- trx_sys_mutex_enter();
-
fetch_data_into_cache(cache);
-
- trx_sys_mutex_exit();
-
lock_mutex_exit();
/* update cache last read time */
@@ -1354,7 +1310,7 @@ trx_i_s_possibly_fetch_data_into_cache(
Returns TRUE if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT.
@return TRUE if truncated */
-ibool
+bool
trx_i_s_cache_is_truncated(
/*=======================*/
trx_i_s_cache_t* cache) /*!< in: cache */
@@ -1398,7 +1354,7 @@ trx_i_s_cache_init(
cache->mem_allocd = 0;
- cache->is_truncated = FALSE;
+ cache->is_truncated = false;
}
/*******************************************************************//**
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 78bf2fc29b8..b6206c9b3be 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -31,7 +31,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0log.h"
#include "os0thread.h"
#include "que0que.h"
-#include "read0read.h"
#include "row0purge.h"
#include "row0upd.h"
#include "srv0mon.h"
@@ -42,6 +41,7 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0roll.h"
#include "trx0rseg.h"
#include "trx0trx.h"
+#include <mysql/service_wsrep.h>
/** Maximum allowable purge history length. <=0 means 'infinite'. */
ulong srv_max_purge_lag = 0;
@@ -50,7 +50,7 @@ ulong srv_max_purge_lag = 0;
ulong srv_max_purge_lag_delay = 0;
/** The global data structure coordinating a purge */
-purge_sys_t* purge_sys;
+purge_sys_t purge_sys;
/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
@@ -61,101 +61,67 @@ my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
/** Sentinel value */
-const TrxUndoRsegs TrxUndoRsegsIterator::NullElement(UINT64_UNDEFINED);
+static const TrxUndoRsegs NullElement;
-/** Constructor */
+/** Default constructor */
TrxUndoRsegsIterator::TrxUndoRsegsIterator()
- :
- m_trx_undo_rsegs(NullElement),
- m_iter(m_trx_undo_rsegs.end())
+ : m_rsegs(NullElement), m_iter(m_rsegs.begin())
{
}
/** Sets the next rseg to purge in purge_sys.
+Executed in the purge coordinator thread.
@return whether anything is to be purged */
-inline
-bool
-TrxUndoRsegsIterator::set_next()
+inline bool TrxUndoRsegsIterator::set_next()
{
- mutex_enter(&purge_sys->pq_mutex);
+ mutex_enter(&purge_sys.pq_mutex);
/* Only purge consumes events from the priority queue, user
threads only produce the events. */
/* Check if there are more rsegs to process in the
current element. */
- if (m_iter != m_trx_undo_rsegs.end()) {
-
+ if (m_iter != m_rsegs.end()) {
/* We are still processing rollback segment from
the same transaction and so expected transaction
- number shouldn't increase. Undo increment of
- expected trx_no done by caller assuming rollback
+ number shouldn't increase. Undo the increment of
+ expected commit done by caller assuming rollback
segments from given transaction are done. */
- purge_sys->iter.trx_no = (*m_iter)->last_trx_no;
-
- } else if (!purge_sys->purge_queue.empty()) {
-
- /* Read the next element from the queue.
- Combine elements if they have same transaction number.
- This can happen if a transaction shares redo rollback segment
- with another transaction that has already added it to purge
- queue and former transaction also needs to schedule non-redo
- rollback segment for purge. */
- m_trx_undo_rsegs = NullElement;
-
- purge_pq_t& purge_queue = purge_sys->purge_queue;
-
- while (!purge_queue.empty()) {
-
- if (m_trx_undo_rsegs.get_trx_no() == UINT64_UNDEFINED) {
- m_trx_undo_rsegs = purge_queue.top();
- } else if (purge_queue.top().get_trx_no() ==
- m_trx_undo_rsegs.get_trx_no()) {
- m_trx_undo_rsegs.append(
- purge_queue.top());
- } else {
- break;
- }
-
- purge_queue.pop();
- }
-
- m_iter = m_trx_undo_rsegs.begin();
-
+ purge_sys.tail.commit = (*m_iter)->last_commit;
+ } else if (!purge_sys.purge_queue.empty()) {
+ m_rsegs = purge_sys.purge_queue.top();
+ purge_sys.purge_queue.pop();
+ ut_ad(purge_sys.purge_queue.empty()
+ || purge_sys.purge_queue.top() != m_rsegs);
+ m_iter = m_rsegs.begin();
} else {
/* Queue is empty, reset iterator. */
- m_trx_undo_rsegs = NullElement;
- m_iter = m_trx_undo_rsegs.end();
-
- mutex_exit(&purge_sys->pq_mutex);
-
- purge_sys->rseg = NULL;
+ purge_sys.rseg = NULL;
+ mutex_exit(&purge_sys.pq_mutex);
+ m_rsegs = NullElement;
+ m_iter = m_rsegs.begin();
return false;
}
- purge_sys->rseg = *m_iter++;
-
- mutex_exit(&purge_sys->pq_mutex);
-
- ut_a(purge_sys->rseg != NULL);
+ purge_sys.rseg = *m_iter++;
+ mutex_exit(&purge_sys.pq_mutex);
+ mutex_enter(&purge_sys.rseg->mutex);
- mutex_enter(&purge_sys->rseg->mutex);
-
- ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
- ut_ad(purge_sys->rseg->last_trx_no == m_trx_undo_rsegs.get_trx_no());
+ ut_a(purge_sys.rseg->last_page_no != FIL_NULL);
+ ut_ad(purge_sys.rseg->last_trx_no() == m_rsegs.trx_no());
/* We assume in purge of externally stored fields that space id is
in the range of UNDO tablespace space ids */
- ut_a(purge_sys->rseg->space == TRX_SYS_SPACE
- || srv_is_undo_tablespace(purge_sys->rseg->space));
+ ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE
+ || srv_is_undo_tablespace(purge_sys.rseg->space->id));
- ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
+ ut_a(purge_sys.tail.commit <= purge_sys.rseg->last_commit);
- purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+ purge_sys.tail.commit = purge_sys.rseg->last_commit;
+ purge_sys.hdr_offset = purge_sys.rseg->last_offset;
+ purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
- mutex_exit(&purge_sys->rseg->mutex);
+ mutex_exit(&purge_sys.rseg->mutex);
return(true);
}
@@ -169,7 +135,7 @@ purge_graph_build()
{
ut_a(srv_n_purge_threads > 0);
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
ut_ad(!trx->id);
trx->start_time = time(NULL);
trx->start_time_micro = microsecond_interval_timer();
@@ -190,86 +156,97 @@ purge_graph_build()
return(fork);
}
-/** Construct the purge system. */
-purge_sys_t::purge_sys_t()
- : latch(), event(os_event_create(0)),
- n_stop(0), running(false), state(PURGE_STATE_INIT),
- query(purge_graph_build()),
- view(), n_submitted(0), n_completed(0),
- iter(), limit(),
-#ifdef UNIV_DEBUG
- done(),
-#endif /* UNIV_DEBUG */
- next_stored(false), rseg(NULL),
- page_no(0), offset(0), hdr_page_no(0), hdr_offset(0),
- rseg_iter(), purge_queue(), pq_mutex(), undo_trunc()
+/** Initialise the purge system. */
+void purge_sys_t::create()
{
- ut_ad(!purge_sys);
- rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH);
- mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex);
+ ut_ad(this == &purge_sys);
+ ut_ad(!enabled());
+ ut_ad(!event);
+ event= os_event_create(0);
+ ut_ad(event);
+ m_paused= 0;
+ query= purge_graph_build();
+ n_submitted= 0;
+ n_completed= 0;
+ next_stored= false;
+ rseg= NULL;
+ page_no= 0;
+ offset= 0;
+ hdr_page_no= 0;
+ hdr_offset= 0;
+ rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH);
+ mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex);
+ undo_trunc.create();
}
-/** Destruct the purge system. */
-purge_sys_t::~purge_sys_t()
+/** Close the purge subsystem on shutdown. */
+void purge_sys_t::close()
{
- ut_ad(this == purge_sys);
-
- trx_t* trx = query->trx;
- que_graph_free(query);
- ut_ad(!trx->id);
- ut_ad(trx->state == TRX_STATE_ACTIVE);
- trx->state = TRX_STATE_NOT_STARTED;
- trx_free_for_background(trx);
- view.close();
- rw_lock_free(&latch);
- mutex_free(&pq_mutex);
- os_event_destroy(event);
+ ut_ad(this == &purge_sys);
+ if (!event) return;
+
+ m_enabled= false;
+ trx_t* trx = query->trx;
+ que_graph_free(query);
+ ut_ad(!trx->id);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ trx->state= TRX_STATE_NOT_STARTED;
+ trx_free(trx);
+ rw_lock_free(&latch);
+ mutex_free(&pq_mutex);
+ os_event_destroy(event);
}
/*================ UNDO LOG HISTORY LIST =============================*/
-/********************************************************************//**
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
+/** Prepend the history list with an undo log.
+Remove the undo log segment from the rseg slot if it is too big for reuse.
+@param[in] trx transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction */
void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
+trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
{
- trx_undo_t* undo = trx->rsegs.m_redo.update_undo;
- trx_rseg_t* rseg = undo->rseg;
+ DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")",
+ trx->id, trx->no));
+ ut_ad(undo == trx->rsegs.m_redo.undo
+ || undo == trx->rsegs.m_redo.old_insert);
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ ut_ad(undo->rseg == rseg);
trx_rsegf_t* rseg_header = trx_rsegf_get(
rseg->space, rseg->page_no, mtr);
+ page_t* undo_page = trx_undo_set_state_at_finish(
+ undo, mtr);
trx_ulogf_t* undo_header = undo_page + undo->hdr_offset;
- if (undo->state != TRX_UNDO_CACHED) {
- ulint hist_size;
-#ifdef UNIV_DEBUG
- trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR;
-#endif /* UNIV_DEBUG */
+ ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1);
- /* The undo log segment will not be reused */
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- ib::fatal() << "undo->id is " << undo->id;
- }
+ if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG_FORMAT + rseg_header))) {
+ /* This database must have been upgraded from
+ before MariaDB 10.3.5. */
+ trx_rseg_format_upgrade(rseg_header, mtr);
+ }
+ if (undo->state != TRX_UNDO_CACHED) {
+ /* The undo log segment will not be reused */
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
- hist_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
+ uint32_t hist_size = mach_read_from_4(TRX_RSEG_HISTORY_SIZE
+ + rseg_header);
- ut_ad(undo->size == flst_get_len(
- seg_header + TRX_UNDO_PAGE_LIST));
+ ut_ad(undo->size == flst_get_len(TRX_UNDO_SEG_HDR
+ + TRX_UNDO_PAGE_LIST
+ + undo_page));
mlog_write_ulint(
rseg_header + TRX_RSEG_HISTORY_SIZE,
hist_size + undo->size, MLOG_4BYTES, mtr);
+
+ mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID,
+ trx_sys.get_max_trx_id(), mtr);
}
/* After the purge thread has been given permission to exit,
@@ -291,36 +268,57 @@ trx_purge_add_update_undo_to_history(
user transactions. */
ut_ad(srv_undo_sources
|| trx->undo_no == 0
- || ((srv_is_being_started
- || trx_rollback_or_clean_is_active)
- && purge_sys->state == PURGE_STATE_INIT)
- || (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
- && purge_sys->state == PURGE_STATE_DISABLED)
- || ((trx->in_mysql_trx_list || trx->internal)
+ || (!purge_sys.enabled()
+ && (srv_is_being_started
+ || trx_rollback_is_active
+ || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND))
+ || ((trx->mysql_thd || trx->internal)
&& srv_fast_shutdown));
+#ifdef WITH_WSREP
+ if (wsrep_is_wsrep_xid(trx->xid)) {
+ trx_rseg_update_wsrep_checkpoint(rseg_header, trx->xid, mtr);
+ }
+#endif
+
+ if (trx->mysql_log_file_name && *trx->mysql_log_file_name) {
+ /* Update the latest MySQL binlog name and offset info
+ in rollback segment header if MySQL binlogging is on
+ or the database server is a MySQL replication slave. */
+ trx_rseg_update_binlog_offset(rseg_header, trx, mtr);
+ }
+
/* Add the log as the first in the history list */
flst_add_first(rseg_header + TRX_RSEG_HISTORY,
undo_header + TRX_UNDO_HISTORY_NODE, mtr);
- my_atomic_addlint(&trx_sys->rseg_history_len, 1);
-
- /* Write the trx number to the undo log header */
mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
-
- /* Write information about delete markings to the undo log header */
-
- if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+ /* This is needed for upgrading old undo log pages from
+ before MariaDB 10.3.1. */
+ if (UNIV_UNLIKELY(!mach_read_from_2(undo_header
+ + TRX_UNDO_NEEDS_PURGE))) {
+ mlog_write_ulint(undo_header + TRX_UNDO_NEEDS_PURGE, 1,
MLOG_2BYTES, mtr);
}
if (rseg->last_page_no == FIL_NULL) {
rseg->last_page_no = undo->hdr_page_no;
rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
+ rseg->set_last_trx_no(trx->no, undo == trx->rsegs.m_redo.undo);
+ rseg->needs_purge = true;
+ }
+
+ trx_sys.history_insert();
+
+ if (undo->state == TRX_UNDO_CACHED) {
+ UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ } else {
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
+ ut_free(undo);
}
+
+ undo = NULL;
}
/** Remove undo log header from the history list.
@@ -336,7 +334,7 @@ trx_purge_remove_log_hdr(
{
flst_remove(rseg_hdr + TRX_RSEG_HISTORY,
log_hdr + TRX_UNDO_HISTORY_NODE, mtr);
- my_atomic_addlint(&trx_sys->rseg_history_len, -1);
+ trx_sys.history_remove();
}
/** Free an undo log segment, and remove the header from the history list.
@@ -348,63 +346,52 @@ trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
{
mtr_t mtr;
trx_rsegf_t* rseg_hdr;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- ulint seg_size;
- ulint hist_size;
- bool marked = false;
+ page_t* undo_page;
- for (;;) {
- page_t* undo_page;
+ mtr.start();
+ mutex_enter(&rseg->mutex);
- mtr_start(&mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ undo_page = trx_undo_page_get(
+ page_id_t(rseg->space->id, hdr_addr.page), &mtr);
+
+ /* Mark the last undo log totally purged, so that if the
+ system crashes, the tail of the undo log will not get accessed
+ again. The list of pages in the undo log tail gets
+ inconsistent during the freeing of the segment, and therefore
+ purge should not try to access them again. */
+ mlog_write_ulint(undo_page + hdr_addr.boffset + TRX_UNDO_NEEDS_PURGE,
+ 0, MLOG_2BYTES, &mtr);
+
+ while (!fseg_free_step_not_header(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
+ + undo_page, false, &mtr)) {
+ mutex_exit(&rseg->mutex);
+
+ mtr.commit();
+ mtr.start();
mutex_enter(&rseg->mutex);
rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
undo_page = trx_undo_page_get(
- page_id_t(rseg->space, hdr_addr.page), &mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- log_hdr = undo_page + hdr_addr.boffset;
-
- /* Mark the last undo log totally purged, so that if the
- system crashes, the tail of the undo log will not get accessed
- again. The list of pages in the undo log tail gets inconsistent
- during the freeing of the segment, and therefore purge should
- not try to access them again. */
-
- if (!marked) {
- marked = true;
- mlog_write_ulint(
- log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, &mtr);
- }
-
- if (fseg_free_step_not_header(
- seg_hdr + TRX_UNDO_FSEG_HEADER, false, &mtr)) {
-
- break;
- }
-
- mutex_exit(&rseg->mutex);
-
- mtr_commit(&mtr);
+ page_id_t(rseg->space->id, hdr_addr.page), &mtr);
}
/* The page list may now be inconsistent, but the length field
stored in the list base node tells us how big it was before we
started the freeing. */
- seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST);
+ const ulint seg_size = flst_get_len(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + undo_page);
/* We may free the undo log segment header page; it must be freed
within the same mtr as the undo log header is removed from the
history list: otherwise, in case of a database crash, the segment
could become inaccessible garbage in the file space. */
- trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
+ trx_purge_remove_log_hdr(rseg_hdr, undo_page + hdr_addr.boffset, &mtr);
do {
@@ -413,10 +400,11 @@ trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
is not flooded with bufferfixed pages: see the note in
fsp0fsp.cc. */
- } while (!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, false, &mtr));
+ } while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
+ + undo_page, false, &mtr));
- hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, &mtr);
+ const ulint hist_size = mach_read_from_4(rseg_hdr
+ + TRX_RSEG_HISTORY_SIZE);
ut_ad(hist_size >= seg_size);
mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
@@ -433,10 +421,12 @@ trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
/** Remove unnecessary history data from a rollback segment.
@param[in,out] rseg rollback segment
-@param[in] limit truncate offset */
+@param[in] limit truncate anything before this */
static
void
-trx_purge_truncate_rseg_history(trx_rseg_t* rseg, const purge_iter_t* limit)
+trx_purge_truncate_rseg_history(
+ trx_rseg_t& rseg,
+ const purge_sys_t::iterator& limit)
{
fil_addr_t hdr_addr;
fil_addr_t prev_hdr_addr;
@@ -447,48 +437,37 @@ trx_purge_truncate_rseg_history(trx_rseg_t* rseg, const purge_iter_t* limit)
mtr_t mtr;
trx_id_t undo_trx_no;
- mtr_start(&mtr);
- ut_ad(rseg->is_persistent());
- mutex_enter(&(rseg->mutex));
+ mtr.start();
+ ut_ad(rseg.is_persistent());
+ mutex_enter(&rseg.mutex);
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr);
hdr_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
loop:
if (hdr_addr.page == FIL_NULL) {
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-
+func_exit:
+ mutex_exit(&rseg.mutex);
+ mtr.commit();
return;
}
- undo_page = trx_undo_page_get(page_id_t(rseg->space, hdr_addr.page),
+ undo_page = trx_undo_page_get(page_id_t(rseg.space->id, hdr_addr.page),
&mtr);
log_hdr = undo_page + hdr_addr.boffset;
undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
- if (undo_trx_no >= limit->trx_no) {
-
- /* limit space_id should match the rollback segment
- space id to avoid freeing of the page belongs to
- different rollback segment for the same trx_no. */
- if (undo_trx_no == limit->trx_no
- && rseg->space == limit->undo_rseg_space) {
-
+ if (undo_trx_no >= limit.trx_no()) {
+ if (undo_trx_no == limit.trx_no()) {
trx_undo_truncate_start(
- rseg, hdr_addr.page,
- hdr_addr.boffset, limit->undo_no);
+ &rseg, hdr_addr.page,
+ hdr_addr.boffset, limit.undo_no);
}
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return;
+ goto func_exit;
}
prev_hdr_addr = trx_purge_get_log_from_hist(
@@ -501,24 +480,24 @@ loop:
/* We can free the whole log segment */
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ mutex_exit(&rseg.mutex);
+ mtr.commit();
/* calls the trx_purge_remove_log_hdr()
inside trx_purge_free_segment(). */
- trx_purge_free_segment(rseg, hdr_addr);
+ trx_purge_free_segment(&rseg, hdr_addr);
} else {
/* Remove the log hdr from the rseg history. */
trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ mutex_exit(&rseg.mutex);
+ mtr.commit();
}
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
+ mtr.start();
+ mutex_enter(&rseg.mutex);
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr);
hdr_addr = prev_hdr_addr;
@@ -609,8 +588,8 @@ namespace undo {
return;
}
- ulint sz = UNIV_PAGE_SIZE;
- void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ ulint sz = srv_page_size;
+ void* buf = ut_zalloc_nokey(sz + srv_page_size);
if (buf == NULL) {
os_file_close(handle);
os_file_delete(innodb_log_file_key, log_file_name);
@@ -619,7 +598,7 @@ namespace undo {
}
byte* log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
+ ut_align(buf, srv_page_size));
mach_write_to_4(log_buf, undo::s_magic);
@@ -677,8 +656,8 @@ namespace undo {
return(false);
}
- ulint sz = UNIV_PAGE_SIZE;
- void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ ulint sz = srv_page_size;
+ void* buf = ut_zalloc_nokey(sz + srv_page_size);
if (buf == NULL) {
os_file_close(handle);
os_file_delete(innodb_log_file_key,
@@ -688,7 +667,7 @@ namespace undo {
}
byte* log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
+ ut_align(buf, srv_page_size));
IORequest request(IORequest::READ);
@@ -766,7 +745,7 @@ trx_purge_mark_undo_for_truncate(
for (ulint i = 1; i <= srv_undo_tablespaces_active; i++) {
if (fil_space_get_size(space_id)
- > (srv_max_undo_log_size / srv_page_size)) {
+ > (srv_max_undo_log_size >> srv_page_size_shift)) {
/* Tablespace qualifies for truncate. */
undo_trunc->mark(space_id);
undo::Truncate::add_space_to_trunc_list(space_id);
@@ -792,9 +771,10 @@ trx_purge_mark_undo_for_truncate(
/* Step-3: Iterate over all the rsegs of selected UNDO tablespace
and mark them temporarily unavailable for allocation.*/
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys->rseg_array[i]) {
+ if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
ut_ad(rseg->is_persistent());
- if (rseg->space == undo_trunc->get_marked_space_id()) {
+ if (rseg->space->id
+ == undo_trunc->get_marked_space_id()) {
/* Once set this rseg will not be allocated
to new booting transaction but we will wait
@@ -816,17 +796,17 @@ void
trx_purge_cleanse_purge_queue(
undo::Truncate* undo_trunc)
{
- mutex_enter(&purge_sys->pq_mutex);
+ mutex_enter(&purge_sys.pq_mutex);
typedef std::vector<TrxUndoRsegs> purge_elem_list_t;
purge_elem_list_t purge_elem_list;
/* Remove rseg instances that are in the purge queue before we start
truncate of corresponding UNDO truncate. */
- while (!purge_sys->purge_queue.empty()) {
- purge_elem_list.push_back(purge_sys->purge_queue.top());
- purge_sys->purge_queue.pop();
+ while (!purge_sys.purge_queue.empty()) {
+ purge_elem_list.push_back(purge_sys.purge_queue.top());
+ purge_sys.purge_queue.pop();
}
- ut_ad(purge_sys->purge_queue.empty());
+ ut_ad(purge_sys.purge_queue.empty());
for (purge_elem_list_t::iterator it = purge_elem_list.begin();
it != purge_elem_list.end();
@@ -836,21 +816,18 @@ trx_purge_cleanse_purge_queue(
it2 != it->end();
++it2) {
- if ((*it2)->space
+ if ((*it2)->space->id
== undo_trunc->get_marked_space_id()) {
it->erase(it2);
break;
}
}
- if (it->size()) {
- /* size != 0 suggest that there exist other rsegs that
- needs processing so add this element to purge queue.
- Note: Other rseg could be non-redo rsegs. */
- purge_sys->purge_queue.push(*it);
+ if (!it->empty()) {
+ purge_sys.purge_queue.push(*it);
}
}
- mutex_exit(&purge_sys->pq_mutex);
+ mutex_exit(&purge_sys.pq_mutex);
}
/** Iterate over selected UNDO tablespace and check if all the rsegs
@@ -860,7 +837,7 @@ that resides in the tablespace are free.
static
void
trx_purge_initiate_truncate(
- purge_iter_t* limit,
+ const purge_sys_t::iterator& limit,
undo::Truncate* undo_trunc)
{
/* Step-1: Early check to findout if any of the the UNDO tablespace
@@ -904,23 +881,11 @@ trx_purge_initiate_truncate(
ulint cached_undo_size = 0;
for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ UT_LIST_GET_FIRST(rseg->undo_cached);
undo != NULL && all_free;
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
- if (limit->trx_no < undo->trx_id) {
- all_free = false;
- } else {
- cached_undo_size += undo->size;
- }
- }
-
- for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- undo != NULL && all_free;
- undo = UT_LIST_GET_NEXT(undo_list, undo)) {
-
- if (limit->trx_no < undo->trx_id) {
+ if (limit.trx_no() < undo->trx_id) {
all_free = false;
} else {
cached_undo_size += undo->size;
@@ -958,6 +923,14 @@ trx_purge_initiate_truncate(
ut_a(srv_is_undo_tablespace(space_id));
+ fil_space_t* space = fil_space_get(space_id);
+
+ if (!space) {
+not_found:
+ ib::error() << "Failed to find UNDO tablespace " << space_id;
+ return;
+ }
+
/* Flush all to-be-discarded pages of the tablespace.
During truncation, we do not want any writes to the
@@ -970,8 +943,8 @@ trx_purge_initiate_truncate(
break crash recovery. So, we cannot avoid the write. */
{
FlushObserver observer(
- space_id,
- UT_LIST_GET_FIRST(purge_sys->query->thrs)->graph->trx,
+ space,
+ UT_LIST_GET_FIRST(purge_sys.query->thrs)->graph->trx,
NULL);
buf_LRU_flush_or_remove_pages(space_id, &observer);
}
@@ -979,11 +952,10 @@ trx_purge_initiate_truncate(
log_free_check();
/* Adjust the tablespace metadata. */
- fil_space_t* space = fil_truncate_prepare(space_id);
+ space = fil_truncate_prepare(space_id);
if (!space) {
- ib::error() << "Failed to find UNDO tablespace " << space_id;
- return;
+ goto not_found;
}
/* Undo tablespace always are a single file. */
@@ -998,69 +970,54 @@ trx_purge_initiate_truncate(
mtr.start();
mtr_x_lock(&space->latch, &mtr);
fil_truncate_log(space, size, &mtr);
- fsp_header_init(space_id, size, &mtr);
- mutex_enter(&fil_system->mutex);
+ fsp_header_init(space, size, &mtr);
+ mutex_enter(&fil_system.mutex);
space->size = file->size = size;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
+
+ buf_block_t* sys_header = trx_sysf_get(&mtr);
for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) {
trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i);
-
buf_block_t* rblock = trx_rseg_header_create(
- space_id, ULINT_MAX, rseg->id, &mtr);
+ space, rseg->id, sys_header, &mtr);
ut_ad(rblock);
rseg->page_no = rblock ? rblock->page.id.page_no() : FIL_NULL;
/* Before re-initialization ensure that we free the existing
structure. There can't be any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
- ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0);
trx_undo_t* next_undo;
- for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ for (trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached);
undo != NULL;
undo = next_undo) {
next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->update_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(undo);
+ ut_free(undo);
}
- for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- undo != NULL;
- undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(undo);
- }
-
- UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list);
/* These were written by trx_rseg_header_create(). */
- ut_ad(mach_read_from_4(TRX_RSEG + TRX_RSEG_MAX_SIZE
- + rblock->frame)
- == uint32_t(rseg->max_size));
+ ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT
+ + rblock->frame));
ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE
+ rblock->frame));
- rseg->max_size = ULINT_MAX;
-
/* Initialize the undo log lists according to the rseg header */
rseg->curr_size = 1;
rseg->trx_ref_count = 0;
rseg->last_page_no = FIL_NULL;
rseg->last_offset = 0;
- rseg->last_trx_no = 0;
- rseg->last_del_marks = FALSE;
+ rseg->last_commit = 0;
+ rseg->needs_purge = false;
}
mtr.commit();
@@ -1076,24 +1033,24 @@ trx_purge_initiate_truncate(
/* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
ut_ad(space->stop_new_ops);
ut_ad(space->is_being_truncated);
space->stop_new_ops = false;
space->is_being_truncated = false;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
- if (purge_sys->rseg != NULL
- && purge_sys->rseg->last_page_no == FIL_NULL) {
- /* If purge_sys->rseg is pointing to rseg that was recently
+ if (purge_sys.rseg != NULL
+ && purge_sys.rseg->last_page_no == FIL_NULL) {
+ /* If purge_sys.rseg is pointing to rseg that was recently
truncated then move to next rseg element.
- Note: Ideally purge_sys->rseg should be NULL because purge
+ Note: Ideally purge_sys.rseg should be NULL because purge
should complete processing of all the records but there is
purge_batch_size that can force the purge loop to exit before
- all the records are purged and in this case purge_sys->rseg
+ all the records are purged and in this case purge_sys.rseg
could point to a valid rseg waiting for next purge cycle. */
- purge_sys->next_stored = false;
- purge_sys->rseg = NULL;
+ purge_sys.next_stored = false;
+ purge_sys.rseg = NULL;
}
DBUG_EXECUTE_IF("ib_undo_trunc",
@@ -1113,35 +1070,26 @@ trx_purge_initiate_truncate(
undo::Truncate::clear_trunc_list();
}
-/********************************************************************//**
+/**
Removes unnecessary history data from rollback segments. NOTE that when this
-function is called, the caller must not have any latches on undo log pages! */
-static
-void
-trx_purge_truncate_history(
-/*========================*/
- purge_iter_t* limit, /*!< in: truncate limit */
- const ReadView* view) /*!< in: purge view */
+function is called, the caller must not have any latches on undo log pages!
+*/
+static void trx_purge_truncate_history()
{
- ut_ad(trx_purge_check_limit());
-
- /* We play safe and set the truncate limit at most to the purge view
- low_limit number, though this is not necessary */
-
- if (limit->trx_no >= view->low_limit_no()) {
- limit->trx_no = view->low_limit_no();
- limit->undo_no = 0;
- limit->undo_rseg_space = ULINT_UNDEFINED;
+ ut_ad(purge_sys.head <= purge_sys.tail);
+ purge_sys_t::iterator& head = purge_sys.head.commit
+ ? purge_sys.head : purge_sys.tail;
+
+ if (head.trx_no() >= purge_sys.view.low_limit_no()) {
+ /* This is sometimes necessary. TODO: find out why. */
+ head.reset_trx_no(purge_sys.view.low_limit_no());
+ head.undo_no = 0;
}
- ut_ad(limit->trx_no <= purge_sys->view.low_limit_no());
-
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys->rseg_array[i];
-
- if (rseg != NULL) {
- ut_a(rseg->id == i);
- trx_purge_truncate_rseg_history(rseg, limit);
+ if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
+ ut_ad(rseg->id == i);
+ trx_purge_truncate_rseg_history(*rseg, head);
}
}
@@ -1149,19 +1097,15 @@ trx_purge_truncate_history(
can (greedy approach). This will ensure when the server is idle we
try and truncate all the UNDO tablespaces. */
for (ulint i = srv_undo_tablespaces_active; i--; ) {
- trx_purge_mark_undo_for_truncate(&purge_sys->undo_trunc);
- trx_purge_initiate_truncate(limit, &purge_sys->undo_trunc);
+ trx_purge_mark_undo_for_truncate(&purge_sys.undo_trunc);
+ trx_purge_initiate_truncate(head, &purge_sys.undo_trunc);
}
}
/***********************************************************************//**
Updates the last not yet purged history log info in rseg when we have purged
-a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
-static
-void
-trx_purge_rseg_get_next_history_log(
-/*================================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
+a whole undo log. Advances also purge_sys.purge_trx_no past the purged log. */
+static void trx_purge_rseg_get_next_history_log(
ulint* n_pages_handled)/*!< in/out: number of UNDO pages
handled */
{
@@ -1169,24 +1113,23 @@ trx_purge_rseg_get_next_history_log(
trx_ulogf_t* log_hdr;
fil_addr_t prev_log_addr;
trx_id_t trx_no;
- ibool del_marks;
mtr_t mtr;
- mutex_enter(&(rseg->mutex));
+ mutex_enter(&purge_sys.rseg->mutex);
- ut_a(rseg->last_page_no != FIL_NULL);
+ ut_a(purge_sys.rseg->last_page_no != FIL_NULL);
- purge_sys->iter.trx_no = rseg->last_trx_no + 1;
- purge_sys->iter.undo_no = 0;
- purge_sys->iter.undo_rseg_space = ULINT_UNDEFINED;
- purge_sys->next_stored = false;
+ purge_sys.tail.commit = purge_sys.rseg->last_commit + 1;
+ purge_sys.tail.undo_no = 0;
+ purge_sys.next_stored = false;
- mtr_start(&mtr);
+ mtr.start();
undo_page = trx_undo_page_get_s_latched(
- page_id_t(rseg->space, rseg->last_page_no), &mtr);
+ page_id_t(purge_sys.rseg->space->id,
+ purge_sys.rseg->last_page_no), &mtr);
- log_hdr = undo_page + rseg->last_offset;
+ log_hdr = undo_page + purge_sys.rseg->last_offset;
/* Increase the purge page count by one for every handled log */
@@ -1195,56 +1138,53 @@ trx_purge_rseg_get_next_history_log(
prev_log_addr = trx_purge_get_log_from_hist(
flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
- if (prev_log_addr.page == FIL_NULL) {
+ const bool empty = prev_log_addr.page == FIL_NULL;
+
+ if (empty) {
/* No logs left in the history list */
+ purge_sys.rseg->last_page_no = FIL_NULL;
+ }
- rseg->last_page_no = FIL_NULL;
+ mutex_exit(&purge_sys.rseg->mutex);
+ mtr.commit();
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ if (empty) {
return;
}
- mutex_exit(&rseg->mutex);
-
- mtr_commit(&mtr);
-
- /* Read the trx number and del marks from the previous log header */
- mtr_start(&mtr);
+ /* Read the previous log header. */
+ mtr.start();
- log_hdr = trx_undo_page_get_s_latched(page_id_t(rseg->space,
- prev_log_addr.page),
- &mtr)
+ log_hdr = trx_undo_page_get_s_latched(
+ page_id_t(purge_sys.rseg->space->id, prev_log_addr.page),
+ &mtr)
+ prev_log_addr.boffset;
trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
-
- del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
+ unsigned purge = mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE);
+ ut_ad(purge <= 1);
mtr_commit(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg->last_page_no = prev_log_addr.page;
- rseg->last_offset = prev_log_addr.boffset;
- rseg->last_trx_no = trx_no;
- rseg->last_del_marks = del_marks;
+ mutex_enter(&purge_sys.rseg->mutex);
- TrxUndoRsegs elem(rseg->last_trx_no);
- elem.push_back(rseg);
+ purge_sys.rseg->last_page_no = prev_log_addr.page;
+ purge_sys.rseg->last_offset = prev_log_addr.boffset;
+ purge_sys.rseg->set_last_trx_no(trx_no, purge != 0);
+ purge_sys.rseg->needs_purge = purge != 0;
/* Purge can also produce events, however these are already ordered
in the rollback segment and any user generated event will be greater
than the events that Purge produces, i.e. Purge can never produce
events from an empty rollback segment. */
- mutex_enter(&purge_sys->pq_mutex);
+ mutex_enter(&purge_sys.pq_mutex);
- purge_sys->purge_queue.push(elem);
+ purge_sys.purge_queue.push(*purge_sys.rseg);
- mutex_exit(&purge_sys->pq_mutex);
+ mutex_exit(&purge_sys.pq_mutex);
- mutex_exit(&rseg->mutex);
+ mutex_exit(&purge_sys.rseg->mutex);
}
/** Position the purge sys "iterator" on the undo record to use for purging. */
@@ -1255,46 +1195,36 @@ trx_purge_read_undo_rec()
ulint offset;
ulint page_no;
ib_uint64_t undo_no;
- ulint undo_rseg_space;
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+ purge_sys.hdr_offset = purge_sys.rseg->last_offset;
+ page_no = purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
- if (purge_sys->rseg->last_del_marks) {
+ if (purge_sys.rseg->needs_purge) {
mtr_t mtr;
- trx_undo_rec_t* undo_rec = NULL;
+ mtr.start();
+ if (trx_undo_rec_t* undo_rec = trx_undo_get_first_rec(
+ purge_sys.rseg->space, purge_sys.hdr_page_no,
+ purge_sys.hdr_offset, RW_S_LATCH, &mtr)) {
- mtr_start(&mtr);
-
- undo_rec = trx_undo_get_first_rec(
- purge_sys->rseg->space,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset, RW_S_LATCH, &mtr);
-
- if (undo_rec != NULL) {
offset = page_offset(undo_rec);
undo_no = trx_undo_rec_get_undo_no(undo_rec);
- undo_rseg_space = purge_sys->rseg->space;
page_no = page_get_page_no(page_align(undo_rec));
} else {
offset = 0;
undo_no = 0;
- undo_rseg_space = ULINT_UNDEFINED;
}
- mtr_commit(&mtr);
+ mtr.commit();
} else {
offset = 0;
undo_no = 0;
- undo_rseg_space = ULINT_UNDEFINED;
}
- purge_sys->offset = offset;
- purge_sys->page_no = page_no;
- purge_sys->iter.undo_no = undo_no;
- purge_sys->iter.undo_rseg_space = undo_rseg_space;
+ purge_sys.offset = offset;
+ purge_sys.page_no = page_no;
+ purge_sys.tail.undo_no = undo_no;
- purge_sys->next_stored = true;
+ purge_sys.next_stored = true;
}
/***********************************************************************//**
@@ -1307,9 +1237,9 @@ void
trx_purge_choose_next_log(void)
/*===========================*/
{
- ut_ad(!purge_sys->next_stored);
+ ut_ad(!purge_sys.next_stored);
- if (purge_sys->rseg_iter.set_next()) {
+ if (purge_sys.rseg_iter.set_next()) {
trx_purge_read_undo_rec();
} else {
/* There is nothing to do yet. */
@@ -1338,19 +1268,18 @@ trx_purge_get_next_rec(
ulint space;
mtr_t mtr;
- ut_ad(purge_sys->next_stored);
- ut_ad(purge_sys->iter.trx_no < purge_sys->view.low_limit_no());
+ ut_ad(purge_sys.next_stored);
+ ut_ad(purge_sys.tail.trx_no() < purge_sys.view.low_limit_no());
- space = purge_sys->rseg->space;
- page_no = purge_sys->page_no;
- offset = purge_sys->offset;
+ space = purge_sys.rseg->space->id;
+ page_no = purge_sys.page_no;
+ offset = purge_sys.offset;
if (offset == 0) {
/* It is the dummy undo log record, which means that there is
no need to purge this undo log */
- trx_purge_rseg_get_next_history_log(
- purge_sys->rseg, n_pages_handled);
+ trx_purge_rseg_get_next_history_log(n_pages_handled);
/* Look for the next undo log and record to purge */
@@ -1366,52 +1295,18 @@ trx_purge_get_next_rec(
rec = undo_page + offset;
- rec2 = rec;
-
- for (;;) {
- ulint type;
- trx_undo_rec_t* next_rec;
- ulint cmpl_info;
-
- /* Try first to find the next record which requires a purge
- operation from the same page of the same undo log */
-
- next_rec = trx_undo_page_get_next_rec(
- rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
-
- if (next_rec == NULL) {
- rec2 = trx_undo_get_next_rec(
- rec2, purge_sys->hdr_page_no,
- purge_sys->hdr_offset, &mtr);
- break;
- }
-
- rec2 = next_rec;
-
- type = trx_undo_rec_get_type(rec2);
-
- if (type == TRX_UNDO_DEL_MARK_REC) {
-
- break;
- }
-
- cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
-
- if (trx_undo_rec_get_extern_storage(rec2)) {
- break;
- }
+ rec2 = trx_undo_page_get_next_rec(rec, purge_sys.hdr_page_no,
+ purge_sys.hdr_offset);
- if ((type == TRX_UNDO_UPD_EXIST_REC)
- && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- break;
- }
+ if (rec2 == NULL) {
+ rec2 = trx_undo_get_next_rec(rec, purge_sys.hdr_page_no,
+ purge_sys.hdr_offset, &mtr);
}
if (rec2 == NULL) {
mtr_commit(&mtr);
- trx_purge_rseg_get_next_history_log(
- purge_sys->rseg, n_pages_handled);
+ trx_purge_rseg_get_next_history_log(n_pages_handled);
/* Look for the next undo log and record to purge */
@@ -1426,10 +1321,9 @@ trx_purge_get_next_rec(
} else {
page = page_align(rec2);
- purge_sys->offset = rec2 - page;
- purge_sys->page_no = page_get_page_no(page);
- purge_sys->iter.undo_no = trx_undo_rec_get_undo_no(rec2);
- purge_sys->iter.undo_rseg_space = space;
+ purge_sys.offset = ulint(rec2 - page);
+ purge_sys.page_no = page_get_page_no(page);
+ purge_sys.tail.undo_no = trx_undo_rec_get_undo_no(rec2);
if (undo_page != page) {
/* We advance to a new page of the undo log: */
@@ -1458,17 +1352,17 @@ trx_purge_fetch_next_rec(
handled */
mem_heap_t* heap) /*!< in: memory heap where copied */
{
- if (!purge_sys->next_stored) {
+ if (!purge_sys.next_stored) {
trx_purge_choose_next_log();
- if (!purge_sys->next_stored) {
+ if (!purge_sys.next_stored) {
DBUG_PRINT("ib_purge",
("no logs left in the history list"));
return(NULL);
}
}
- if (purge_sys->iter.trx_no >= purge_sys->view.low_limit_no()) {
+ if (purge_sys.tail.trx_no() >= purge_sys.view.low_limit_no()) {
return(NULL);
}
@@ -1477,8 +1371,11 @@ trx_purge_fetch_next_rec(
os_thread_get_curr_id(), iter->trx_no, iter->undo_no); */
*roll_ptr = trx_undo_build_roll_ptr(
- FALSE, purge_sys->rseg->id,
- purge_sys->page_no, purge_sys->offset);
+ /* row_purge_record_func() will later set
+ ROLL_PTR_INSERT_FLAG for TRX_UNDO_INSERT_REC */
+ false,
+ purge_sys.rseg->id,
+ purge_sys.page_no, purge_sys.offset);
/* The following call will advance the stored values of the
purge iterator. */
@@ -1486,30 +1383,26 @@ trx_purge_fetch_next_rec(
return(trx_purge_get_next_rec(n_pages_handled, heap));
}
-/*******************************************************************//**
-This function runs a purge batch.
+/** Run a purge batch.
+@param n_purge_threads number of purge threads
@return number of undo log pages handled in the batch */
static
ulint
-trx_purge_attach_undo_recs(
-/*=======================*/
- ulint n_purge_threads,/*!< in: number of purge threads */
- purge_sys_t* purge_sys, /*!< in/out: purge instance */
- ulint batch_size) /*!< in: no. of pages to purge */
+trx_purge_attach_undo_recs(ulint n_purge_threads)
{
que_thr_t* thr;
ulint i;
ulint n_pages_handled = 0;
- ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
+ ulint n_thrs = UT_LIST_GET_LEN(purge_sys.query->thrs);
ut_a(n_purge_threads > 0);
- purge_sys->limit = purge_sys->iter;
+ purge_sys.head = purge_sys.tail;
#ifdef UNIV_DEBUG
i = 0;
/* Debug code to validate some pre-requisites and reset done flag. */
- for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
thr != NULL && i < n_purge_threads;
thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
@@ -1531,13 +1424,15 @@ trx_purge_attach_undo_recs(
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
- thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
ut_a(n_thrs > 0 && thr != NULL);
- ut_ad(trx_purge_check_limit());
+ ut_ad(purge_sys.head <= purge_sys.tail);
i = 0;
+ const ulint batch_size = srv_purge_batch_size;
+
while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) {
purge_node_t* node;
trx_purge_rec_t* purge_rec;
@@ -1554,11 +1449,11 @@ trx_purge_attach_undo_recs(
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
- if (trx_purge_check_limit()) {
- purge_sys->limit = purge_sys->iter;
+ if (purge_sys.head <= purge_sys.tail) {
+ purge_sys.head = purge_sys.tail;
}
- /* Fetch the next record, and advance the purge_sys->iter. */
+ /* Fetch the next record, and advance the purge_sys.tail. */
purge_rec->undo_rec = trx_purge_fetch_next_rec(
&purge_rec->roll_ptr, &n_pages_handled, node->heap);
@@ -1586,13 +1481,13 @@ trx_purge_attach_undo_recs(
thr = UT_LIST_GET_NEXT(thrs, thr);
if (!(++i % n_purge_threads)) {
- thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
}
ut_a(thr != NULL);
}
- ut_ad(trx_purge_check_limit());
+ ut_ad(purge_sys.head <= purge_sys.tail);
return(n_pages_handled);
}
@@ -1612,12 +1507,12 @@ trx_purge_dml_delay(void)
/* If purge lag is set (i.e. > 0) then calculate the new DML delay.
Note: we do a dirty read of the trx_sys_t data structure here,
- without holding trx_sys->mutex. */
+ without holding trx_sys.mutex. */
if (srv_max_purge_lag > 0) {
float ratio;
- ratio = float(trx_sys->rseg_history_len) / srv_max_purge_lag;
+ ratio = float(trx_sys.history_size()) / srv_max_purge_lag;
if (ratio > 1.0) {
/* If the history list length exceeds the
@@ -1637,18 +1532,14 @@ trx_purge_dml_delay(void)
return(delay);
}
-/*******************************************************************//**
-Wait for pending purge jobs to complete. */
+/** Wait for pending purge jobs to complete. */
static
void
-trx_purge_wait_for_workers_to_complete(
-/*===================================*/
- purge_sys_t* purge_sys) /*!< in: purge instance */
+trx_purge_wait_for_workers_to_complete()
{
- ulint n_submitted = purge_sys->n_submitted;
-
/* Ensure that the work queue empties out. */
- while ((ulint) my_atomic_loadlint(&purge_sys->n_completed) != n_submitted) {
+ while (my_atomic_loadlint(&purge_sys.n_completed)
+ != purge_sys.n_submitted) {
if (srv_get_task_queue_length() > 0) {
srv_release_threads(SRV_WORKER, 1);
@@ -1657,9 +1548,6 @@ trx_purge_wait_for_workers_to_complete(
os_thread_yield();
}
- /* None of the worker threads should be doing any work. */
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
/* There should be no outstanding tasks as long
as the worker threads are active. */
ut_a(srv_get_task_queue_length() == 0);
@@ -1673,8 +1561,6 @@ trx_purge(
/*======*/
ulint n_purge_threads, /*!< in: number of purge tasks
to submit to the work queue */
- ulint batch_size, /*!< in: the maximum number of records
- to purge in one batch */
bool truncate) /*!< in: truncate history if true */
{
que_thr_t* thr = NULL;
@@ -1685,11 +1571,12 @@ trx_purge(
srv_dml_needed_delay = trx_purge_dml_delay();
/* The number of tasks submitted should be completed. */
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
+ ut_a(purge_sys.n_submitted
+ == my_atomic_loadlint(&purge_sys.n_completed));
- rw_lock_x_lock(&purge_sys->latch);
- trx_sys->mvcc->clone_oldest_view(&purge_sys->view);
- rw_lock_x_unlock(&purge_sys->latch);
+ rw_lock_x_lock(&purge_sys.latch);
+ trx_sys.clone_oldest_view();
+ rw_lock_x_unlock(&purge_sys.latch);
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
@@ -1698,66 +1585,31 @@ trx_purge(
#endif /* UNIV_DEBUG */
/* Fetch the UNDO recs that need to be purged. */
- n_pages_handled = trx_purge_attach_undo_recs(
- n_purge_threads, purge_sys, batch_size);
-
- /* Do we do an asynchronous purge or not ? */
- if (n_purge_threads > 1) {
- ulint i = 0;
-
- /* Submit the tasks to the work queue. */
- for (i = 0; i < n_purge_threads - 1; ++i) {
- thr = que_fork_scheduler_round_robin(
- purge_sys->query, thr);
-
- ut_a(thr != NULL);
-
- srv_que_task_enqueue_low(thr);
- }
-
- thr = que_fork_scheduler_round_robin(purge_sys->query, thr);
- ut_a(thr != NULL);
-
- purge_sys->n_submitted += n_purge_threads - 1;
-
- goto run_synchronously;
-
- /* Do it synchronously. */
- } else {
- thr = que_fork_scheduler_round_robin(purge_sys->query, NULL);
- ut_ad(thr);
+ n_pages_handled = trx_purge_attach_undo_recs(n_purge_threads);
+ purge_sys.n_submitted += n_purge_threads;
+
+ /* Submit tasks to workers queue if using multi-threaded purge. */
+ for (ulint i = n_purge_threads; --i; ) {
+ thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
+ ut_a(thr);
+ srv_que_task_enqueue_low(thr);
+ }
-run_synchronously:
- ++purge_sys->n_submitted;
+ thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
- que_run_threads(thr);
+ que_run_threads(thr);
- my_atomic_addlint(
- &purge_sys->n_completed, 1);
+ my_atomic_addlint(&purge_sys.n_completed, 1);
- if (n_purge_threads > 1) {
- trx_purge_wait_for_workers_to_complete(purge_sys);
- }
+ if (n_purge_threads > 1) {
+ trx_purge_wait_for_workers_to_complete();
}
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
-#ifdef UNIV_DEBUG
- rw_lock_x_lock(&purge_sys->latch);
- if (purge_sys->limit.trx_no == 0) {
- purge_sys->done = purge_sys->iter;
- } else {
- purge_sys->done = purge_sys->limit;
- }
- rw_lock_x_unlock(&purge_sys->latch);
-#endif /* UNIV_DEBUG */
+ ut_a(purge_sys.n_submitted
+ == my_atomic_loadlint(&purge_sys.n_completed));
if (truncate) {
- trx_purge_truncate_history(
- purge_sys->limit.trx_no
- ? &purge_sys->limit
- : &purge_sys->iter,
- &purge_sys->view);
+ trx_purge_truncate_history();
}
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
@@ -1766,111 +1618,63 @@ run_synchronously:
return(n_pages_handled);
}
-/*******************************************************************//**
-Get the purge state.
-@return purge state. */
-purge_state_t
-trx_purge_state(void)
-/*=================*/
+/** Stop purge during FLUSH TABLES FOR EXPORT */
+void purge_sys_t::stop()
{
- purge_state_t state;
-
- rw_lock_x_lock(&purge_sys->latch);
-
- state = purge_sys->state;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- return(state);
+ rw_lock_x_lock(&latch);
+
+ if (!enabled_latched())
+ {
+ /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ rw_lock_x_unlock(&latch);
+ return;
+ }
+
+ ut_ad(srv_n_purge_threads > 0);
+
+ if (0 == my_atomic_add32_explicit(&m_paused, 1, MY_MEMORY_ORDER_RELAXED))
+ {
+ /* We need to wakeup the purge thread in case it is suspended, so
+ that it can acknowledge the state change. */
+ const int64_t sig_count = os_event_reset(event);
+ rw_lock_x_unlock(&latch);
+ ib::info() << "Stopping purge";
+ srv_purge_wakeup();
+ /* Wait for purge coordinator to signal that it is suspended. */
+ os_event_wait_low(event, sig_count);
+ MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT);
+ return;
+ }
+
+ rw_lock_x_unlock(&latch);
+
+ if (running())
+ {
+ ib::info() << "Waiting for purge to stop";
+ while (running())
+ os_thread_sleep(10000);
+ }
}
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-void
-trx_purge_stop(void)
-/*================*/
+/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
+void purge_sys_t::resume()
{
- rw_lock_x_lock(&purge_sys->latch);
-
- switch (purge_sys->state) {
- case PURGE_STATE_INIT:
- case PURGE_STATE_DISABLED:
- ut_error;
- case PURGE_STATE_EXIT:
- /* Shutdown must have been initiated during
- FLUSH TABLES FOR EXPORT. */
- ut_ad(!srv_undo_sources);
-unlock:
- rw_lock_x_unlock(&purge_sys->latch);
- break;
- case PURGE_STATE_STOP:
- ut_ad(srv_n_purge_threads > 0);
- ++purge_sys->n_stop;
- purge_sys->state = PURGE_STATE_STOP;
- if (!purge_sys->running) {
- goto unlock;
- }
- ib::info() << "Waiting for purge to stop";
- do {
- rw_lock_x_unlock(&purge_sys->latch);
- os_thread_sleep(10000);
- rw_lock_x_lock(&purge_sys->latch);
- } while (purge_sys->running);
- goto unlock;
- case PURGE_STATE_RUN:
- ut_ad(srv_n_purge_threads > 0);
- ++purge_sys->n_stop;
- ib::info() << "Stopping purge";
-
- /* We need to wakeup the purge thread in case it is suspended,
- so that it can acknowledge the state change. */
-
- const int64_t sig_count = os_event_reset(purge_sys->event);
- purge_sys->state = PURGE_STATE_STOP;
- rw_lock_x_unlock(&purge_sys->latch);
- srv_purge_wakeup();
- /* Wait for purge coordinator to signal that it
- is suspended. */
- os_event_wait_low(purge_sys->event, sig_count);
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
-}
-
-/*******************************************************************//**
-Resume purge, move to PURGE_STATE_RUN. */
-void
-trx_purge_run(void)
-/*===============*/
-{
- rw_lock_x_lock(&purge_sys->latch);
-
- switch (purge_sys->state) {
- case PURGE_STATE_EXIT:
- /* Shutdown must have been initiated during
- FLUSH TABLES FOR EXPORT. */
- ut_ad(!srv_undo_sources);
- break;
- case PURGE_STATE_INIT:
- case PURGE_STATE_DISABLED:
- ut_error;
-
- case PURGE_STATE_RUN:
- ut_a(!purge_sys->n_stop);
- break;
- case PURGE_STATE_STOP:
- ut_a(purge_sys->n_stop);
- if (--purge_sys->n_stop == 0) {
-
- ib::info() << "Resuming purge";
-
- purge_sys->state = PURGE_STATE_RUN;
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_RESUME_COUNT, 1);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- srv_purge_wakeup();
+ if (!enabled())
+ {
+ /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ return;
+ }
+
+ int32_t paused= my_atomic_add32_explicit(&m_paused, -1,
+ MY_MEMORY_ORDER_RELAXED);
+ ut_a(paused);
+
+ if (paused == 1)
+ {
+ ib::info() << "Resuming purge";
+ srv_purge_wakeup();
+ MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT);
+ }
}
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index 821da9abd55..9888b14190d 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -31,7 +31,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0log.h"
#include "dict0dict.h"
#include "ut0mem.h"
-#include "read0read.h"
#include "row0ext.h"
#include "row0upd.h"
#include "que0que.h"
@@ -40,61 +39,78 @@ Created 3/26/1996 Heikki Tuuri
#include "row0row.h"
#include "row0mysql.h"
+/** The search tuple corresponding to TRX_UNDO_INSERT_METADATA */
+const dtuple_t trx_undo_metadata = {
+ REC_INFO_METADATA, 0, 0,
+ NULL, 0, NULL,
+ UT_LIST_NODE_T(dtuple_t)()
+#ifdef UNIV_DEBUG
+ , DATA_TUPLE_MAGIC_N
+#endif /* UNIV_DEBUG */
+};
+
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
-/**********************************************************************//**
-Writes the mtr log entry of the inserted undo log record on the undo log
-page. */
-UNIV_INLINE
-void
-trx_undof_page_add_undo_rec_log(
-/*============================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint old_free, /*!< in: start offset of the inserted entry */
- ulint new_free, /*!< in: end offset of the entry */
- mtr_t* mtr) /*!< in: mtr */
+/** Write redo log of writing an undo log record.
+@param[in] undo_block undo log page
+@param[in] old_free start offset of the undo log record
+@param[in] new_free end offset of the undo log record
+@param[in,out] mtr mini-transaction */
+static void trx_undof_page_add_undo_rec_log(const buf_block_t* undo_block,
+ ulint old_free, ulint new_free,
+ mtr_t* mtr)
{
- byte* log_ptr;
- const byte* log_end;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
-
- if (log_ptr == NULL) {
-
+ ut_ad(old_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ ut_ad(new_free >= old_free);
+ ut_ad(new_free < srv_page_size);
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame)
+ == new_free);
+ mtr->set_modified();
+ switch (mtr->get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
return;
+ case MTR_LOG_SHORT_INSERTS:
+ ut_ad(0);
+ /* fall through */
+ case MTR_LOG_ALL:
+ break;
}
- log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
- log_ptr = mlog_write_initial_log_record_fast(
- undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
- len = new_free - old_free - 4;
-
+ const uint32_t
+ len = uint32_t(new_free - old_free - 4),
+ reserved = std::min<uint32_t>(11 + 13 + len,
+ mtr->get_log()->MAX_DATA_SIZE);
+ byte* log_ptr = mtr->get_log()->open(reserved);
+ const byte* log_end = log_ptr + reserved;
+ log_ptr = mlog_write_initial_log_record_low(
+ MLOG_UNDO_INSERT,
+ undo_block->page.id.space(), undo_block->page.id.page_no(),
+ log_ptr, mtr);
mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- if (log_ptr + len <= log_end) {
- memcpy(log_ptr, undo_page + old_free + 2, len);
- mlog_close(mtr, log_ptr + len);
+ if (log_ptr + 2 + len <= log_end) {
+ memcpy(log_ptr + 2, undo_block->frame + old_free + 2, len);
+ mlog_close(mtr, log_ptr + 2 + len);
} else {
- mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, undo_page + old_free + 2, len);
+ mlog_close(mtr, log_ptr + 2);
+ mtr->get_log()->push(undo_block->frame + old_free + 2, len);
}
}
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
+/** Parse MLOG_UNDO_INSERT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
byte*
trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page) /*!< in: page or NULL */
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page)
{
ulint len;
- byte* rec;
- ulint first_free;
if (end_ptr < ptr + 2) {
@@ -109,39 +125,32 @@ trx_undo_parse_add_undo_rec(
return(NULL);
}
- if (page == NULL) {
-
- return(ptr + len);
- }
-
- first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- rec = page + first_free;
+ if (page) {
+ ulint first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ byte* rec = page + first_free;
- mach_write_to_2(rec, first_free + 4 + len);
- mach_write_to_2(rec + 2 + len, first_free);
+ mach_write_to_2(rec, first_free + 4 + len);
+ mach_write_to_2(rec + 2 + len, first_free);
- mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- first_free + 4 + len);
- ut_memcpy(rec + 2, ptr, len);
+ mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+ first_free + 4 + len);
+ memcpy(rec + 2, ptr, len);
+ }
- return(ptr + len);
+ return(const_cast<byte*>(ptr + len));
}
-/**********************************************************************//**
-Calculates the free space left for extending an undo log record.
+/** Calculate the free space left for extending an undo log record.
+The result is the page size (srv_page_size) minus the offset of ptr
+within the page frame, minus FIL_PAGE_DATA_END and a further 10 bytes
+that are kept as a safety margin.
+NOTE(review): the subtraction is carried out in ulint; assuming ulint is
+an unsigned type, a ptr that lies within 10 + FIL_PAGE_DATA_END bytes of
+the end of the frame would make the result wrap around to a very large
+value rather than yield 0. Confirm that callers never pass such a ptr.
+@param[in] undo_block undo log page
+@param[in] ptr current end of the undo page
@return bytes left */
-UNIV_INLINE
-ulint
-trx_undo_left(
-/*==========*/
- const page_t* page, /*!< in: undo log page */
- const byte* ptr) /*!< in: pointer to page */
+static ulint trx_undo_left(const buf_block_t* undo_block, const byte* ptr)
{
- /* The '- 10' is a safety margin, in case we have some small
+ /* The 10 is a safety margin, in case we have some small
calculation error below */
-
- return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
+ return srv_page_size - ulint(ptr - undo_block->frame)
+ - (10 + FIL_PAGE_DATA_END);
}
/**********************************************************************//**
@@ -153,7 +162,7 @@ static
ulint
trx_undo_page_set_next_prev_and_add(
/*================================*/
- page_t* undo_page, /*!< in/out: undo log page */
+ buf_block_t* undo_block, /*!< in/out: undo log page */
byte* ptr, /*!< in: ptr up to where data has been
written on this undo page. */
mtr_t* mtr) /*!< in: mtr */
@@ -165,15 +174,15 @@ trx_undo_page_set_next_prev_and_add(
that points to the next free
offset value within undo_page.*/
- ut_ad(ptr > undo_page);
- ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
-
- if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
+ ut_ad(ptr > undo_block->frame);
+ ut_ad(ptr < undo_block->frame + srv_page_size);
+ if (UNIV_UNLIKELY(trx_undo_left(undo_block, ptr) < 2)) {
return(0);
}
- ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
+ ptr_to_first_free = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame;
first_free = mach_read_from_2(ptr_to_first_free);
@@ -181,16 +190,16 @@ trx_undo_page_set_next_prev_and_add(
mach_write_to_2(ptr, first_free);
ptr += 2;
- end_of_rec = ptr - undo_page;
+ end_of_rec = ulint(ptr - undo_block->frame);
/* Write offset of the next undo log record */
- mach_write_to_2(undo_page + first_free, end_of_rec);
+ mach_write_to_2(undo_block->frame + first_free, end_of_rec);
/* Update the offset to first free undo record */
mach_write_to_2(ptr_to_first_free, end_of_rec);
/* Write this log entry to the UNDO log */
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
+ trx_undof_page_add_undo_rec_log(undo_block, first_free,
end_of_rec, mtr);
return(first_free);
@@ -202,7 +211,7 @@ static const ulint VIRTUAL_COL_UNDO_FORMAT_1 = 0xF1;
/** Write virtual column index info (index id and column position in index)
to the undo log
-@param[in,out] undo_page undo log page
+@param[in,out] undo_block undo log page
@param[in] table the table
@param[in] pos the virtual column position
@param[in] ptr undo log record being written
@@ -212,7 +221,7 @@ to the undo log
static
byte*
trx_undo_log_v_idx(
- page_t* undo_page,
+ buf_block_t* undo_block,
const dict_table_t* table,
ulint pos,
byte* ptr,
@@ -231,7 +240,7 @@ trx_undo_log_v_idx(
1 byte for undo log record format version marker */
ulint size = n_idx * (5 + 5) + 5 + 2 + (first_v_col ? 1 : 0);
- if (trx_undo_left(undo_page, ptr) < size) {
+ if (trx_undo_left(undo_block, ptr) < size) {
return(NULL);
}
@@ -260,7 +269,7 @@ trx_undo_log_v_idx(
ptr += mach_write_compressed(ptr, v_index.nth_field);
}
- mach_write_to_2(old_ptr, ptr - old_ptr);
+ mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
return(ptr);
}
@@ -304,7 +313,7 @@ trx_undo_read_v_idx_low(
if (index->id == id) {
const dict_col_t* col = dict_index_get_nth_col(
index, pos);
- ut_ad(dict_col_is_virtual(col));
+ ut_ad(col->is_virtual());
const dict_v_col_t* vcol = reinterpret_cast<
const dict_v_col_t*>(col);
*col_pos = vcol->v_pos;
@@ -361,7 +370,7 @@ trx_undo_read_v_idx(
}
/** Reports in the undo log of an insert of virtual columns.
-@param[in] undo_page undo log page
+@param[in] undo_block undo log page
@param[in] table the table
@param[in] row dtuple contains the virtual columns
@param[in,out] ptr log ptr
@@ -369,7 +378,7 @@ trx_undo_read_v_idx(
static
bool
trx_undo_report_insert_virtual(
- page_t* undo_page,
+ buf_block_t* undo_block,
dict_table_t* table,
const dtuple_t* row,
byte** ptr)
@@ -377,7 +386,7 @@ trx_undo_report_insert_virtual(
byte* start = *ptr;
bool first_v_col = true;
- if (trx_undo_left(undo_page, *ptr) < 2) {
+ if (trx_undo_left(undo_block, *ptr) < 2) {
return(false);
}
@@ -394,7 +403,7 @@ trx_undo_report_insert_virtual(
if (col->m_col.ord_part) {
/* make sure there is enough space to write the length */
- if (trx_undo_left(undo_page, *ptr) < 5) {
+ if (trx_undo_left(undo_block, *ptr) < 5) {
return(false);
}
@@ -402,7 +411,7 @@ trx_undo_report_insert_virtual(
pos += REC_MAX_N_FIELDS;
*ptr += mach_write_compressed(*ptr, pos);
- *ptr = trx_undo_log_v_idx(undo_page, table,
+ *ptr = trx_undo_log_v_idx(undo_block, table,
col_no, *ptr, first_v_col);
first_v_col = false;
@@ -423,8 +432,8 @@ trx_undo_report_insert_virtual(
flen = max_len;
}
- if (trx_undo_left(undo_page, *ptr) < flen + 5) {
-
+ if (trx_undo_left(undo_block, *ptr)
+ < flen + 5) {
return(false);
}
*ptr += mach_write_compressed(*ptr, flen);
@@ -432,8 +441,7 @@ trx_undo_report_insert_virtual(
ut_memcpy(*ptr, vfield->data, flen);
*ptr += flen;
} else {
- if (trx_undo_left(undo_page, *ptr) < 5) {
-
+ if (trx_undo_left(undo_block, *ptr) < 5) {
return(false);
}
@@ -443,7 +451,7 @@ trx_undo_report_insert_virtual(
}
/* Always mark the end of the log with 2 bytes length field */
- mach_write_to_2(start, *ptr - start);
+ mach_write_to_2(start, ulint(*ptr - start));
return(true);
}
@@ -455,7 +463,7 @@ static
ulint
trx_undo_page_report_insert(
/*========================*/
- page_t* undo_page, /*!< in: undo log page */
+ buf_block_t* undo_block, /*!< in: undo log page */
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* clust_entry, /*!< in: index entry which will be
@@ -467,19 +475,21 @@ trx_undo_page_report_insert(
ulint i;
ut_ad(dict_index_is_clust(index));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
+ /* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
+ TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
+ TRX_UNDO_INSERT == 1 into insert_undo pages,
+ or TRX_UNDO_UPDATE == 2 into update_undo pages. */
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + undo_block->frame) <= 2);
- ut_ad(first_free <= UNIV_PAGE_SIZE);
+ first_free = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame);
+ ptr = undo_block->frame + first_free;
- if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
+ ut_ad(first_free <= srv_page_size);
+ if (trx_undo_left(undo_block, ptr) < 2 + 1 + 11 + 11) {
/* Not enough space for writing the general parameters */
-
return(0);
}
@@ -493,13 +503,21 @@ trx_undo_page_report_insert(
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
+ if (UNIV_UNLIKELY(clust_entry->info_bits != 0)) {
+ ut_ad(clust_entry->info_bits == REC_INFO_METADATA);
+ ut_ad(index->is_instant());
+ ut_ad(undo_block->frame[first_free + 2]
+ == TRX_UNDO_INSERT_REC);
+ undo_block->frame[first_free + 2] = TRX_UNDO_INSERT_METADATA;
+ goto done;
+ }
for (i = 0; i < dict_index_get_n_unique(index); i++) {
const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
ulint flen = dfield_get_len(field);
- if (trx_undo_left(undo_page, ptr) < 5) {
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -507,7 +525,7 @@ trx_undo_page_report_insert(
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
+ if (trx_undo_left(undo_block, ptr) < flen) {
return(0);
}
@@ -519,12 +537,13 @@ trx_undo_page_report_insert(
if (index->table->n_v_cols) {
if (!trx_undo_report_insert_virtual(
- undo_page, index->table, clust_entry, &ptr)) {
+ undo_block, index->table, clust_entry, &ptr)) {
return(0);
}
}
- return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
+done:
+ return(trx_undo_page_set_next_prev_and_add(undo_block, ptr, mtr));
}
/**********************************************************************//**
@@ -596,7 +615,7 @@ trx_undo_rec_get_col_val(
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
/* we do not have access to index->table here
- ut_ad(dict_table_get_format(index->table) >= UNIV_FORMAT_B
+ ut_ad(dict_table_has_atomic_blobs(index->table)
|| *len >= col->max_prefix
+ BTR_EXTERN_FIELD_REF_SIZE);
*/
@@ -629,7 +648,7 @@ trx_undo_rec_get_row_ref(
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
+ const dtuple_t**ref, /*!< out, own: row reference */
mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
{
@@ -641,17 +660,17 @@ trx_undo_rec_get_row_ref(
ref_len = dict_index_get_n_unique(index);
- *ref = dtuple_create(heap, ref_len);
+ dtuple_t* tuple = dtuple_create(heap, ref_len);
+ *ref = tuple;
- dict_index_copy_types(*ref, index, ref_len);
+ dict_index_copy_types(tuple, index, ref_len);
for (i = 0; i < ref_len; i++) {
- dfield_t* dfield;
const byte* field;
ulint len;
ulint orig_len;
- dfield = dtuple_get_nth_field(*ref, i);
+ dfield_t* dfield = dtuple_get_nth_field(tuple, i);
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
@@ -761,7 +780,7 @@ trx_undo_page_report_modify_ext(
}
/* Encode spatial status into length. */
- spatial_len |= spatial_status << SPATIAL_STATUS_SHIFT;
+ spatial_len |= ulint(spatial_status) << SPATIAL_STATUS_SHIFT;
if (spatial_status == SPATIAL_ONLY) {
/* If the column is only used by gis index, log its
@@ -840,7 +859,7 @@ static
ulint
trx_undo_page_report_modify(
/*========================*/
- page_t* undo_page, /*!< in: undo log page */
+ buf_block_t* undo_block, /*!< in: undo log page */
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: clustered index where update or
delete marking is done */
@@ -856,48 +875,46 @@ trx_undo_page_report_modify(
virtual column info */
mtr_t* mtr) /*!< in: mtr */
{
- dict_table_t* table = index->table;
ulint first_free;
byte* ptr;
- const byte* field;
- ulint flen;
- ulint col_no;
- ulint type_cmpl;
- byte* type_cmpl_ptr;
- ulint i;
- trx_id_t trx_id;
- ibool ignore_prefix = FALSE;
- byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE];
- bool first_v_col = true;
- ut_a(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
ut_ad(rec_offs_validate(rec, index, offsets));
+ /* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
+ TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
+ TRX_UNDO_INSERT == 1 into insert_undo pages,
+ or TRX_UNDO_UPDATE == 2 into update_undo pages. */
ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
- + undo_page) == TRX_UNDO_UPDATE
- || (dict_table_is_temporary(table)
- && mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
- + undo_page) == TRX_UNDO_INSERT));
- trx_undo_t* update_undo = dict_table_is_temporary(table)
- ? NULL : trx->rsegs.m_redo.update_undo;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
+ + undo_block->frame) <= 2);
- ut_ad(first_free <= UNIV_PAGE_SIZE);
+ first_free = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame);
+ ptr = undo_block->frame + first_free;
- if (trx_undo_left(undo_page, ptr) < 50) {
+ ut_ad(first_free <= srv_page_size);
+ if (trx_undo_left(undo_block, ptr) < 50) {
/* NOTE: the value 50 must be big enough so that the general
fields written below fit on the undo log page */
-
- return(0);
+ return 0;
}
/* Reserve 2 bytes for the pointer to the next undo log record */
ptr += 2;
+ dict_table_t* table = index->table;
+ const byte* field;
+ ulint flen;
+ ulint col_no;
+ ulint type_cmpl;
+ byte* type_cmpl_ptr;
+ ulint i;
+ trx_id_t trx_id;
+ ibool ignore_prefix = FALSE;
+ byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE];
+ bool first_v_col = true;
+
/* Store first some general parameters to the undo log */
if (!update) {
@@ -943,8 +960,8 @@ trx_undo_page_report_modify(
allowed to ignore blob prefixes if the delete marking was done
by some other trx as it must have committed by now for us to
allow an over-write. */
- if (ignore_prefix) {
- ignore_prefix = (trx_id != trx->id);
+ if (trx_id == trx->id) {
+ ignore_prefix = false;
}
ptr += mach_u64_write_compressed(ptr, trx_id);
@@ -962,22 +979,22 @@ trx_undo_page_report_modify(
for (i = 0; i < dict_index_get_n_unique(index); i++) {
+ /* The ordering columns must not be instant added columns. */
+ ut_ad(!rec_offs_nth_default(offsets, i));
field = rec_get_nth_field(rec, offsets, i, &flen);
/* The ordering columns must not be stored externally. */
ut_ad(!rec_offs_nth_extern(offsets, i));
ut_ad(dict_index_get_nth_col(index, i)->ord_part);
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
+ if (trx_undo_left(undo_block, ptr) < flen) {
return(0);
}
@@ -990,8 +1007,7 @@ trx_undo_page_report_modify(
/* Save to the undo log the old values of the columns to be updated. */
if (update) {
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1029,8 +1045,7 @@ trx_undo_page_report_modify(
ulint pos = fld->field_no;
/* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1054,7 +1069,7 @@ trx_undo_page_report_modify(
if (is_virtual) {
ut_ad(fld->field_no < table->n_v_def);
- ptr = trx_undo_log_v_idx(undo_page, table,
+ ptr = trx_undo_log_v_idx(undo_block, table,
fld->field_no, ptr,
first_v_col);
if (ptr == NULL) {
@@ -1077,12 +1092,11 @@ trx_undo_page_report_modify(
flen, max_v_log_len);
}
} else {
- field = rec_get_nth_field(rec, offsets,
- pos, &flen);
+ field = rec_get_nth_cfield(
+ rec, index, offsets, pos, &flen);
}
- if (trx_undo_left(undo_page, ptr) < 15) {
-
+ if (trx_undo_left(undo_block, ptr) < 15) {
return(0);
}
@@ -1105,21 +1119,13 @@ trx_undo_page_report_modify(
dict_table_page_size(table),
&field, &flen, SPATIAL_UNKNOWN);
- /* Notify purge that it eventually has to
- free the old externally stored field */
-
- if (update_undo) {
- update_undo->del_marks = TRUE;
- }
-
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
} else {
ptr += mach_write_compressed(ptr, flen);
}
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
+ if (trx_undo_left(undo_block, ptr) < flen) {
return(0);
}
@@ -1136,16 +1142,15 @@ trx_undo_page_report_modify(
flen, max_v_log_len);
}
- if (trx_undo_left(undo_page, ptr) < 15) {
-
+ if (trx_undo_left(undo_block, ptr) < 15) {
return(0);
}
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
+ if (trx_undo_left(undo_block, ptr)
+ < flen) {
return(0);
}
@@ -1179,12 +1184,7 @@ trx_undo_page_report_modify(
double mbr[SPDIMS * 2];
mem_heap_t* row_heap = NULL;
- if (update_undo) {
- update_undo->del_marks = TRUE;
- }
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1250,16 +1250,15 @@ trx_undo_page_report_modify(
if (true) {
/* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5 + 15) {
-
+ if (trx_undo_left(undo_block, ptr) < 5 + 15) {
return(0);
}
ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos,
- &flen);
+ field = rec_get_nth_cfield(
+ rec, index, offsets, pos, &flen);
if (is_ext) {
const dict_col_t* col =
@@ -1299,9 +1298,8 @@ trx_undo_page_report_modify(
if (flen != UNIV_SQL_NULL
&& spatial_status != SPATIAL_ONLY) {
- if (trx_undo_left(undo_page, ptr)
+ if (trx_undo_left(undo_block, ptr)
< flen) {
-
return(0);
}
@@ -1310,7 +1308,7 @@ trx_undo_page_report_modify(
}
if (spatial_status != SPATIAL_NONE) {
- if (trx_undo_left(undo_page, ptr)
+ if (trx_undo_left(undo_block, ptr)
< DATA_MBR_LEN) {
return(0);
}
@@ -1341,8 +1339,7 @@ already_logged:
/* Write field number to undo log.
Make sure there is enough space in log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1350,7 +1347,7 @@ already_logged:
ptr += mach_write_compressed(ptr, pos);
ut_ad(col_no < table->n_v_def);
- ptr = trx_undo_log_v_idx(undo_page, table,
+ ptr = trx_undo_log_v_idx(undo_block, table,
col_no, ptr,
first_v_col);
first_v_col = false;
@@ -1392,9 +1389,8 @@ already_logged:
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr)
+ if (trx_undo_left(undo_block, ptr)
< flen) {
-
return(0);
}
@@ -1404,7 +1400,7 @@ already_logged:
}
}
- mach_write_to_2(old_ptr, ptr - old_ptr);
+ mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
if (row_heap) {
mem_heap_free(row_heap);
@@ -1413,22 +1409,20 @@ already_logged:
/*----------------------------------------*/
/* Write pointers to the previous and the next undo log records */
- if (trx_undo_left(undo_page, ptr) < 2) {
-
+ if (trx_undo_left(undo_block, ptr) < 2) {
return(0);
}
mach_write_to_2(ptr, first_free);
ptr += 2;
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
+ const ulint new_free = ulint(ptr - undo_block->frame);
+ mach_write_to_2(undo_block->frame + first_free, new_free);
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame, new_free);
/* Write to the REDO log about this change in the UNDO log */
-
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
+ trx_undof_page_add_undo_rec_log(undo_block, first_free, new_free, mtr);
return(first_free);
}
@@ -1511,7 +1505,7 @@ trx_undo_update_rec_get_update(
buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
- trx_write_trx_id(buf, trx_id);
+ mach_write_to_6(buf, trx_id);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
@@ -1539,6 +1533,7 @@ trx_undo_update_rec_get_update(
ulint orig_len;
bool is_virtual;
+ upd_field = upd_get_nth_field(update, i);
field_no = mach_read_next_compressed(&ptr);
is_virtual = (field_no >= REC_MAX_N_FIELDS);
@@ -1550,27 +1545,6 @@ trx_undo_update_rec_get_update(
index->table, ptr, first_v_col, &is_undo_log,
&field_no);
first_v_col = false;
- } else if (field_no >= dict_index_get_n_fields(index)) {
- ib::error() << "Trying to access update undo rec"
- " field " << field_no
- << " in index " << index->name
- << " of table " << index->table->name
- << " but index has only "
- << dict_index_get_n_fields(index)
- << " fields " << BUG_REPORT_MSG
- << ". Run also CHECK TABLE "
- << index->table->name << "."
- " n_fields = " << n_fields << ", i = " << i
- << ", ptr " << ptr;
-
- ut_ad(0);
- *upd = NULL;
- return(NULL);
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- if (is_virtual) {
/* This column could be dropped or no longer indexed */
if (field_no == ULINT_UNDEFINED) {
/* Mark this is no longer needed */
@@ -1584,10 +1558,31 @@ trx_undo_update_rec_get_update(
continue;
}
- upd_field_set_v_field_no(
- upd_field, field_no, index);
- } else {
+ upd_field_set_v_field_no(upd_field, field_no, index);
+ } else if (field_no < index->n_fields) {
upd_field_set_field_no(upd_field, field_no, index);
+ } else if (update->info_bits == REC_INFO_MIN_REC_FLAG
+ && index->is_instant()) {
+ /* This must be a rollback of a subsequent
+ instant ADD COLUMN operation. This will be
+ detected and handled by btr_cur_trim(). */
+ upd_field->field_no = field_no;
+ upd_field->orig_len = 0;
+ } else {
+ ib::error() << "Trying to access update undo rec"
+ " field " << field_no
+ << " in index " << index->name
+ << " of table " << index->table->name
+ << " but index has only "
+ << dict_index_get_n_fields(index)
+ << " fields " << BUG_REPORT_MSG
+ << ". Run also CHECK TABLE "
+ << index->table->name << "."
+ " n_fields = " << n_fields << ", i = " << i;
+
+ ut_ad(0);
+ *upd = NULL;
+ return(NULL);
}
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
@@ -1680,7 +1675,7 @@ trx_undo_rec_get_partial_row(
bool first_v_col = true;
bool is_undo_log = true;
- ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
*row = dtuple_create_with_vcol(
heap, dict_table_get_n_cols(index->table),
@@ -1817,8 +1812,7 @@ trx_undo_rec_get_partial_row(
&& spatial_status != SPATIAL_ONLY) {
ut_a(dfield_get_len(dfield)
>= BTR_EXTERN_FIELD_REF_SIZE);
- ut_a(dict_table_get_format(index->table)
- >= UNIV_FORMAT_B
+ ut_a(dict_table_has_atomic_blobs(index->table)
|| dfield_get_len(dfield)
>= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
@@ -1829,51 +1823,22 @@ trx_undo_rec_get_partial_row(
return(const_cast<byte*>(ptr));
}
-/***********************************************************************//**
-Erases the unused undo log page end.
-@return TRUE if the page contained something, FALSE if it was empty */
-static MY_ATTRIBUTE((nonnull))
-ibool
-trx_undo_erase_page_end(
-/*====================*/
- page_t* undo_page, /*!< in/out: undo page whose end to erase */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+/** Erase the unused undo log page end.
+Reads the first-free offset stored in the page header field at
+TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE and zero-fills the page from that
+offset up to, but not including, the last FIL_PAGE_DATA_END bytes of the
+page (srv_page_size bytes in total).
+@param[in,out] undo_page undo log page
+@return whether the page contained something, that is, whether the
+first-free offset differed from TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE */
+bool
+trx_undo_erase_page_end(page_t* undo_page)
{
ulint first_free;
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
- memset(undo_page + first_free, 0xff,
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
+ memset(undo_page + first_free, 0,
+ (srv_page_size - FIL_PAGE_DATA_END) - first_free);
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
return(first_free != TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
}
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr != NULL);
- ut_ad(end_ptr != NULL);
-
- if (page == NULL) {
-
- return(ptr);
- }
-
- trx_undo_erase_page_end(page, mtr);
-
- return(ptr);
-}
-
/** Report a RENAME TABLE operation.
@param[in,out] trx transaction
@param[in] table table that is being renamed
@@ -1890,7 +1855,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
+ block->frame;
ulint first_free = mach_read_from_2(ptr_first_free);
ut_ad(first_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- ut_ad(first_free <= UNIV_PAGE_SIZE);
+ ut_ad(first_free <= srv_page_size);
byte* start = block->frame + first_free;
size_t len = strlen(table->name.m_name);
const size_t fixed = 2 + 1 + 11 + 11 + 2;
@@ -1900,7 +1865,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE
< UNIV_PAGE_SIZE_MIN - 10 - FIL_PAGE_DATA_END);
- if (trx_undo_left(block->frame, start) < fixed + len) {
+ if (trx_undo_left(block, start) < fixed + len) {
ut_ad(first_free > TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_HDR_SIZE);
return 0;
@@ -1918,7 +1883,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
mach_write_to_2(start, offset);
mach_write_to_2(ptr_first_free, offset);
- trx_undof_page_add_undo_rec_log(block->frame, first_free, offset, mtr);
+ trx_undof_page_add_undo_rec_log(block, first_free, offset, mtr);
return first_free;
}
@@ -1931,48 +1896,33 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
ut_ad(!trx->read_only);
ut_ad(trx->id);
ut_ad(!table->is_temporary());
- ut_ad(srv_safe_truncate);
-
- trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
- trx_undo_t** pundo = &trx->rsegs.m_redo.insert_undo;
- mutex_enter(&trx->undo_mutex);
- dberr_t err = *pundo
- ? DB_SUCCESS
- : trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT);
- ut_ad((err == DB_SUCCESS) == (*pundo != NULL));
- if (trx_undo_t* undo = *pundo) {
- mtr_t mtr;
- mtr.start();
-
- buf_block_t* block = buf_page_get_gen(
- page_id_t(undo->space, undo->last_page_no),
- univ_page_size, RW_X_LATCH,
- buf_pool_is_obsolete(undo->withdraw_clock)
- ? NULL : undo->guess_block,
- BUF_GET, __FILE__, __LINE__, &mtr, &err);
- ut_ad((err == DB_SUCCESS) == !!block);
- for (ut_d(int loop_count = 0); block;) {
+ mtr_t mtr;
+ dberr_t err;
+ mtr.start();
+ if (buf_block_t* block = trx_undo_assign(trx, &err, &mtr)) {
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
+ ut_ad(err == DB_SUCCESS);
+ ut_ad(undo);
+ for (ut_d(int loop_count = 0);;) {
ut_ad(++loop_count < 2);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
ut_ad(undo->last_page_no == block->page.id.page_no());
if (ulint offset = trx_undo_page_report_rename(
trx, table, block, &mtr)) {
undo->withdraw_clock = buf_withdraw_clock;
- undo->empty = FALSE;
undo->top_page_no = undo->last_page_no;
undo->top_offset = offset;
undo->top_undo_no = trx->undo_no++;
undo->guess_block = block;
+ ut_ad(!undo->empty());
- trx->undo_rseg_space = rseg->space;
err = DB_SUCCESS;
break;
} else {
mtr.commit();
mtr.start();
- block = trx_undo_add_page(trx, undo, &mtr);
+ block = trx_undo_add_page(undo, &mtr);
if (!block) {
err = DB_OUT_OF_FILE_SPACE;
break;
@@ -1983,7 +1933,6 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
mtr.commit();
}
- mutex_exit(&trx->undo_mutex);
return err;
}
@@ -2017,8 +1966,6 @@ trx_undo_report_row_operation(
undo log record */
{
trx_t* trx;
- ulint page_no;
- buf_block_t* undo_block;
mtr_t mtr;
#ifdef UNIV_DEBUG
int loop_count = 0;
@@ -2038,7 +1985,7 @@ trx_undo_report_row_operation(
mtr.start();
trx_undo_t** pundo;
trx_rseg_t* rseg;
- const bool is_temp = dict_table_is_temporary(index->table);
+ const bool is_temp = index->table->is_temporary();
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
@@ -2048,63 +1995,32 @@ trx_undo_report_row_operation(
} else {
ut_ad(!trx->read_only);
ut_ad(trx->id);
- /* Keep INFORMATION_SCHEMA.TABLES.UPDATE_TIME
- up-to-date for persistent tables. Temporary tables are
- not listed there. */
- trx->mod_tables.insert(index->table);
-
- pundo = !rec
- ? &trx->rsegs.m_redo.insert_undo
- : &trx->rsegs.m_redo.update_undo;
+ pundo = &trx->rsegs.m_redo.undo;
rseg = trx->rsegs.m_redo.rseg;
}
- mutex_enter(&trx->undo_mutex);
- dberr_t err;
+ dberr_t err;
+ buf_block_t* undo_block = trx_undo_assign_low(trx, rseg, pundo,
+ &err, &mtr);
+ trx_undo_t* undo = *pundo;
- if (*pundo) {
- err = DB_SUCCESS;
- } else if (!rec || is_temp) {
- err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT);
- } else {
- err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_UPDATE);
- }
-
- trx_undo_t* undo = *pundo;
-
- ut_ad((err == DB_SUCCESS) == (undo != NULL));
- if (undo == NULL) {
+ ut_ad((err == DB_SUCCESS) == (undo_block != NULL));
+ if (UNIV_UNLIKELY(undo_block == NULL)) {
goto err_exit;
}
- page_no = undo->last_page_no;
-
- undo_block = buf_page_get_gen(
- page_id_t(undo->space, page_no), univ_page_size, RW_X_LATCH,
- buf_pool_is_obsolete(undo->withdraw_clock)
- ? NULL : undo->guess_block, BUF_GET, __FILE__, __LINE__,
- &mtr, &err);
-
- buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
+ ut_ad(undo != NULL);
do {
- ut_ad(page_no == undo_block->page.id.page_no());
- page_t* undo_page = buf_block_get_frame(undo_block);
ulint offset = !rec
? trx_undo_page_report_insert(
- undo_page, trx, index, clust_entry, &mtr)
+ undo_block, trx, index, clust_entry, &mtr)
: trx_undo_page_report_modify(
- undo_page, trx, index, rec, offsets, update,
+ undo_block, trx, index, rec, offsets, update,
cmpl_info, clust_entry, &mtr);
if (UNIV_UNLIKELY(offset == 0)) {
- /* The record did not fit on the page. We erase the
- end segment of the undo log page and write a log
- record of it: this is to ensure that in the debug
- version the replicate page constructed using the log
- records stays identical to the original page */
-
- if (!trx_undo_erase_page_end(undo_page, &mtr)) {
+ if (!trx_undo_erase_page_end(undo_block->frame)) {
/* The record did not fit on an empty
undo page. Discard the freshly allocated
page and return an error. */
@@ -2118,7 +2034,7 @@ trx_undo_report_row_operation(
first, because it may be holding lower-level
latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
- mtr_commit(&mtr);
+ mtr.commit();
mtr.start();
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
@@ -2138,22 +2054,39 @@ trx_undo_report_row_operation(
undo->withdraw_clock = buf_withdraw_clock;
mtr_commit(&mtr);
- undo->empty = FALSE;
- undo->top_page_no = page_no;
+ undo->top_page_no = undo_block->page.id.page_no();
undo->top_offset = offset;
undo->top_undo_no = trx->undo_no++;
undo->guess_block = undo_block;
-
- trx->undo_rseg_space = rseg->space;
-
- mutex_exit(&trx->undo_mutex);
+ ut_ad(!undo->empty());
+
+ if (!is_temp) {
+ const undo_no_t limit = undo->top_undo_no;
+ /* Determine if this is the first time
+ when this transaction modifies a
+ system-versioned column in this table. */
+ trx_mod_table_time_t& time
+ = trx->mod_tables.insert(
+ trx_mod_tables_t::value_type(
+ index->table, limit))
+ .first->second;
+ ut_ad(time.valid(limit));
+
+ if (!time.is_versioned()
+ && index->table->versioned_by_id()
+ && (!rec /* INSERT */
+ || (update
+ && update->affects_versioned()))) {
+ time.set_versioned(limit);
+ }
+ }
*roll_ptr = trx_undo_build_roll_ptr(
- !rec, rseg->id, page_no, offset);
+ !rec, rseg->id, undo->top_page_no, offset);
return(DB_SUCCESS);
}
- ut_ad(page_no == undo->last_page_no);
+ ut_ad(undo_block->page.id.page_no() == undo->last_page_no);
/* We have to extend the undo log by one page */
@@ -2164,12 +2097,11 @@ trx_undo_report_row_operation(
mtr.set_log_mode(MTR_LOG_NO_REDO);
}
- undo_block = trx_undo_add_page(trx, undo, &mtr);
- page_no = undo->last_page_no;
+ undo_block = trx_undo_add_page(undo, &mtr);
DBUG_EXECUTE_IF("ib_err_ins_undo_page_add_failure",
undo_block = NULL;);
- } while (undo_block != NULL);
+ } while (UNIV_LIKELY(undo_block != NULL));
ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
DB_OUT_OF_FILE_SPACE,
@@ -2178,14 +2110,13 @@ trx_undo_report_row_operation(
" log pages. Please add new data file to the tablespace or"
" check if filesystem is full or enable auto-extension for"
" the tablespace",
- undo->space == TRX_SYS_SPACE
+ undo->rseg->space == fil_system.sys_space
? "system" : is_temp ? "temporary" : "undo");
/* Did not succeed: out of space */
err = DB_OUT_OF_FILE_SPACE;
err_exit:
- mutex_exit(&trx->undo_mutex);
mtr_commit(&mtr);
return(err);
}
@@ -2214,12 +2145,13 @@ trx_undo_get_undo_rec_low(
&offset);
ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- rseg = trx_sys->rseg_array[rseg_id];
+ rseg = trx_sys.rseg_array[rseg_id];
+ ut_ad(rseg->is_persistent());
mtr_start(&mtr);
undo_page = trx_undo_page_get_s_latched(
- page_id_t(rseg->space, page_no), &mtr);
+ page_id_t(rseg->space->id, page_no), &mtr);
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
@@ -2251,14 +2183,14 @@ trx_undo_get_undo_rec(
{
bool missing_history;
- rw_lock_s_lock(&purge_sys->latch);
+ rw_lock_s_lock(&purge_sys.latch);
- missing_history = purge_sys->view.changes_visible(trx_id, name);
+ missing_history = purge_sys.view.changes_visible(trx_id, name);
if (!missing_history) {
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
- rw_lock_s_unlock(&purge_sys->latch);
+ rw_lock_s_unlock(&purge_sys.latch);
return(missing_history);
}
@@ -2320,12 +2252,13 @@ trx_undo_prev_version_build(
bool dummy_extern;
byte* buf;
- ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_S));
+ ut_ad(!index->table->is_temporary());
+ ut_ad(!rw_lock_own(&purge_sys.latch, RW_LOCK_S));
ut_ad(mtr_memo_contains_page_flagged(index_mtr, index_rec,
MTR_MEMO_PAGE_S_FIX
| MTR_MEMO_PAGE_X_FIX));
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(dict_index_is_clust(index));
+ ut_a(index->is_primary());
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
@@ -2336,8 +2269,6 @@ trx_undo_prev_version_build(
return(true);
}
- ut_ad(!dict_table_is_temporary(index->table));
-
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
ut_ad(!index->table->skip_alter_undo);
@@ -2347,8 +2278,7 @@ trx_undo_prev_version_build(
&undo_rec)) {
if (v_status & TRX_UNDO_PREV_IN_PURGE) {
/* We are fetching the record being purged */
- undo_rec = trx_undo_get_undo_rec_low(
- roll_ptr, heap);
+ undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
} else {
/* The undo record may already have been purged,
during purge or semi-consistent read. */
@@ -2370,12 +2300,12 @@ trx_undo_prev_version_build(
&info_bits);
/* (a) If a clustered index record version is such that the
- trx id stamp in it is bigger than purge_sys->view, then the
+ trx id stamp in it is bigger than purge_sys.view, then the
BLOBs in that version are known to exist (the purge has not
progressed that far);
(b) if the version is the first version such that trx id in it
- is less than purge_sys->view, and it is not delete-marked,
+ is less than purge_sys.view, and it is not delete-marked,
then the BLOBs in that version are known to exist (the purge
cannot have purged the BLOBs referenced by that version
yet).
@@ -2414,19 +2344,19 @@ trx_undo_prev_version_build(
the BLOB. */
/* the row_upd_changes_disowned_external(update) call could be
- omitted, but the synchronization on purge_sys->latch is likely
+ omitted, but the synchronization on purge_sys.latch is likely
more expensive. */
if ((update->info_bits & REC_INFO_DELETED_FLAG)
&& row_upd_changes_disowned_external(update)) {
bool missing_extern;
- rw_lock_s_lock(&purge_sys->latch);
+ rw_lock_s_lock(&purge_sys.latch);
- missing_extern = purge_sys->view.changes_visible(
+ missing_extern = purge_sys.view.changes_visible(
trx_id, index->table->name);
- rw_lock_s_unlock(&purge_sys->latch);
+ rw_lock_s_unlock(&purge_sys.latch);
if (missing_extern) {
/* treat as a fresh insert, not to
@@ -2459,7 +2389,7 @@ trx_undo_prev_version_build(
heap, rec_offs_size(offsets)));
*old_vers = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(*old_vers, index, offsets);
+ rec_offs_make_valid(*old_vers, index, true, offsets);
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index f1a19d91d0b..65aa454808f 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -34,7 +34,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mach0data.h"
#include "pars0pars.h"
#include "que0que.h"
-#include "read0read.h"
#include "row0mysql.h"
#include "row0undo.h"
#include "srv0mon.h"
@@ -49,19 +48,53 @@ Created 3/26/1996 Heikki Tuuri
rollback */
static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;
-/** true if trx_rollback_or_clean_all_recovered() thread is active */
-bool trx_rollback_or_clean_is_active;
+/** true if trx_rollback_all_recovered() thread is active */
+bool trx_rollback_is_active;
/** In crash recovery, the current trx to be rolled back; NULL otherwise */
const trx_t* trx_roll_crash_recv_trx;
-/****************************************************************//**
-Finishes a transaction rollback. */
-static
-void
-trx_rollback_finish(
-/*================*/
- trx_t* trx); /*!< in: transaction */
+/** Finish transaction rollback.
+If trx->error_state is DB_SUCCESS, the transaction is committed with
+trx_commit(). Otherwise the state must be DB_INTERRUPTED: the undo log
+objects still attached to the transaction are removed from their rollback
+segment lists and freed, and trx_commit_low(trx, NULL) is called instead.
+In both cases trx->mod_tables is cleared first and trx->lock.que_state is
+set to TRX_QUE_RUNNING before returning.
+@param[in,out] trx transaction
+@return whether the rollback was completed normally
+@retval false if the rollback was aborted by shutdown */
+static bool trx_rollback_finish(trx_t* trx)
+{
+ trx->mod_tables.clear();
+ bool finished = trx->error_state == DB_SUCCESS;
+ if (UNIV_LIKELY(finished)) {
+ trx_commit(trx);
+ } else { /* rollback did not complete; only DB_INTERRUPTED is accepted */
+ ut_a(trx->error_state == DB_INTERRUPTED);
+ ut_ad(!srv_is_being_started);
+ ut_a(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+ ut_d(trx->in_rollback = false); /* NOTE(review): trx->in_rollback is
+ assigned without ut_d() elsewhere in trx0roll.cc; if ut_d() is
+ debug-only, this reset is skipped in release builds - confirm */
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) { /* reference: undo = NULL below clears the member */
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->old_insert_list,
+ undo);
+ ut_free(undo);
+ undo = NULL;
+ }
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) { /* same for the main persistent undo log */
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list,
+ undo);
+ ut_free(undo);
+ undo = NULL;
+ }
+ if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) { /* same for the m_noredo undo log */
+ UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list,
+ undo);
+ ut_free(undo);
+ undo = NULL;
+ }
+ trx_commit_low(trx, NULL); /* NULL: presumably "no mini-transaction" - confirm against trx_commit_low() */
+ }
+
+ trx->lock.que_state = TRX_QUE_RUNNING;
+
+ return finished;
+}
/*******************************************************************//**
Rollback a transaction used in MySQL. */
@@ -91,7 +124,7 @@ trx_rollback_to_savepoint_low(
trx->error_state = DB_SUCCESS;
- if (trx->has_logged()) {
+ if (trx->has_logged_or_recovered()) {
ut_ad(trx->rsegs.m_redo.rseg != 0
|| trx->rsegs.m_noredo.rseg != 0);
@@ -115,13 +148,20 @@ trx_rollback_to_savepoint_low(
trx_rollback_finish(trx);
MONITOR_INC(MONITOR_TRX_ROLLBACK);
} else {
+ ut_a(trx->error_state == DB_SUCCESS);
+ const undo_no_t limit = savept->least_undo_no;
+ for (trx_mod_tables_t::iterator i = trx->mod_tables.begin();
+ i != trx->mod_tables.end(); ) {
+ trx_mod_tables_t::iterator j = i++;
+ ut_ad(j->second.valid());
+ if (j->second.rollback(limit)) {
+ trx->mod_tables.erase(j);
+ }
+ }
trx->lock.que_state = TRX_QUE_RUNNING;
MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
}
- ut_a(trx->error_state == DB_SUCCESS);
- ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
-
mem_heap_free(heap);
/* There might be work for utility threads.*/
@@ -170,8 +210,6 @@ trx_rollback_for_mysql_low(
trx->op_info = "";
- ut_a(trx->error_state == DB_SUCCESS);
-
return(trx->error_state);
}
@@ -180,7 +218,7 @@ trx_rollback_for_mysql_low(
@return error code or DB_SUCCESS */
dberr_t trx_rollback_for_mysql(trx_t* trx)
{
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the rollback should be invoked for a running
active MySQL transaction (or recovered prepared transaction)
that is associated with the current thread. */
@@ -188,35 +226,38 @@ dberr_t trx_rollback_for_mysql(trx_t* trx)
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
trx->will_lock = 0;
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
return(DB_SUCCESS);
case TRX_STATE_ACTIVE:
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
assert_trx_nonlocking_or_in_list(trx);
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
ut_ad(!trx_is_autocommit_non_locking(trx));
- if (trx->has_logged_persistent()) {
+ if (trx->rsegs.m_redo.undo || trx->rsegs.m_redo.old_insert) {
/* Change the undo log state back from
TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE
so that if the system gets killed,
recovery will perform the rollback. */
- trx_undo_ptr_t* undo_ptr = &trx->rsegs.m_redo;
+ ut_ad(!trx->rsegs.m_redo.undo
+ || trx->rsegs.m_redo.undo->rseg
+ == trx->rsegs.m_redo.rseg);
+ ut_ad(!trx->rsegs.m_redo.old_insert
+ || trx->rsegs.m_redo.old_insert->rseg
+ == trx->rsegs.m_redo.rseg);
mtr_t mtr;
mtr.start();
mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
- if (undo_ptr->insert_undo != NULL) {
- trx_undo_set_state_at_prepare(
- trx, undo_ptr->insert_undo,
- true, &mtr);
+ if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
+ trx_undo_set_state_at_prepare(trx, undo, true,
+ &mtr);
}
- if (undo_ptr->update_undo != NULL) {
- trx_undo_set_state_at_prepare(
- trx, undo_ptr->update_undo,
- true, &mtr);
+ if (trx_undo_t* undo = trx->rsegs.m_redo.old_insert) {
+ trx_undo_set_state_at_prepare(trx, undo, true,
+ &mtr);
}
mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
/* Persist the XA ROLLBACK, so that crash
@@ -263,11 +304,11 @@ trx_rollback_last_sql_stat_for_mysql(
{
dberr_t err;
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the statement rollback should be invoked for a
running active MySQL transaction that is associated with the
current thread. */
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
@@ -390,7 +431,7 @@ trx_rollback_to_savepoint_for_mysql_low(
dberr_t err;
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
/* Free all savepoints strictly later than savep. */
@@ -443,11 +484,11 @@ trx_rollback_to_savepoint_for_mysql(
{
trx_named_savept_t* savep;
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the savepoint rollback should be invoked for a
running active MySQL transaction that is associated with the
current thread. */
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
savep = trx_savepoint_find(trx, savepoint_name);
@@ -541,7 +582,7 @@ trx_release_savepoint_for_mysql(
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true)
|| trx_state_eq(trx, TRX_STATE_PREPARED, true));
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
savep = trx_savepoint_find(trx, savepoint_name);
@@ -592,8 +633,6 @@ trx_rollback_active(
que_fork_t* fork;
que_thr_t* thr;
roll_node_t* roll_node;
- dict_table_t* table;
- ibool dictionary_locked = FALSE;
const trx_id_t trx_id = trx->id;
ut_ad(trx_id);
@@ -616,9 +655,11 @@ trx_rollback_active(
trx_roll_crash_recv_trx = trx;
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+ const bool dictionary_locked = trx_get_dict_operation(trx)
+ != TRX_DICT_OP_NONE;
+
+ if (dictionary_locked) {
row_mysql_lock_data_dictionary(trx);
- dictionary_locked = TRUE;
}
que_run_threads(thr);
@@ -626,46 +667,26 @@ trx_rollback_active(
que_run_threads(roll_node->undo_thr);
- if (trx->error_state != DB_SUCCESS) {
- ut_ad(trx->error_state == DB_INTERRUPTED);
- ut_ad(!srv_is_being_started);
- ut_ad(!srv_undo_sources);
- ut_ad(srv_fast_shutdown);
+ que_graph_free(
+ static_cast<que_t*>(roll_node->undo_thr->common.parent));
+
+ if (UNIV_UNLIKELY(!trx_rollback_finish(trx))) {
ut_ad(!dictionary_locked);
- que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
goto func_exit;
}
- trx_rollback_finish(thr_get_trx(roll_node->undo_thr));
-
- /* Free the memory reserved by the undo graph */
- que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
-
ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
- && trx->table_id != 0) {
-
- ut_ad(dictionary_locked);
+ if (!dictionary_locked || !trx->table_id) {
+ } else if (dict_table_t* table = dict_table_open_on_id(
+ trx->table_id, TRUE, DICT_TABLE_OP_NORMAL)) {
+ ib::info() << "Dropping table " << table->name
+ << ", with id " << trx->table_id
+ << " in recovery";
- /* If the transaction was for a dictionary operation,
- we drop the relevant table only if it is not flagged
- as DISCARDED. If it still exists. */
+ dict_table_close_and_drop(trx, table);
- table = dict_table_open_on_id(
- trx->table_id, TRUE, DICT_TABLE_OP_NORMAL);
-
- if (table && !dict_table_is_discarded(table)) {
- ib::warn() << "Dropping table '" << table->name
- << "', with id " << trx->table_id
- << " in recovery";
-
- dict_table_close_and_drop(trx, table);
-
- trx_commit_for_mysql(trx);
- }
+ trx_commit_for_mysql(trx);
}
ib::info() << "Rolled back recovered transaction " << trx_id;
@@ -680,195 +701,142 @@ func_exit:
trx_roll_crash_recv_trx = NULL;
}
-/*******************************************************************//**
-Rollback or clean up any resurrected incomplete transactions. It assumes
-that the caller holds the trx_sys_t::mutex and it will release the
-lock if it does a clean up or rollback.
-@return TRUE if the transaction was cleaned up or rolled back
-and trx_sys->mutex was released. */
-static
-ibool
-trx_rollback_resurrected(
-/*=====================*/
- trx_t* trx, /*!< in: transaction to rollback or clean */
- ibool* all) /*!< in/out: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-{
- ut_ad(trx_sys_mutex_own());
-
- /* The trx->is_recovered flag and trx->state are set
- atomically under the protection of the trx->mutex (and
- lock_sys->mutex) in lock_trx_release_locks(). We do not want
- to accidentally clean up a non-recovered transaction here. */
-
- trx_mutex_enter(trx);
- if (!trx->is_recovered) {
-func_exit:
- trx_mutex_exit(trx);
- return(FALSE);
- }
-
- switch (trx->state) {
- case TRX_STATE_COMMITTED_IN_MEMORY:
- trx_mutex_exit(trx);
- trx_sys_mutex_exit();
- ib::info() << "Cleaning up trx with id " << ib::hex(trx->id);
- trx_cleanup_at_db_startup(trx);
- trx_free_resurrected(trx);
- return(TRUE);
- case TRX_STATE_ACTIVE:
- if (!srv_is_being_started
- && !srv_undo_sources && srv_fast_shutdown) {
-fake_prepared:
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- *all = FALSE;
- goto func_exit;
- }
- trx_mutex_exit(trx);
+struct trx_roll_count_callback_arg /* totals accumulated by trx_roll_count_callback() */
+{
+ uint32_t n_trx; /*!< number of recovered ACTIVE transactions counted */
+ uint64_t n_rows; /*!< sum of trx->undo_no over the counted transactions */
+ trx_roll_count_callback_arg(): n_trx(0), n_rows(0) {}
+};
- if (*all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_sys_mutex_exit();
- trx_rollback_active(trx);
- if (trx->error_state != DB_SUCCESS) {
- ut_ad(trx->error_state == DB_INTERRUPTED);
- trx->error_state = DB_SUCCESS;
- ut_ad(!srv_undo_sources);
- ut_ad(srv_fast_shutdown);
- mutex_enter(&trx_sys->mutex);
- trx_mutex_enter(trx);
- goto fake_prepared;
- }
- trx_free_for_background(trx);
- return(TRUE);
- }
- return(FALSE);
- case TRX_STATE_PREPARED:
- case TRX_STATE_PREPARED_RECOVERED:
- goto func_exit;
- case TRX_STATE_NOT_STARTED:
- break;
- }
- ut_error;
- goto func_exit;
+static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element,
+ trx_roll_count_callback_arg *arg)
+{ /* Callback passed to trx_sys.rw_trx_hash.iterate_no_dups() by
+ trx_roll_report_progress(): while holding element->mutex, add the
+ element's transaction to the totals in *arg if it is recovered and in
+ TRX_STATE_ACTIVE. Always returns 0. */
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx) /* element->trx may be NULL */
+ {
+ if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE))
+ {
+ arg->n_trx++;
+ arg->n_rows+= trx->undo_no; /* reported to the user as "rows" */
+ }
+ }
+ mutex_exit(&element->mutex);
+ return 0;
+}
-/** Report progress when rolling back a row of a recovered transaction.
-@return whether the rollback should be aborted due to pending shutdown */
-bool
-trx_roll_must_shutdown()
+/** Report progress when rolling back a row of a recovered transaction.
+When recv_sys->report() indicates that a report is due, count the recovered
+ACTIVE transactions and the sum of their trx->undo_no in trx_sys.rw_trx_hash,
+log the totals, and, if the sum is nonzero, also pass them to
+service_manager_extend_timeout(). */
+void trx_roll_report_progress()
{
- const trx_t* trx = trx_roll_crash_recv_trx;
- ut_ad(trx);
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-
- if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
- && !srv_is_being_started
- && !srv_undo_sources && srv_fast_shutdown) {
- return true;
- }
-
time_t now = time(NULL);
- mutex_enter(&trx_sys->mutex);
mutex_enter(&recv_sys->mutex);
-
- if (recv_sys->report(now)) {
- ulint n_trx = 0;
- ulonglong n_rows = 0;
- for (const trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- t != NULL;
- t = UT_LIST_GET_NEXT(trx_list, t)) {
-
- assert_trx_in_rw_list(t);
- if (t->is_recovered
- && trx_state_eq(t, TRX_STATE_ACTIVE)) {
- n_trx++;
- n_rows += t->undo_no;
- }
- }
- if (n_rows > 0) {
- service_manager_extend_timeout(
- INNODB_EXTEND_TIMEOUT_INTERVAL,
- "To roll back: " ULINTPF " transactions, "
- "%llu rows", n_trx, n_rows);
- }
-
- ib::info() << "To roll back: " << n_trx << " transactions, "
- << n_rows << " rows";
- }
-
+ bool report = recv_sys->report(now); /* evaluated while holding recv_sys->mutex */
mutex_exit(&recv_sys->mutex);
- mutex_exit(&trx_sys->mutex);
- return false;
-}
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
-void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all) /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-{
- trx_t* trx;
+ if (report) {
+ trx_roll_count_callback_arg arg;
- ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
+ /* Get the number of recovered active transactions and the number
+ of rows they modified. The numbers must be accurate, because only
+ this thread is allowed to touch recovered transactions. */
+ trx_sys.rw_trx_hash.iterate_no_dups(
+ reinterpret_cast<my_hash_walk_action>
+ (trx_roll_count_callback), &arg);
- if (trx_sys_get_n_rw_trx() == 0) {
+ if (arg.n_rows > 0) { /* the log message below is written even when this is 0 */
+ service_manager_extend_timeout(
+ INNODB_EXTEND_TIMEOUT_INTERVAL,
+ "To roll back: " UINT32PF " transactions, "
+ UINT64PF " rows", arg.n_trx, arg.n_rows);
+ }
- return;
- }
+ ib::info() << "To roll back: " << arg.n_trx
+ << " transactions, " << arg.n_rows << " rows";
- if (all) {
- ib::info() << "Starting in background the rollback"
- " of recovered transactions";
}
+}
- /* Note: For XA recovered transactions, we rely on MySQL to
- do rollback. They will be in TRX_STATE_PREPARED state. If the server
- is shutdown and they are still lingering in trx_sys_t::trx_list
- then the shutdown will hang. */
-
- /* Loop over the transaction list as long as there are
- recovered transactions to clean up or recover. */
-
- do {
- trx_sys_mutex_enter();
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
+static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element,
+ std::vector<trx_t*> *trx_list)
+{ /* Callback passed to trx_sys.rw_trx_hash.iterate_no_dups() by
+ trx_rollback_recovered(): append the element's transaction to *trx_list
+ if it is recovered and in TRX_STATE_ACTIVE. Always returns 0. */
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx) /* element->trx may be NULL */
+ {
+ mutex_enter(&trx->mutex); /* is_recovered and the state are read under trx->mutex here */
+ if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE))
+ trx_list->push_back(trx);
+ mutex_exit(&trx->mutex);
+ }
+ mutex_exit(&element->mutex);
+ return 0;
+}
- /* If this function does a cleanup or rollback
- then it will release the trx_sys->mutex, therefore
- we need to reacquire it before retrying the loop. */
- if (trx_rollback_resurrected(trx, &all)) {
+/**
+ Rollback any incomplete transactions which were encountered in crash recovery.
- trx_sys_mutex_enter();
+ If the transaction already was committed, then we clean up a possible insert
+ undo log. If the transaction was not yet committed, then we roll it back.
+ NOTE(review): the body below only collects transactions that are
+ is_recovered and in TRX_STATE_ACTIVE; no cleanup of already committed
+ transactions is visible in this function, so the first sentence of this
+ paragraph may be stale - confirm.
- break;
- }
- }
+ Note: For XA recovered transactions, we rely on MySQL to
+ do rollback. They will be in TRX_STATE_PREPARED state. If the server
+ is shut down and they are still lingering in trx_sys_t::trx_list
+ then the shutdown will hang.
- trx_sys_mutex_exit();
+ @param[in] all true=roll back all recovered active transactions;
+ false=roll back any incomplete dictionary transaction
+*/
- } while (trx != NULL);
-
- if (all) {
- ib::info() << "Rollback of non-prepared transactions"
- " completed";
- }
+void trx_rollback_recovered(bool all)
+{
+ std::vector<trx_t*> trx_list;
+
+ ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
+
+ /*
+ Collect the recovered ACTIVE transactions (trx_t pointers) first. Once
+ collected, no other thread is allowed to modify or remove these
+ transactions from rw_trx_hash.
+ */
+ trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
+ (trx_rollback_recovered_callback),
+ &trx_list);
+
+ while (!trx_list.empty())
+ {
+ trx_t *trx= trx_list.back();
+ trx_list.pop_back();
+
+ ut_ad(trx);
+ ut_d(trx_mutex_enter(trx));
+ ut_ad(trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_d(trx_mutex_exit(trx));
+
+ if (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown) /* same flags that trx_rollback_finish() asserts for a rollback aborted by shutdown */
+ goto discard; /* taken regardless of 'all': deregister and free without rolling back */
+
+ if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE)
+ {
+ trx_rollback_active(trx);
+ if (trx->error_state != DB_SUCCESS)
+ {
+ ut_ad(trx->error_state == DB_INTERRUPTED);
+ trx->error_state= DB_SUCCESS;
+ ut_ad(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+discard: /* also reached by the goto from the shutdown check above */
+ trx_sys.deregister_rw(trx);
+ trx_free_at_shutdown(trx);
+ }
+ else
+ trx_free(trx);
+ } /* otherwise (!all and no dictionary operation) the transaction is left untouched */
+ }
+}
+
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
@@ -878,11 +846,7 @@ Note: this is done in a background thread.
@return a dummy parameter */
extern "C"
os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
+DECLARE_THREAD(trx_rollback_all_recovered)(void*)
{
my_thread_init();
ut_ad(!srv_read_only_mode);
@@ -891,9 +855,15 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
pfs_register_thread(trx_rollback_clean_thread_key);
#endif /* UNIV_PFS_THREAD */
- trx_rollback_or_clean_recovered(TRUE);
+ if (trx_sys.rw_trx_hash.size()) {
+ ib::info() << "Starting in background the rollback of"
+ " recovered transactions";
+ trx_rollback_recovered(true);
+ ib::info() << "Rollback of non-prepared transactions"
+ " completed";
+ }
- trx_rollback_or_clean_is_active = false;
+ trx_rollback_is_active = false;
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
@@ -910,25 +880,15 @@ static
void
trx_roll_try_truncate(trx_t* trx)
{
- ut_ad(mutex_own(&trx->undo_mutex));
-
trx->pages_undone = 0;
undo_no_t undo_no = trx->undo_no;
- trx_undo_t* insert_undo = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update_undo = trx->rsegs.m_redo.update_undo;
-
- if (insert_undo || update_undo) {
- mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
- if (insert_undo) {
- ut_ad(insert_undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_truncate_end(insert_undo, undo_no, false);
- }
- if (update_undo) {
- ut_ad(update_undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_truncate_end(update_undo, undo_no, false);
- }
- mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
+
+ if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
+ ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
+ mutex_enter(&undo->rseg->mutex);
+ trx_undo_truncate_end(undo, undo_no, false);
+ mutex_exit(&undo->rseg->mutex);
}
if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
@@ -957,10 +917,8 @@ trx_roll_pop_top_rec(
trx_undo_t* undo, /*!< in: undo log */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mutex_own(&trx->undo_mutex));
-
page_t* undo_page = trx_undo_page_get_s_latched(
- page_id_t(undo->space, undo->top_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->top_page_no), mtr);
ulint offset = undo->top_offset;
@@ -969,8 +927,8 @@ trx_roll_pop_top_rec(
true, mtr);
if (prev_rec == NULL) {
-
- undo->empty = TRUE;
+ undo->top_undo_no = IB_ID_MAX;
+ ut_ad(undo->empty());
} else {
page_t* prev_rec_page = page_align(prev_rec);
@@ -980,8 +938,9 @@ trx_roll_pop_top_rec(
}
undo->top_page_no = page_get_page_no(prev_rec_page);
- undo->top_offset = prev_rec - prev_rec_page;
+ undo->top_offset = ulint(prev_rec - prev_rec_page);
undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
+ ut_ad(!undo->empty());
}
return(undo_page + offset);
@@ -996,30 +955,29 @@ trx_roll_pop_top_rec(
trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
{
- mutex_enter(&trx->undo_mutex);
-
if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
trx_roll_try_truncate(trx);
}
- trx_undo_t* undo = NULL;
- trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t* undo = NULL;
+ trx_undo_t* insert = trx->rsegs.m_redo.old_insert;
+ trx_undo_t* update = trx->rsegs.m_redo.undo;
trx_undo_t* temp = trx->rsegs.m_noredo.undo;
const undo_no_t limit = trx->roll_limit;
- ut_ad(!insert || !update || insert->empty || update->empty
+ ut_ad(!insert || !update || insert->empty() || update->empty()
|| insert->top_undo_no != update->top_undo_no);
- ut_ad(!insert || !temp || insert->empty || temp->empty
+ ut_ad(!insert || !temp || insert->empty() || temp->empty()
|| insert->top_undo_no != temp->top_undo_no);
- ut_ad(!update || !temp || update->empty || temp->empty
+ ut_ad(!update || !temp || update->empty() || temp->empty()
|| update->top_undo_no != temp->top_undo_no);
- if (insert && !insert->empty && limit <= insert->top_undo_no) {
+ if (UNIV_LIKELY_NULL(insert)
+ && !insert->empty() && limit <= insert->top_undo_no) {
undo = insert;
}
- if (update && !update->empty && update->top_undo_no >= limit) {
+ if (update && !update->empty() && update->top_undo_no >= limit) {
if (!undo) {
undo = update;
} else if (undo->top_undo_no < update->top_undo_no) {
@@ -1027,7 +985,7 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
}
}
- if (temp && !temp->empty && temp->top_undo_no >= limit) {
+ if (temp && !temp->empty() && temp->top_undo_no >= limit) {
if (!undo) {
undo = temp;
} else if (undo->top_undo_no < temp->top_undo_no) {
@@ -1041,12 +999,11 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
if the transaction object is committed and reused
later, we will default to a full ROLLBACK. */
trx->roll_limit = 0;
- ut_d(trx->in_rollback = false);
- mutex_exit(&trx->undo_mutex);
+ trx->in_rollback = false;
return(NULL);
}
- ut_ad(!undo->empty);
+ ut_ad(!undo->empty());
ut_ad(limit <= undo->top_undo_no);
*roll_ptr = trx_undo_build_roll_ptr(
@@ -1058,11 +1015,19 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec);
switch (trx_undo_rec_get_type(undo_rec)) {
+ case TRX_UNDO_INSERT_METADATA:
+ /* This record type was introduced in MDEV-11369
+ instant ADD COLUMN, which was implemented after
+ MDEV-12288 removed the insert_undo log. There is no
+ instant ADD COLUMN for temporary tables. Therefore,
+ this record can only be present in the main undo log. */
+ ut_ad(undo == update);
+ /* fall through */
case TRX_UNDO_RENAME_TABLE:
- ut_ad(undo == insert);
+ ut_ad(undo == insert || undo == update);
/* fall through */
case TRX_UNDO_INSERT_REC:
- ut_ad(undo == insert || undo == temp);
+ ut_ad(undo == insert || undo == update || undo == temp);
*roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
break;
default:
@@ -1070,12 +1035,7 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
break;
}
- ut_ad(trx_roll_check_undo_rec_ordering(
- undo_no, undo->rseg->space, trx));
-
trx->undo_no = undo_no;
- trx->undo_rseg_space = undo->rseg->space;
- mutex_exit(&trx->undo_mutex);
trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
mtr.commit();
@@ -1133,7 +1093,7 @@ trx_rollback_start(
ut_ad(!trx->in_rollback);
trx->roll_limit = roll_limit;
- ut_d(trx->in_rollback = true);
+ trx->in_rollback = true;
ut_a(trx->roll_limit <= trx->undo_no);
@@ -1150,21 +1110,6 @@ trx_rollback_start(
return(que_fork_start_command(roll_graph));
}
-/****************************************************************//**
-Finishes a transaction rollback. */
-static
-void
-trx_rollback_finish(
-/*================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_commit(trx);
-
- trx->mod_tables.clear();
-
- trx->lock.que_state = TRX_QUE_RUNNING;
-}
-
/*********************************************************************//**
Creates a rollback command node struct.
@return own: rollback node struct */
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index e76695b7e43..ea52df9cb5c 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -33,28 +33,274 @@ Created 3/26/1996 Heikki Tuuri
#include <algorithm>
-/** Creates a rollback segment header.
-This function is called only when a new rollback segment is created in
-the database.
-@param[in] space space id
-@param[in] max_size max size in pages
-@param[in] rseg_slot_no rseg id == slot number in trx sys
+#ifdef WITH_WSREP
+#include <mysql/service_wsrep.h>
+
+#ifdef UNIV_DEBUG
+/** The latest known WSREP XID sequence number */
+static long long wsrep_seqno = -1;
+#endif /* UNIV_DEBUG */
+/** The latest known WSREP XID UUID */
+static unsigned char wsrep_uuid[16];
+
+/** Write the WSREP XID information into rollback segment header.
+The format ID and the two length fields are written as 4-byte values
+(MLOG_4BYTES); the data field is always written in full (XIDDATASIZE
+bytes), regardless of gtrid_length and bqual_length.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+static void
+trx_rseg_write_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr)
+{
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header,
+ uint32_t(xid->formatID),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header,
+ uint32_t(xid->gtrid_length),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header,
+ uint32_t(xid->bqual_length),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_string(TRX_RSEG_WSREP_XID_DATA + rseg_header,
+ reinterpret_cast<const byte*>(xid->data),
+ XIDDATASIZE, mtr);
+}
+
+/** Update the WSREP XID information in rollback segment header.
+In UNIV_DEBUG builds this additionally asserts that the sequence number
+increases while the UUID stays the same, and remembers the UUID and the
+sequence number of this XID in wsrep_uuid and wsrep_seqno.
+NOTE(review): wsrep_uuid is only refreshed here inside #ifdef UNIV_DEBUG,
+although it is declared and compared against in all WITH_WSREP builds -
+confirm that this is intended.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr)
+{
+ ut_ad(wsrep_is_wsrep_xid(xid));
+
+#ifdef UNIV_DEBUG
+ /* Check that seqno is monotonically increasing */
+ long long xid_seqno = wsrep_xid_seqno(xid);
+ const byte* xid_uuid = wsrep_xid_uuid(xid);
+
+ if (xid_seqno != -1
+ && !memcmp(xid_uuid, wsrep_uuid, sizeof wsrep_uuid)) {
+ ut_ad(xid_seqno > wsrep_seqno);
+ } else { /* seqno is -1 or the UUID differs: adopt the UUID of this XID */
+ memcpy(wsrep_uuid, xid_uuid, sizeof wsrep_uuid);
+ }
+ wsrep_seqno = xid_seqno;
+#endif /* UNIV_DEBUG */
+ trx_rseg_write_wsrep_checkpoint(rseg_header, xid, mtr);
+}
+
+/** Clear the WSREP XID information from rollback segment header.
+Only the TRX_RSEG_WSREP_XID_FORMAT field is set to 0; the length and data
+fields are left as they are. A zero format ID is what
+trx_rseg_read_wsrep_checkpoint() treats as "no WSREP XID present".
+@param[in,out] rseg_header Rollback segment header
+@param[in,out] mtr mini-transaction */
+static void
+trx_rseg_clear_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ mtr_t* mtr)
+{
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header,
+ 0, MLOG_4BYTES, mtr);
+}
+
+static void
+trx_rseg_update_wsrep_checkpoint(const XID* xid, mtr_t* mtr)
+{ /* Write the WSREP XID into the header of rollback segment 0
+ (trx_sys.rseg_array[0]), upgrading the header format first if needed.
+ If the UUID of xid differs from wsrep_uuid, also clear the WSREP XID in
+ every other existing rollback segment header, all within mtr. */
+ const byte* xid_uuid = wsrep_xid_uuid(xid);
+ /* The comparison against wsrep_uuid must be made here, before
+ calling trx_rseg_update_wsrep_checkpoint() below: in debug builds
+ that call overwrites wsrep_uuid with the UUID of xid, after which
+ the memcmp() would never return a nonzero result. */
+ const bool must_clear_rsegs = memcmp(wsrep_uuid, xid_uuid,
+ sizeof wsrep_uuid);
+ const trx_rseg_t* rseg = trx_sys.rseg_array[0];
+
+ trx_rsegf_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
+ mtr);
+ if (UNIV_UNLIKELY(mach_read_from_4(rseg_header + TRX_RSEG_FORMAT))) { /* nonzero: not yet the format that trx_rseg_format_upgrade() writes */
+ trx_rseg_format_upgrade(rseg_header, mtr);
+ }
+
+ trx_rseg_update_wsrep_checkpoint(rseg_header, xid, mtr);
+
+ if (must_clear_rsegs) {
+ /* Because the UUID part of the WSREP XID differed
+ from wsrep_uuid, the WSREP group UUID was
+ changed, and we must reset the XID in all other
+ rollback segment headers. */
+ for (ulint rseg_id = 1; rseg_id < TRX_SYS_N_RSEGS; ++rseg_id) { /* slot 0 was just written above */
+ if (const trx_rseg_t* rseg =
+ trx_sys.rseg_array[rseg_id]) {
+ trx_rseg_clear_wsrep_checkpoint(
+ trx_rsegf_get(rseg->space,
+ rseg->page_no, mtr),
+ mtr);
+ }
+ }
+ }
+}
+
+/** Update WSREP checkpoint XID in first rollback segment header
+as part of wsrep_set_SE_checkpoint() when it is guaranteed that there
+are no wsrep transactions committing.
+If the UUID part of the WSREP XID does not match the UUIDs of the XIDs
+already stored in the rollback segments, the WSREP XID in all the
+remaining rollback segments will be reset.
+The update is performed in its own mini-transaction, which is started and
+committed inside this function.
+@param[in] xid WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid)
+{
+ mtr_t mtr;
+ mtr.start();
+ trx_rseg_update_wsrep_checkpoint(xid, &mtr);
+ mtr.commit();
+}
+
+/** Read the WSREP XID information in rollback segment header.
+If the stored format ID is 0, the XID is considered absent and xid is
+left unmodified. Otherwise all XIDDATASIZE bytes of the data field are
+copied, regardless of the stored lengths.
+@param[in] rseg_header Rollback segment header
+@param[out] xid Transaction XID
+@return whether the WSREP XID was present */
+static
+bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid)
+{
+ int formatID = static_cast<int>(
+ mach_read_from_4(
+ TRX_RSEG_WSREP_XID_FORMAT + rseg_header));
+ if (formatID == 0) { /* the value written by trx_rseg_clear_wsrep_checkpoint() */
+ return false;
+ }
+
+ xid.formatID = formatID;
+ xid.gtrid_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header));
+
+ xid.bqual_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header));
+
+ memcpy(xid.data, TRX_RSEG_WSREP_XID_DATA + rseg_header, XIDDATASIZE);
+
+ return true;
+}
+
+/** Read the WSREP XID from the TRX_SYS page (in case of upgrade).
+The XID is considered present only if the magic number field equals
+TRX_SYS_WSREP_XID_MAGIC_N; otherwise xid is left unmodified.
+@param[in] page TRX_SYS page
+@param[out] xid WSREP XID (if present)
+@return whether the WSREP XID is present */
+static bool trx_rseg_init_wsrep_xid(const page_t* page, XID& xid)
+{
+ if (mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD
+ + page)
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ return false;
+ }
+
+ xid.formatID = static_cast<int>(
+ mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_FORMAT + page));
+ xid.gtrid_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_GTRID_LEN + page));
+ xid.bqual_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_BQUAL_LEN + page));
+ memcpy(xid.data,
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_DATA + page, XIDDATASIZE); /* the full data field is copied, regardless of the lengths read above */
+ return true;
+}
+
+/** Recover the latest WSREP checkpoint XID.
+All TRX_SYS_N_RSEGS rollback segment slots are examined, each in its own
+mini-transaction. Unused slots (FIL_NULL page number) and headers with a
+nonzero TRX_RSEG_FORMAT are skipped. Among the remaining headers, the XID
+with the greatest wsrep_xid_seqno() above -1 is returned, and its UUID is
+remembered in wsrep_uuid.
+@param[out] xid WSREP XID
+@return whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid)
+{
+ mtr_t mtr;
+ long long max_xid_seqno = -1;
+ bool found = false;
+
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS;
+ rseg_id++, mtr.commit()) { /* the commit here also runs after each 'continue' below */
+ mtr.start();
+ const buf_block_t* sys = trx_sysf_get(&mtr, false);
+ const uint32_t page_no = trx_sysf_rseg_get_page_no(
+ sys, rseg_id);
+
+ if (page_no == FIL_NULL) {
+ continue;
+ }
+
+ const trx_rsegf_t* rseg_header = trx_rsegf_get_new(
+ trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr);
+
+ if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)) { /* nonzero: not the format that trx_rseg_format_upgrade() writes */
+ continue;
+ }
+
+ XID tmp_xid;
+ long long tmp_seqno = 0;
+ if (trx_rseg_read_wsrep_checkpoint(rseg_header, tmp_xid)
+ && (tmp_seqno = wsrep_xid_seqno(&tmp_xid))
+ > max_xid_seqno) { /* strictly greater: on equal seqno the earlier slot wins */
+ found = true;
+ max_xid_seqno = tmp_seqno;
+ xid = tmp_xid;
+ memcpy(wsrep_uuid, wsrep_xid_uuid(&tmp_xid),
+ sizeof wsrep_uuid);
+ }
+ }
+
+ return found;
+}
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+Sets the TRX_RSEG_FORMAT field to 0 and zero-fills the page from
+TRX_RSEG_MAX_TRX_ID + 8 (relative to the rollback segment header) up to,
+but not including, the last FIL_PAGE_DATA_END bytes of the page.
+@param[in,out] rseg_header rollback segment header page
+@param[in,out] mtr mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr)
+{
+ ut_ad(page_offset(rseg_header) == TRX_RSEG);
+ byte* rseg_format = TRX_RSEG_FORMAT + rseg_header;
+ mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr);
+ /* Clear also possible garbage at the end of the page. Old
+ InnoDB versions did not initialize unused parts of pages. */
+ byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8;
+ ulint len = srv_page_size
+ - (FIL_PAGE_DATA_END
+ + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8); /* rseg_header is at page offset TRX_RSEG (asserted above), so b + len ends FIL_PAGE_DATA_END bytes before the end of the page */
+ memset(b, 0, len);
+ mlog_log_string(b, len, mtr); /* presumably records the zero-filled range in mtr - confirm against mlog_log_string() */
+}
+
+/** Create a rollback segment header.
+@param[in,out] space system, undo, or temporary tablespace
+@param[in] rseg_id rollback segment identifier
+@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg)
@param[in,out] mtr mini-transaction
@return the created rollback segment
@retval NULL on failure */
buf_block_t*
trx_rseg_header_create(
- ulint space,
- ulint max_size,
- ulint rseg_slot_no,
- mtr_t* mtr)
+ fil_space_t* space,
+ ulint rseg_id,
+ buf_block_t* sys_header,
+ mtr_t* mtr)
{
- trx_sysf_t* sys_header;
buf_block_t* block;
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK));
+ ut_ad(!sys_header == (space == fil_system.temp_space));
/* Allocate a new file segment for the rollback segment */
block = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
@@ -66,9 +312,8 @@ trx_rseg_header_create(
buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
- /* Initialize max size field */
- mlog_write_ulint(TRX_RSEG + TRX_RSEG_MAX_SIZE + block->frame,
- max_size, MLOG_4BYTES, mtr);
+ mlog_write_ulint(TRX_RSEG + TRX_RSEG_FORMAT + block->frame, 0,
+ MLOG_4BYTES, mtr);
/* Initialize the history list */
@@ -84,17 +329,20 @@ trx_rseg_header_create(
trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
}
- if (space != SRV_TMP_SPACE_ID) {
+ if (sys_header) {
/* Add the rollback segment info to the free slot in
the trx system header */
- sys_header = trx_sysf_get(mtr);
-
- trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr);
-
- trx_sysf_rseg_set_page_no(
- sys_header, rseg_slot_no,
- block->page.id.page_no(), mtr);
+ mlog_write_ulint(TRX_SYS + TRX_SYS_RSEGS
+ + TRX_SYS_RSEG_SPACE
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame,
+ space->id, MLOG_4BYTES, mtr);
+ mlog_write_ulint(TRX_SYS + TRX_SYS_RSEGS
+ + TRX_SYS_RSEG_PAGE_NO
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame,
+ block->page.id.page_no(), MLOG_4BYTES, mtr);
}
return block;
@@ -110,33 +358,20 @@ trx_rseg_mem_free(trx_rseg_t* rseg)
mutex_free(&rseg->mutex);
/* There can't be any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
- ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
-
- for (undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- undo != NULL;
- undo = next_undo) {
+ ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0);
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
-
- UT_LIST_REMOVE(rseg->update_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
-
- trx_undo_mem_free(undo);
- }
-
- for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ for (undo = UT_LIST_GET_FIRST(rseg->undo_cached);
undo != NULL;
undo = next_undo) {
next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(undo);
+ ut_free(undo);
}
ut_free(rseg);
@@ -148,7 +383,7 @@ trx_rseg_mem_free(trx_rseg_t* rseg)
@param[in] page_no page number of the segment header */
static
trx_rseg_t*
-trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
+trx_rseg_mem_create(ulint id, fil_space_t* space, ulint page_no)
{
trx_rseg_t* rseg = static_cast<trx_rseg_t*>(
ut_zalloc_nokey(sizeof *rseg));
@@ -157,104 +392,258 @@ trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
rseg->space = space;
rseg->page_no = page_no;
rseg->last_page_no = FIL_NULL;
+ rseg->curr_size = 1;
mutex_create(rseg->is_persistent()
? LATCH_ID_REDO_RSEG : LATCH_ID_NOREDO_RSEG,
&rseg->mutex);
- UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
return(rseg);
}
+/** Read the undo log lists.
+Visits each of the TRX_RSEG_N_SLOTS undo slots of the rollback segment
+header. For every slot that is not FIL_NULL, calls
+trx_undo_mem_create_at_db_start(), adds its return value to the result
+and increments MONITOR_NUM_UNDO_SLOT_USED.
+Debug builds assert srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN.
+@param[in,out] rseg rollback segment
+@param[in,out] max_trx_id maximum observed transaction identifier
+(passed on to trx_undo_mem_create_at_db_start())
+@param[in] rseg_header rollback segment header
+@return the combined size of undo log segments in pages */
+static
+ulint
+trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id,
+ const trx_rsegf_t* rseg_header)
+{
+ ut_ad(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
+
+ ulint size = 0; /* running total returned to the caller */
+
+ for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ ulint page_no = trx_rsegf_get_nth_undo(rseg_header, i);
+ if (page_no != FIL_NULL) { /* FIL_NULL marks a slot without an undo log */
+ size += trx_undo_mem_create_at_db_start(
+ rseg, i, page_no, max_trx_id);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
+ }
+ }
+
+ return(size);
+}
+
/** Restore the state of a persistent rollback segment.
+Reads the rollback segment header page. If its TRX_RSEG_FORMAT field is 0,
+max_trx_id and the recovered binlog name and offset in trx_sys are advanced
+from it. Unless srv_operation is SRV_OPERATION_RESTORE, rseg->curr_size,
+the undo log lists, the last history entry of rseg and the purge queue
+are then initialized from the header.
-@param[in,out] rseg persistent rollback segment
-@param[in,out] mtr mini-transaction */
+@param[in,out] rseg persistent rollback segment
+@param[in,out] max_trx_id maximum observed transaction identifier
+@param[in,out] mtr mini-transaction */
static
void
-trx_rseg_mem_restore(trx_rseg_t* rseg, mtr_t* mtr)
+trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
{
- ulint len;
- fil_addr_t node_addr;
- trx_rsegf_t* rseg_header;
- trx_ulogf_t* undo_log_hdr;
- ulint sum_of_undo_sizes;
+ trx_rsegf_t* rseg_header = trx_rsegf_get_new(
+ rseg->space->id, rseg->page_no, mtr);
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, mtr);
+ if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) { /* 0 is the value written by trx_rseg_format_upgrade() and trx_rseg_header_create() */
+ trx_id_t id = mach_read_from_8(rseg_header
+ + TRX_RSEG_MAX_TRX_ID);
- rseg->max_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
- /* Initialize the undo log lists according to the rseg header */
+ if (rseg_header[TRX_RSEG_BINLOG_NAME]) { /* nonzero first byte: a binlog name is stored in this header */
+ const char* binlog_name = reinterpret_cast<const char*>
+ (rseg_header) + TRX_RSEG_BINLOG_NAME;
+ compile_time_assert(TRX_RSEG_BINLOG_NAME_LEN == sizeof
+ trx_sys.recovered_binlog_filename);
+
+ int cmp = *trx_sys.recovered_binlog_filename
+ ? strncmp(binlog_name,
+ trx_sys.recovered_binlog_filename,
+ TRX_RSEG_BINLOG_NAME_LEN)
+ : 1; /* nothing recovered so far: behave as if this name compared greater */
+
+ if (cmp >= 0) { /* names that compare smaller than the recovered one are ignored */
+ uint64_t binlog_offset = mach_read_from_8(
+ rseg_header + TRX_RSEG_BINLOG_OFFSET);
+ if (cmp) { /* greater name: take both the name and its offset */
+ memcpy(trx_sys.
+ recovered_binlog_filename,
+ binlog_name,
+ TRX_RSEG_BINLOG_NAME_LEN);
+ trx_sys.recovered_binlog_offset
+ = binlog_offset;
+ } else if (binlog_offset >
+ trx_sys.recovered_binlog_offset) { /* same name: keep the larger offset */
+ trx_sys.recovered_binlog_offset
+ = binlog_offset;
+ }
+ }
+
+#ifdef WITH_WSREP
+ trx_rseg_read_wsrep_checkpoint(
+ rseg_header, trx_sys.recovered_wsrep_xid); /* note: only reached when a binlog name is stored */
+#endif
+ }
+ }
- sum_of_undo_sizes = trx_undo_lists_init(rseg);
+ if (srv_operation == SRV_OPERATION_RESTORE) {
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ return;
+ }
- rseg->curr_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr)
- + 1 + sum_of_undo_sizes;
+ /* Initialize the undo log lists according to the rseg header */
- len = flst_get_len(rseg_header + TRX_RSEG_HISTORY);
+ rseg->curr_size = mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE)
+ + 1 + trx_undo_lists_init(rseg, max_trx_id, rseg_header); /* history size + 1 + combined size of the undo log segments */
- if (len > 0) {
- my_atomic_addlint(&trx_sys->rseg_history_len, len);
+ if (ulint len = flst_get_len(rseg_header + TRX_RSEG_HISTORY)) { /* the history list is not empty */
+ trx_sys.history_add(int32(len));
- node_addr = trx_purge_get_log_from_hist(
+ fil_addr_t node_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
rseg->last_page_no = node_addr.page;
rseg->last_offset = node_addr.boffset;
- undo_log_hdr = trx_undo_page_get(
- page_id_t(rseg->space, node_addr.page), mtr)
+ const trx_ulogf_t* undo_log_hdr = trx_undo_page_get(
+ page_id_t(rseg->space->id, node_addr.page), mtr)
+ node_addr.boffset;
- rseg->last_trx_no = mach_read_from_8(
- undo_log_hdr + TRX_UNDO_TRX_NO);
-
- rseg->last_del_marks = mtr_read_ulint(
- undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
-
- TrxUndoRsegs elem(rseg->last_trx_no);
- elem.push_back(rseg);
+ trx_id_t id = mach_read_from_8(undo_log_hdr + TRX_UNDO_TRX_ID);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
+ id = mach_read_from_8(undo_log_hdr + TRX_UNDO_TRX_NO); /* from here on, id holds the TRX_UNDO_TRX_NO value */
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
+ unsigned purge = mach_read_from_2(
+ undo_log_hdr + TRX_UNDO_NEEDS_PURGE);
+ ut_ad(purge <= 1);
+ rseg->set_last_trx_no(id, purge != 0);
+ rseg->needs_purge = purge != 0;
if (rseg->last_page_no != FIL_NULL) {
/* There is no need to cover this operation by the purge
mutex because we are still bootstrapping. */
-
- purge_sys->purge_queue.push(elem);
+ purge_sys.purge_queue.push(*rseg);
}
}
}
+/** Read binlog metadata from the TRX_SYS page, in case we are upgrading
+from MySQL or a MariaDB version older than 10.3.5.
+If the magic number field of the binlog info area matches
+TRX_SYS_MYSQL_LOG_MAGIC_N, the stored name and offset are copied to
+trx_sys.recovered_binlog_filename and trx_sys.recovered_binlog_offset;
+otherwise those two fields are left untouched.
+With WITH_WSREP, trx_rseg_init_wsrep_xid() is additionally invoked on
+trx_sys.recovered_wsrep_xid, regardless of the binlog magic number.
+@param[in] page frame of the TRX_SYS page */
+static void trx_rseg_init_binlog_info(const page_t* page)
+{
+ if (mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
+ + page)
+ == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+ memcpy(trx_sys.recovered_binlog_filename,
+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
+ + TRX_SYS + page, TRX_SYS_MYSQL_LOG_NAME_LEN); /* NOTE(review): assumes the destination holds at least TRX_SYS_MYSQL_LOG_NAME_LEN bytes; confirm */
+ trx_sys.recovered_binlog_offset = mach_read_from_8(
+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
+ + TRX_SYS + page);
+ }
+
+#ifdef WITH_WSREP
+ trx_rseg_init_wsrep_xid(page, trx_sys.recovered_wsrep_xid);
+#endif
+}
+
/** Initialize the rollback segments in memory at database startup. */
void
trx_rseg_array_init()
{
- mtr_t mtr;
+ /* Largest transaction identifier seen in the TRX_SYS page and in
+ the rollback segments; trx_sys is initialized with it plus 1. */
+ trx_id_t max_trx_id = 0;
+
+ /* Reset the recovered binlog position before scanning. */
+ *trx_sys.recovered_binlog_filename = '\0';
+ trx_sys.recovered_binlog_offset = 0;
+#ifdef WITH_WSREP
+ trx_sys.recovered_wsrep_xid.null();
+ /* Copy of the WSREP XID that was found in the TRX_SYS page */
+ XID wsrep_sys_xid;
+ wsrep_sys_xid.null();
+ bool wsrep_xid_in_rseg_found = false;
+#endif
+
+ /* Each slot is processed in a mini-transaction of its own. */
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ mtr_t mtr;
+ mtr.start();
+ if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) {
+ if (rseg_id == 0) {
+ /* In case this is an upgrade from
+ before MariaDB 10.3.5, fetch the base
+ information from the TRX_SYS page. */
+ max_trx_id = mach_read_from_8(
+ TRX_SYS + TRX_SYS_TRX_ID_STORE
+ + sys->frame);
+ trx_rseg_init_binlog_info(sys->frame);
+#ifdef WITH_WSREP
+ wsrep_sys_xid.set(&trx_sys.recovered_wsrep_xid);
+#endif
+ }
+
+ const uint32_t page_no = trx_sysf_rseg_get_page_no(
+ sys, rseg_id);
+ /* FIL_NULL denotes an unused rollback segment slot. */
+ if (page_no != FIL_NULL) {
+ trx_rseg_t* rseg = trx_rseg_mem_create(
+ rseg_id,
+ fil_space_get(trx_sysf_rseg_get_space(
+ sys, rseg_id)),
+ page_no);
+ ut_ad(rseg->is_persistent());
+ ut_ad(rseg->id == rseg_id);
+ ut_ad(!trx_sys.rseg_array[rseg_id]);
+ trx_sys.rseg_array[rseg_id] = rseg;
+ trx_rseg_mem_restore(rseg, max_trx_id, &mtr);
+#ifdef WITH_WSREP
+ /* The recovered XID differing from the one in
+ the TRX_SYS page means that a rollback segment
+ header supplied a WSREP XID. */
+ if (!wsrep_sys_xid.is_null() &&
+ !wsrep_sys_xid.eq(&trx_sys.recovered_wsrep_xid)) {
+ wsrep_xid_in_rseg_found = true;
+ ut_ad(memcmp(wsrep_xid_uuid(&wsrep_sys_xid),
+ wsrep_xid_uuid(&trx_sys.recovered_wsrep_xid),
+ sizeof wsrep_uuid)
+ || wsrep_xid_seqno(
+ &wsrep_sys_xid)
+ <= wsrep_xid_seqno(
+ &trx_sys.recovered_wsrep_xid));
+ }
+#endif
+ }
+ }
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ mtr.commit();
+ }
+
+#ifdef WITH_WSREP
+ if (!wsrep_sys_xid.is_null()) {
+ /* Upgrade from a version prior to 10.3.5,
+ where WSREP XID was stored in TRX_SYS page.
+ If no rollback segment has a WSREP XID set,
+ we must copy the XID found in TRX_SYS page
+ to rollback segments. */
+ mtr_t mtr;
mtr.start();
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
- ulint page_no = trx_sysf_rseg_get_page_no(
- sys_header, i, &mtr);
- if (page_no != FIL_NULL) {
- trx_rseg_t* rseg = trx_rseg_mem_create(
- i,
- trx_sysf_rseg_get_space(sys_header, i, &mtr),
- page_no);
- ut_ad(rseg->is_persistent());
- ut_ad(!trx_sys->rseg_array[rseg->id]);
- trx_sys->rseg_array[rseg->id] = rseg;
- trx_rseg_mem_restore(rseg, &mtr);
+ if (!wsrep_xid_in_rseg_found) {
+ trx_rseg_update_wsrep_checkpoint(&wsrep_sys_xid, &mtr);
}
+ /* Finally, clear WSREP XID in TRX_SYS page. */
+ const buf_block_t* sys = trx_sysf_get(&mtr);
+ mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD + sys->frame,
+ 0, MLOG_4BYTES, &mtr);
+
mtr.commit();
}
+#endif
+
+ trx_sys.init_max_trx_id(max_trx_id + 1);
}
/** Create a persistent rollback segment.
@@ -270,29 +659,25 @@ trx_rseg_create(ulint space_id)
mtr.start();
/* To obey the latching order, acquire the file space
- x-latch before the trx_sys->mutex. */
-#ifdef UNIV_DEBUG
- const fil_space_t* space =
-#endif /* UNIV_DEBUG */
- mtr_x_lock_space(space_id, &mtr);
+ x-latch before the trx_sys.mutex. */
+ fil_space_t* space = mtr_x_lock_space(space_id, &mtr);
ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
- ulint slot_no = trx_sysf_rseg_find_free(&mtr);
- if (buf_block_t* block = slot_no == ULINT_UNDEFINED
- ? NULL
- : trx_rseg_header_create(space_id, ULINT_MAX, slot_no, &mtr)) {
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
-
- ulint id = trx_sysf_rseg_get_space(
- sys_header, slot_no, &mtr);
- ut_a(id == space_id);
-
- rseg = trx_rseg_mem_create(slot_no, space_id,
- block->page.id.page_no());
- ut_ad(rseg->is_persistent());
- ut_ad(!trx_sys->rseg_array[rseg->id]);
- trx_sys->rseg_array[rseg->id] = rseg;
- trx_rseg_mem_restore(rseg, &mtr);
+ if (buf_block_t* sys_header = trx_sysf_get(&mtr)) {
+ ulint rseg_id = trx_sys_rseg_find_free(sys_header);
+ if (buf_block_t* rblock = rseg_id == ULINT_UNDEFINED
+ ? NULL
+ : trx_rseg_header_create(space, rseg_id, sys_header,
+ &mtr)) {
+ ut_ad(trx_sysf_rseg_get_space(sys_header, rseg_id)
+ == space_id);
+ rseg = trx_rseg_mem_create(rseg_id, space,
+ rblock->page.id.page_no());
+ ut_ad(rseg->id == rseg_id);
+ ut_ad(rseg->is_persistent());
+ ut_ad(!trx_sys.rseg_array[rseg->id]);
+ trx_sys.rseg_array[rseg->id] = rseg;
+ }
}
mtr.commit();
@@ -309,20 +694,15 @@ trx_temp_rseg_create()
for (ulong i = 0; i < TRX_SYS_N_RSEGS; i++) {
mtr.start();
mtr.set_log_mode(MTR_LOG_NO_REDO);
-#ifdef UNIV_DEBUG
- const fil_space_t* space =
-#endif /* UNIV_DEBUG */
- mtr_x_lock_space(SRV_TMP_SPACE_ID, &mtr);
- ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
+ mtr_x_lock(&fil_system.temp_space->latch, &mtr);
- buf_block_t* block = trx_rseg_header_create(
- SRV_TMP_SPACE_ID, ULINT_MAX, i, &mtr);
+ buf_block_t* rblock = trx_rseg_header_create(
+ fil_system.temp_space, i, NULL, &mtr);
trx_rseg_t* rseg = trx_rseg_mem_create(
- i, SRV_TMP_SPACE_ID, block->page.id.page_no());
+ i, fil_system.temp_space, rblock->page.id.page_no());
ut_ad(!rseg->is_persistent());
- ut_ad(!trx_sys->temp_rsegs[i]);
- trx_sys->temp_rsegs[i] = rseg;
- trx_rseg_mem_restore(rseg, &mtr);
+ ut_ad(!trx_sys.temp_rsegs[i]);
+ trx_sys.temp_rsegs[i] = rseg;
mtr.commit();
}
}
@@ -339,54 +719,70 @@ trx_rseg_get_n_undo_tablespaces(
ulint* space_ids) /*!< out: array of space ids of
UNDO tablespaces */
{
- ulint i;
- mtr_t mtr;
- trx_sysf_t* sys_header;
- ulint n_undo_tablespaces = 0;
-
- mtr_start(&mtr);
+ mtr_t mtr;
+ mtr.start();
- sys_header = trx_sysf_get(&mtr);
+ buf_block_t* sys_header = trx_sysf_get(&mtr, false);
+ if (!sys_header) {
+ mtr.commit();
+ return 0;
+ }
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
- ulint space;
+ ulint* end = space_ids;
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, &mtr);
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ uint32_t page_no = trx_sysf_rseg_get_page_no(sys_header,
+ rseg_id);
if (page_no == FIL_NULL) {
continue;
}
- space = trx_sysf_rseg_get_space(sys_header, i, &mtr);
-
- if (space != 0) {
- ulint j;
- ibool found = FALSE;
-
- for (j = 0; j < n_undo_tablespaces; ++j) {
- if (space_ids[j] == space) {
- found = TRUE;
- break;
- }
- }
-
- if (!found) {
- ut_a(n_undo_tablespaces <= i);
- space_ids[n_undo_tablespaces++] = space;
+ if (ulint space = trx_sysf_rseg_get_space(sys_header,
+ rseg_id)) {
+ if (std::find(space_ids, end, space) == end) {
+ *end++ = space;
}
}
}
- mtr_commit(&mtr);
+ mtr.commit();
- ut_a(n_undo_tablespaces <= TRX_SYS_N_RSEGS);
+ ut_a(end - space_ids <= TRX_SYS_N_RSEGS);
+ *end = ULINT_UNDEFINED;
- space_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
+ std::sort(space_ids, end);
- if (n_undo_tablespaces > 0) {
- std::sort(space_ids, space_ids + n_undo_tablespaces);
+ return ulint(end - space_ids);
+}
+
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+Nothing is written if trx->mysql_log_file_name, including its
+terminating NUL byte, is longer than TRX_RSEG_BINLOG_NAME_LEN.
+The name is written only when it differs from the bytes already
+stored in the header; the offset is written unconditionally.
+@param[in,out] rseg_header rollback segment header
+@param[in] trx committing transaction
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr)
+{
+ DBUG_LOG("trx", "trx_mysql_binlog_offset: " << trx->mysql_log_offset);
+
+ const size_t len = strlen(trx->mysql_log_file_name) + 1; /* length including the terminating NUL */
+
+ ut_ad(len > 1); /* debug builds require a nonempty name */
+
+ if (UNIV_UNLIKELY(len > TRX_RSEG_BINLOG_NAME_LEN)) { /* the name would not fit in the header field */
+ return;
}
- return(n_undo_tablespaces);
+ mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_OFFSET,
+ trx->mysql_log_offset, mtr);
+ byte* p = rseg_header + TRX_RSEG_BINLOG_NAME;
+ const byte* binlog_name = reinterpret_cast<const byte*>
+ (trx->mysql_log_file_name);
+
+ if (memcmp(binlog_name, p, len)) { /* skip the write when the stored name already matches */
+ mlog_write_string(p, binlog_name, len, mtr);
+ }
}
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 6529de31fb5..3a09d6929fa 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -24,8 +24,8 @@ Transaction system
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "mysqld.h"
#include "trx0sys.h"
+#include "mysqld.h"
#include "sql_error.h"
#include "fsp0fsp.h"
@@ -40,55 +40,9 @@ Created 3/26/1996 Heikki Tuuri
#include "log0log.h"
#include "log0recv.h"
#include "os0file.h"
-#include "read0read.h"
-
-#include <mysql/service_wsrep.h>
-
-/** The file format tag structure with id and name. */
-struct file_format_t {
- ulint id; /*!< id of the file format */
- const char* name; /*!< text representation of the
- file format */
- ib_mutex_t mutex; /*!< covers changes to the above
- fields */
-};
/** The transaction system */
-trx_sys_t* trx_sys;
-
-/** List of animal names representing file format. */
-static const char* file_format_name_map[] = {
- "Antelope",
- "Barracuda",
- "Cheetah",
- "Dragon",
- "Elk",
- "Fox",
- "Gazelle",
- "Hornet",
- "Impala",
- "Jaguar",
- "Kangaroo",
- "Leopard",
- "Moose",
- "Nautilus",
- "Ocelot",
- "Porpoise",
- "Quail",
- "Rabbit",
- "Shark",
- "Tiger",
- "Urchin",
- "Viper",
- "Whale",
- "Xenops",
- "Yak",
- "Zebra"
-};
-
-/** The number of elements in the file format name array. */
-static const ulint FILE_FORMAT_NAME_N
- = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
+trx_sys_t trx_sys;
/** Check whether transaction id is valid.
@param[in] id transaction id to check
@@ -98,7 +52,7 @@ ReadView::check_trx_id_sanity(
trx_id_t id,
const table_name_t& name)
{
- if (id >= trx_sys->max_trx_id) {
+ if (id >= trx_sys.get_max_trx_id()) {
ib::warn() << "A transaction id"
<< " in a record of table "
@@ -129,249 +83,32 @@ ReadView::check_trx_id_sanity(
uint trx_rseg_n_slots_debug = 0;
#endif
-/** This is used to track the maximum file format id known to InnoDB. It's
-updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
-or create a table. */
-static file_format_t file_format_max;
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-void
-trx_sys_flush_max_trx_id(void)
-/*==========================*/
-{
- mtr_t mtr;
- trx_sysf_t* sys_header;
-
- /* wsrep_fake_trx_id violates this assert
- Copied from trx_sys_get_new_trx_id
- */
- ut_ad(trx_sys_mutex_own());
-
- if (!srv_read_only_mode) {
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- mlog_write_ull(
- sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
-
- mtr_commit(&mtr);
- }
-}
-
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/*!< in: MySQL log file name */
- int64_t offset, /*!< in: position in that log file */
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- mtr_t* mtr) /*!< in: mtr */
-{
- DBUG_PRINT("InnoDB",("trx_mysql_binlog_offset: %lld", (longlong) offset));
-
- const size_t len = strlen(file_name) + 1;
-
- if (len > TRX_SYS_MYSQL_LOG_NAME_LEN) {
-
- /* We cannot fit the name to the 512 bytes we have reserved */
-
- return;
- }
-
- if (mach_read_from_4(TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
- + TRX_SYS_MYSQL_LOG_INFO + sys_header)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mlog_write_ulint(TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
- + TRX_SYS_MYSQL_LOG_INFO + sys_header,
- TRX_SYS_MYSQL_LOG_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- if (memcmp(file_name, TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_INFO
- + sys_header, len)) {
- mlog_write_string(TRX_SYS_MYSQL_LOG_NAME
- + TRX_SYS_MYSQL_LOG_INFO
- + sys_header,
- reinterpret_cast<const byte*>(file_name),
- len, mtr);
- }
-
- mlog_write_ull(TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
- + sys_header, offset, mtr);
-}
-
/** Display the MySQL binlog offset info if it is present in the trx
system header. */
void
trx_sys_print_mysql_binlog_offset()
{
- mtr_t mtr;
-
- mtr.start();
-
- const trx_sysf_t* sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD + sys_header)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
- ib::info() << "Last binlog file '"
- << TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
- + sys_header
- << "', position "
- << mach_read_from_8(TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET
- + sys_header);
- }
-
- mtr.commit();
-}
-
-#ifdef WITH_WSREP
-
-#ifdef UNIV_DEBUG
-static long long trx_sys_cur_xid_seqno = -1;
-static unsigned char trx_sys_cur_xid_uuid[16];
-
-/** Read WSREP XID seqno */
-static inline long long read_wsrep_xid_seqno(const XID* xid)
-{
- long long seqno;
- memcpy(&seqno, xid->data + 24, sizeof(long long));
- return seqno;
-}
-
-/** Read WSREP XID UUID */
-static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
-{
- memcpy(buf, xid->data + 8, 16);
-}
-
-#endif /* UNIV_DEBUG */
-
-/** Update WSREP XID info in sys_header of TRX_SYS_PAGE_NO = 5.
-@param[in] xid Transaction XID
-@param[in,out] sys_header sys_header
-@param[in] mtr minitransaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid,
- trx_sysf_t* sys_header,
- mtr_t* mtr)
-{
- ut_ad(xid->formatID == 1);
- ut_ad(wsrep_is_wsrep_xid(xid));
-
- if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
- TRX_SYS_WSREP_XID_MAGIC_N,
- MLOG_4BYTES, mtr);
-#ifdef UNIV_DEBUG
- } else {
- /* Check that seqno is monotonically increasing */
- unsigned char xid_uuid[16];
- long long xid_seqno = read_wsrep_xid_seqno(xid);
- read_wsrep_xid_uuid(xid, xid_uuid);
-
- if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) {
- ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
- trx_sys_cur_xid_seqno = xid_seqno;
- } else {
- memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
- }
-
- trx_sys_cur_xid_seqno = xid_seqno;
-#endif /* UNIV_DEBUG */
- }
-
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_FORMAT,
- (int)xid->formatID,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_GTRID_LEN,
- (int)xid->gtrid_length,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_BQUAL_LEN,
- (int)xid->bqual_length,
- MLOG_4BYTES, mtr);
- mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_DATA,
- (const unsigned char*) xid->data,
- XIDDATASIZE, mtr);
-}
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out] xid WSREP XID
-@return whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid)
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint magic;
-
- ut_ad(xid);
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mtr.commit();
- xid->null();
- xid->gtrid_length = 0;
- xid->bqual_length = 0;
- memset(xid->data, 0, sizeof xid->data);
- memset(xid->data + 24, 0xff, 8);
- return false;
+ if (!*trx_sys.recovered_binlog_filename) {
+ return;
}
- xid->formatID = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
- xid->gtrid_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
- xid->bqual_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
- ut_memcpy(xid->data,
- sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
- XIDDATASIZE);
-
- mtr_commit(&mtr);
- return true;
+ ib::info() << "Last binlog file '"
+ << trx_sys.recovered_binlog_filename
+ << "', position "
+ << trx_sys.recovered_binlog_offset;
}
-#endif /* WITH_WSREP */
-
-/** @return an unallocated rollback segment slot in the TRX_SYS header
+/** Find an available rollback segment.
+@param[in] sys_header
+@return an unallocated rollback segment slot in the TRX_SYS header
@retval ULINT_UNDEFINED if not found */
ulint
-trx_sysf_rseg_find_free(mtr_t* mtr)
+trx_sys_rseg_find_free(const buf_block_t* sys_header)
{
- trx_sysf_t* sys_header = trx_sysf_get(mtr);
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
- if (trx_sysf_rseg_get_page_no(sys_header, i, mtr)
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ if (trx_sysf_rseg_get_page_no(sys_header, rseg_id)
== FIL_NULL) {
- return(i);
+ return rseg_id;
}
}
@@ -386,13 +123,14 @@ trx_sysf_get_n_rseg_slots()
mtr_t mtr;
mtr.start();
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
srv_available_undo_logs = 0;
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
- srv_available_undo_logs
- += trx_sysf_rseg_get_page_no(sys_header, i, &mtr)
- != FIL_NULL;
+ if (const buf_block_t* sys_header = trx_sysf_get(&mtr, false)) {
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ srv_available_undo_logs
+ += trx_sysf_rseg_get_page_no(sys_header,
+ rseg_id)
+ != FIL_NULL;
+ }
}
mtr.commit();
@@ -407,7 +145,6 @@ trx_sysf_create(
/*============*/
mtr_t* mtr) /*!< in: mtr */
{
- trx_sysf_t* sys_header;
ulint slot_no;
buf_block_t* block;
page_t* page;
@@ -419,10 +156,12 @@ trx_sysf_create(
then enter the kernel: we must do it in this order to conform
to the latching order rules. */
- mtr_x_lock_space(TRX_SYS_SPACE, mtr);
+ mtr_x_lock(&fil_system.sys_space->latch, mtr);
+ compile_time_assert(TRX_SYS_SPACE == 0);
/* Create the trx sys file block in a new allocated file segment */
- block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+ block = fseg_create(fil_system.sys_space, 0,
+ TRX_SYS + TRX_SYS_FSEG_HEADER,
mtr);
buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
@@ -440,126 +179,42 @@ trx_sysf_create(
mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
+ TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
-
/* Reset the rollback segment slots. Old versions of InnoDB
(before MySQL 5.5) define TRX_SYS_N_RSEGS as 256 and expect
that the whole array is initialized. */
- ptr = TRX_SYS_RSEGS + sys_header;
+ ptr = TRX_SYS + TRX_SYS_RSEGS + page;
compile_time_assert(256 >= TRX_SYS_N_RSEGS);
memset(ptr, 0xff, 256 * TRX_SYS_RSEG_SLOT_SIZE);
ptr += 256 * TRX_SYS_RSEG_SLOT_SIZE;
- ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
+ ut_a(ptr <= page + (srv_page_size - FIL_PAGE_DATA_END));
/* Initialize all of the page. This part used to be uninitialized. */
- memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
+ memset(ptr, 0, srv_page_size - FIL_PAGE_DATA_END + size_t(page - ptr));
- mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- + page - sys_header, mtr);
+ mlog_log_string(TRX_SYS + page, srv_page_size - FIL_PAGE_DATA_END
+ - TRX_SYS, mtr);
/* Create the first rollback segment in the SYSTEM tablespace */
- slot_no = trx_sysf_rseg_find_free(mtr);
- buf_block_t* rblock = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX,
- slot_no, mtr);
+ slot_no = trx_sys_rseg_find_free(block);
+ buf_block_t* rblock = trx_rseg_header_create(fil_system.sys_space,
+ slot_no, block, mtr);
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
ut_a(rblock->page.id.page_no() == FSP_FIRST_RSEG_PAGE_NO);
}
-/** Initialize the transaction system main-memory data structures. */
-void
-trx_sys_init_at_db_start()
-{
- trx_sysf_t* sys_header;
- ib_uint64_t rows_to_undo = 0;
- const char* unit = "";
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
- trx_sys_get_new_trx_id will evaluate to TRUE when the function
- is first time called, and the value for trx id will be written
- to the disk-based header! Thus trx id values will not overlap when
- the database is repeatedly started! */
-
- mtr_t mtr;
- mtr.start();
-
- sys_header = trx_sysf_get(&mtr);
-
- trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
- + ut_uint64_align_up(mach_read_from_8(sys_header
- + TRX_SYS_TRX_ID_STORE),
- TRX_SYS_TRX_ID_WRITE_MARGIN);
-
- mtr.commit();
- ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
-
- trx_lists_init_at_db_start();
-
- /* This mutex is not strictly required, it is here only to satisfy
- the debug code (assertions). We are still running in single threaded
- bootstrap mode. */
-
- trx_sys_mutex_enter();
-
- if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
- const trx_t* trx;
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- ut_ad(trx->is_recovered);
- assert_trx_in_rw_list(trx);
-
- if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
- rows_to_undo += trx->undo_no;
- }
- }
-
- if (rows_to_undo > 1000000000) {
- unit = "M";
- rows_to_undo = rows_to_undo / 1000000;
- }
-
- ib::info() << UT_LIST_GET_LEN(trx_sys->rw_trx_list)
- << " transaction(s) which must be rolled back or"
- " cleaned up in total " << rows_to_undo << unit
- << " row operations to undo";
-
- ib::info() << "Trx id counter is " << trx_sys->max_trx_id;
- }
-
- trx_sys_mutex_exit();
-
- trx_sys->mvcc->clone_oldest_view(&purge_sys->view);
-}
-
-/*****************************************************************//**
-Creates the trx_sys instance and initializes purge_queue and mutex. */
+/** Create the instance.
+Marks the object as initialised, creates its mutex, initialises
+trx_list, stores 0 to rseg_history_len and initialises rw_trx_hash.
+Debug builds assert that this is the global trx_sys object and that
+it has not been initialised yet. */
void
-trx_sys_create(void)
-/*================*/
+trx_sys_t::create()
{
- ut_ad(trx_sys == NULL);
-
- trx_sys = static_cast<trx_sys_t*>(ut_zalloc_nokey(sizeof(*trx_sys)));
-
- mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex);
-
- UT_LIST_INIT(trx_sys->serialisation_list, &trx_t::no_list);
- UT_LIST_INIT(trx_sys->rw_trx_list, &trx_t::trx_list);
- UT_LIST_INIT(trx_sys->mysql_trx_list, &trx_t::mysql_trx_list);
-
- trx_sys->mvcc = UT_NEW_NOKEY(MVCC(1024));
-
- new(&trx_sys->rw_trx_ids) trx_ids_t(ut_allocator<trx_id_t>(
- mem_key_trx_sys_t_rw_trx_ids));
-
- new(&trx_sys->rw_trx_set) TrxIdSet();
+ ut_ad(this == &trx_sys); /* only the global instance may be created */
+ ut_ad(!is_initialised()); /* must not be created twice */
+ m_initialised = true;
+ mutex_create(LATCH_ID_TRX_SYS, &mutex);
+ UT_LIST_INIT(trx_list, &trx_t::trx_list);
+ my_atomic_store32(&rseg_history_len, 0);
+
+ rw_trx_hash.init();
}
/*****************************************************************//**
@@ -577,260 +232,6 @@ trx_sys_create_sys_pages(void)
mtr_commit(&mtr);
}
-/*****************************************************************//**
-Update the file format tag.
-@return always TRUE */
-static
-ibool
-trx_sys_file_format_max_write(
-/*==========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name, can
- be NULL */
-{
- mtr_t mtr;
- byte* ptr;
- buf_block_t* block;
- ib_uint64_t tag_value;
-
- mtr_start(&mtr);
-
- block = buf_page_get(
- page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
- RW_X_LATCH, &mtr);
-
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (name) {
- *name = file_format_max.name;
- }
-
- mlog_write_ull(ptr, tag_value, &mtr);
-
- mtr_commit(&mtr);
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Read the file format tag.
-@return the file format or ULINT_UNDEFINED if not set. */
-static
-ulint
-trx_sys_file_format_max_read(void)
-/*==============================*/
-{
- mtr_t mtr;
- const byte* ptr;
- const buf_block_t* block;
- ib_id_t file_format_id;
-
- /* Since this is called during the startup phase it's safe to
- read the value without a covering mutex. */
- mtr_start(&mtr);
-
- block = buf_page_get(
- page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
- RW_X_LATCH, &mtr);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
-
- mtr_commit(&mtr);
-
- file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (file_format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- return(ULINT_UNDEFINED);
- }
-
- return((ulint) file_format_id);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- ut_a(id < FILE_FORMAT_NAME_N);
-
- return(file_format_name_map[id]);
-}
-
-/*****************************************************************//**
-Check for the max file format tag stored on disk. Note: If max_format_id
-is == UNIV_FORMAT_MAX + 1 then we only print a warning.
-@return DB_SUCCESS or error code */
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id) /*!< in: max format id to check */
-{
- ulint format_id;
-
- /* Check the file format in the tablespace. Do not try to
- recover if the file format is not supported by the engine
- unless forced by the user. */
- format_id = trx_sys_file_format_max_read();
- if (format_id == ULINT_UNDEFINED) {
- /* Format ID was not set. Set it to minimum possible
- value. */
- format_id = UNIV_FORMAT_MIN;
- }
-
- ib::info() << "Highest supported file format is "
- << trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX) << ".";
-
- if (format_id > UNIV_FORMAT_MAX) {
-
- ut_a(format_id < FILE_FORMAT_NAME_N);
-
- const std::string msg = std::string("The system"
- " tablespace is in a file format that this version"
- " doesn't support - ")
- + trx_sys_file_format_id_to_name(format_id)
- + ".";
-
- if (max_format_id <= UNIV_FORMAT_MAX) {
- ib::error() << msg;
- } else {
- ib::warn() << msg;
- }
-
- if (max_format_id <= UNIV_FORMAT_MAX) {
- return(DB_ERROR);
- }
- }
-
- format_id = (format_id > max_format_id) ? format_id : max_format_id;
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name or
- NULL if not needed. */
-{
- ibool ret = FALSE;
-
- ut_a(format_id <= UNIV_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- /* Only update if not already same value. */
- if (format_id != file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-void
-trx_sys_file_format_tag_init(void)
-/*==============================*/
-{
- ulint format_id;
-
- format_id = trx_sys_file_format_max_read();
-
- /* If format_id is not set then set it to the minimum. */
- if (format_id == ULINT_UNDEFINED) {
- trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
- }
-}
-
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id) /*!< in: file format identifier */
-{
- ibool ret = FALSE;
-
- ut_a(name);
- ut_a(file_format_max.name != NULL);
- ut_a(format_id <= UNIV_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- if (format_id > file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-const char*
-trx_sys_file_format_max_get(void)
-/*=============================*/
-{
- return(file_format_max.name);
-}
-
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-void
-trx_sys_file_format_init(void)
-/*==========================*/
-{
- mutex_create(LATCH_ID_FILE_FORMAT_MAX, &file_format_max.mutex);
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = UNIV_FORMAT_MIN;
-
- file_format_max.name = trx_sys_file_format_id_to_name(
- file_format_max.id);
-}
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-void
-trx_sys_file_format_close(void)
-/*===========================*/
-{
- mutex_free(&file_format_max.mutex);
-}
-
/** Create the rollback segments.
@return whether the creation succeeded */
bool
@@ -909,128 +310,53 @@ trx_sys_create_rsegs()
return(true);
}
-/*********************************************************************
-Shutdown/Close the transaction system. */
+/** Close the transaction system on shutdown */
void
-trx_sys_close(void)
-/*===============*/
+trx_sys_t::close()
{
- ut_ad(trx_sys != NULL);
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
+ if (!is_initialised()) {
+ return;
+ }
- if (ulint size = trx_sys->mvcc->size()) {
+ if (size_t size = view_count()) {
ib::error() << "All read views were not closed before"
" shutdown: " << size << " read views open";
}
- /* Only prepared transactions may be left in the system. Free them. */
- ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx
- || !srv_was_started
- || srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
-
- while (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) {
- UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
- trx_free_prepared(trx);
- }
+ rw_trx_hash.destroy();
/* There can't be any active transactions. */
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys->rseg_array[i]) {
+ if (trx_rseg_t* rseg = rseg_array[i]) {
trx_rseg_mem_free(rseg);
}
- if (trx_rseg_t* rseg = trx_sys->temp_rsegs[i]) {
+ if (trx_rseg_t* rseg = temp_rsegs[i]) {
trx_rseg_mem_free(rseg);
}
}
- UT_DELETE(trx_sys->mvcc);
-
- ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->serialisation_list) == 0);
-
- /* We used placement new to create this mutex. Call the destructor. */
- mutex_free(&trx_sys->mutex);
-
- trx_sys->rw_trx_ids.~trx_ids_t();
-
- trx_sys->rw_trx_set.~TrxIdSet();
-
- ut_free(trx_sys);
-
- trx_sys = NULL;
-}
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-This is only used to check if it's safe to shutdown.
-@return total number of active transactions or 0 if none */
-ulint
-trx_sys_any_active_transactions(void)
-/*=================================*/
-{
- ulint total_trx = 0;
-
- trx_sys_mutex_enter();
-
- total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
- total_trx += trx->state != TRX_STATE_NOT_STARTED;
- }
-
- ut_a(total_trx >= trx_sys->n_prepared_trx);
- total_trx -= trx_sys->n_prepared_trx;
-
- trx_sys_mutex_exit();
-
- return(total_trx);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate the trx_ut_list_t.
-@return true if valid. */
-static
-bool
-trx_sys_validate_trx_list_low(
-/*===========================*/
- trx_ut_list_t* trx_list) /*!< in: &trx_sys->rw_trx_list */
-{
- const trx_t* trx;
- const trx_t* prev_trx = NULL;
-
- ut_ad(trx_sys_mutex_own());
-
- ut_ad(trx_list == &trx_sys->rw_trx_list);
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
-
- check_trx_state(trx);
- ut_a(prev_trx == NULL || prev_trx->id > trx->id);
- }
-
- return(true);
+ ut_a(UT_LIST_GET_LEN(trx_list) == 0);
+ mutex_free(&mutex);
+ m_initialised = false;
}
-/*************************************************************//**
-Validate the trx_sys_t::rw_trx_list.
-@return true if the list is valid. */
-bool
-trx_sys_validate_trx_list()
-/*=======================*/
+/** @return total number of active (non-prepared) transactions */
+ulint trx_sys_t::any_active_transactions()
{
- ut_ad(trx_sys_mutex_own());
-
- ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
-
- return(true);
+ uint32_t total_trx= 0;
+
+ mutex_enter(&mutex);
+ for (trx_t* trx= UT_LIST_GET_FIRST(trx_sys.trx_list);
+ trx != NULL;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ if (trx->state == TRX_STATE_COMMITTED_IN_MEMORY ||
+ (trx->state == TRX_STATE_ACTIVE && trx->id))
+ total_trx++;
+ }
+ mutex_exit(&mutex);
+ return total_trx;
}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 1f6ed7a48d4..2eb3161f3b7 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -37,7 +37,6 @@ Created 3/26/1996 Heikki Tuuri
#include "log0log.h"
#include "os0proc.h"
#include "que0que.h"
-#include "read0read.h"
#include "srv0mon.h"
#include "srv0srv.h"
#include "srv0start.h"
@@ -53,8 +52,16 @@ Created 3/26/1996 Heikki Tuuri
#include <set>
#include <new>
-extern "C"
-int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+/** The bit pattern corresponding to TRX_ID_MAX */
+const byte trx_id_max_bytes[8] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/** The bit pattern corresponding to max timestamp */
+const byte timestamp_max_bytes[7] = {
+ 0x7f, 0xff, 0xff, 0xff, 0x0f, 0x42, 0x3f
+};
+
static const ulint MAX_DETAILED_ERROR_LEN = 256;
@@ -148,7 +155,7 @@ trx_init(
trx->last_sql_stat_start.least_undo_no = 0;
- ut_ad(!MVCC::is_view_active(trx->read_view));
+ ut_ad(!trx->read_view.is_open());
trx->lock.rec_cached = 0;
@@ -175,6 +182,9 @@ struct TrxFactory {
new(&trx->lock.table_locks) lock_list();
+ new(&trx->read_view) ReadView();
+
+ trx->rw_trx_hash_pins = 0;
trx_init(trx);
trx->dict_operation_lock_mode = 0;
@@ -194,7 +204,6 @@ struct TrxFactory {
&trx_named_savept_t::trx_savepoints);
mutex_create(LATCH_ID_TRX, &trx->mutex);
- mutex_create(LATCH_ID_TRX_UNDO, &trx->undo_mutex);
}
/** Release resources held by the transaction object.
@@ -202,8 +211,7 @@ struct TrxFactory {
static void destroy(trx_t* trx)
{
ut_a(trx->magic_n == TRX_MAGIC_N);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
+ ut_ad(!trx->mysql_thd);
ut_a(trx->lock.wait_lock == NULL);
ut_a(trx->lock.wait_thr == NULL);
@@ -220,13 +228,14 @@ struct TrxFactory {
ut_free(trx->detailed_error);
mutex_free(&trx->mutex);
- mutex_free(&trx->undo_mutex);
trx->mod_tables.~trx_mod_tables_t();
- ut_ad(trx->read_view == NULL);
+ ut_ad(!trx->read_view.is_open());
trx->lock.table_locks.~lock_list();
+
+ trx->read_view.~ReadView();
}
/** Enforce any invariants here, this is called before the transaction
@@ -246,9 +255,6 @@ struct TrxFactory {
ut_ad(trx->mysql_thd == 0);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
-
ut_a(trx->lock.wait_thr == NULL);
ut_a(trx->lock.wait_lock == NULL);
ut_a(trx->dict_operation_lock_mode == 0);
@@ -338,9 +344,7 @@ trx_pool_close()
}
/** @return a trx_t instance from trx_pools. */
-static
-trx_t*
-trx_create_low()
+trx_t *trx_create()
{
trx_t* trx = trx_pools->get();
@@ -352,6 +356,7 @@ trx_create_low()
/* We just got trx from pool, it should be non locking */
ut_ad(trx->will_lock == 0);
ut_ad(trx->state == TRX_STATE_NOT_STARTED);
+ ut_ad(!trx->rw_trx_hash_pins);
DBUG_LOG("trx", "Create: " << trx);
@@ -375,90 +380,16 @@ trx_create_low()
trx->wsrep_event = NULL;
#endif /* WITH_WSREP */
- return(trx);
-}
-
-/**
-Release a trx_t instance back to the pool.
-@param trx the instance to release. */
-static
-void
-trx_free(trx_t*& trx)
-{
- assert_trx_is_free(trx);
-
- trx->mysql_thd = 0;
- trx->mysql_log_file_name = 0;
-
- // FIXME: We need to avoid this heap free/alloc for each commit.
- if (trx->autoinc_locks != NULL) {
- ut_ad(ib_vector_is_empty(trx->autoinc_locks));
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->autoinc_locks);
- trx->autoinc_locks = NULL;
- }
-
- trx->mod_tables.clear();
-
- ut_ad(trx->read_view == NULL);
-
- /* trx locking state should have been reset before returning trx
- to pool */
- ut_ad(trx->will_lock == 0);
-
- trx_pools->mem_free(trx);
- /* Unpoison the memory for innodb_monitor_set_option;
- it is operating also on the freed transaction objects. */
- MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
- MEM_UNDEFINED(&trx->undo_mutex, sizeof trx->undo_mutex);
- /* Declare the contents as initialized for Valgrind;
- we checked that it was initialized in trx_pools->mem_free(trx). */
- UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex);
- UNIV_MEM_VALID(&trx->undo_mutex, sizeof trx->undo_mutex);
-
- trx = NULL;
-}
-
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_background(void)
-/*=============================*/
-{
- trx_t* trx;
-
- trx = trx_create_low();
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_mysql(void)
-/*========================*/
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
-
- trx_sys_mutex_enter();
-
- ut_d(trx->in_mysql_trx_list = TRUE);
- UT_LIST_ADD_FIRST(trx_sys->mysql_trx_list, trx);
-
- trx_sys_mutex_exit();
+ trx_sys.register_trx(trx);
return(trx);
}
-/** Check state of transaction before freeing it.
-@param trx trx object to validate */
-static
-void
-trx_validate_state_before_free(trx_t* trx)
+/**
+ Release a trx_t instance back to the pool.
+ @param trx the instance to release.
+*/
+void trx_free(trx_t*& trx)
{
ut_ad(!trx->declared_to_be_inside_innodb);
ut_ad(!trx->n_mysql_tables_in_use);
@@ -495,58 +426,62 @@ trx_validate_state_before_free(trx_t* trx)
trx->dict_operation = TRX_DICT_OP_NONE;
assert_trx_is_inactive(trx);
-}
-/** Free and initialize a transaction object instantinated during recovery.
-@param trx trx object to free and initialize during recovery */
-void
-trx_free_resurrected(trx_t* trx)
-{
- trx_validate_state_before_free(trx);
+ trx_sys.deregister_trx(trx);
- trx_init(trx);
+ assert_trx_is_free(trx);
- trx_free(trx);
-}
+ trx_sys.rw_trx_hash.put_pins(trx);
+ trx->mysql_thd = 0;
+ trx->mysql_log_file_name = 0;
-/** Free a transaction that was allocated by background or user threads.
-@param trx trx object to free */
-void
-trx_free_for_background(trx_t* trx)
-{
- trx_validate_state_before_free(trx);
+ // FIXME: We need to avoid this heap free/alloc for each commit.
+ if (trx->autoinc_locks != NULL) {
+ ut_ad(ib_vector_is_empty(trx->autoinc_locks));
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->autoinc_locks);
+ trx->autoinc_locks = NULL;
+ }
- trx_free(trx);
+ trx->mod_tables.clear();
+
+ /* trx locking state should have been reset before returning trx
+ to pool */
+ ut_ad(trx->will_lock == 0);
+
+ trx_pools->mem_free(trx);
+ /* Unpoison the memory for innodb_monitor_set_option;
+ it is operating also on the freed transaction objects. */
+ MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
+ /* Declare the contents as initialized for Valgrind;
+ we checked that it was initialized in trx_pools->mem_free(trx). */
+ UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex);
+
+ trx = NULL;
}
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
+/** At shutdown, frees a transaction object. */
void
-trx_free_prepared(
-/*==============*/
- trx_t* trx) /*!< in, own: trx object */
+trx_free_at_shutdown(trx_t *trx)
{
+ ut_ad(trx->is_recovered);
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
|| trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)
- || (trx->is_recovered
- && (trx_state_eq(trx, TRX_STATE_ACTIVE)
- || trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY))
+ || (trx_state_eq(trx, TRX_STATE_ACTIVE)
&& (!srv_was_started
|| srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT
|| srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || (!srv_is_being_started
+ && !srv_undo_sources && srv_fast_shutdown))));
ut_a(trx->magic_n == TRX_MAGIC_N);
lock_trx_release_locks(trx);
- trx_undo_free_prepared(trx);
-
- assert_trx_in_rw_list(trx);
+ trx_undo_free_at_shutdown(trx);
ut_a(!trx->read_only);
- ut_d(trx->in_rw_trx_list = FALSE);
-
DBUG_LOG("trx", "Free prepared: " << trx);
trx->state = TRX_STATE_NOT_STARTED;
@@ -562,71 +497,20 @@ trx_free_prepared(
trx_free(trx);
}
-/** Disconnect a transaction from MySQL and optionally mark it as if
-it's been recovered. For the marking the transaction must be in prepared state.
-The recovery-marked transaction is going to survive "alone" so its association
-with the mysql handle is destroyed now rather than when it will be
-finally freed.
-@param[in,out] trx transaction
-@param[in] prepared boolean value to specify whether trx is
- for recovery or not. */
-inline
-void
-trx_disconnect_from_mysql(
- trx_t* trx,
- bool prepared)
-{
- trx_sys_mutex_enter();
-
- ut_ad(trx->in_mysql_trx_list);
- ut_d(trx->in_mysql_trx_list = FALSE);
-
- UT_LIST_REMOVE(trx_sys->mysql_trx_list, trx);
-
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(trx->read_view, true);
- }
-
- ut_ad(trx_sys_validate_trx_list());
-
- if (prepared) {
- ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
-
- trx->is_recovered = true;
- trx_sys->n_prepared_recovered_trx++;
- trx->mysql_thd = NULL;
- /* todo/fixme: suggest to do it at innodb prepare */
- trx->will_lock = 0;
- }
-
- trx_sys_mutex_exit();
-}
-
-/** Disconnect a transaction from MySQL.
-@param[in,out] trx transaction */
-inline
-void
-trx_disconnect_plain(trx_t* trx)
-{
- trx_disconnect_from_mysql(trx, false);
-}
-
-/** Disconnect a prepared transaction from MySQL.
-@param[in,out] trx transaction */
-void
-trx_disconnect_prepared(trx_t* trx)
-{
- trx_disconnect_from_mysql(trx, true);
-}
-
-/** Free a transaction object for MySQL.
-@param[in,out] trx transaction */
-void
-trx_free_for_mysql(trx_t* trx)
+/**
+ Disconnect a prepared transaction from MySQL
+ @param[in,out] trx transaction
+*/
+void trx_disconnect_prepared(trx_t *trx)
{
- trx_disconnect_plain(trx);
- trx_free_for_background(trx);
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(trx->mysql_thd);
+ trx->read_view.close();
+ trx->is_recovered= true;
+ trx->mysql_thd= NULL;
+ /* todo/fixme: suggest to do it at innodb prepare */
+ trx->will_lock= 0;
}
/****************************************************************//**
@@ -636,8 +520,6 @@ void
trx_resurrect_table_locks(
/*======================*/
trx_t* trx, /*!< in/out: transaction */
- const trx_undo_ptr_t* undo_ptr,
- /*!< in: pointer to undo segment. */
const trx_undo_t* undo) /*!< in: undo log */
{
mtr_t mtr;
@@ -645,10 +527,11 @@ trx_resurrect_table_locks(
trx_undo_rec_t* undo_rec;
table_id_set tables;
- ut_ad(undo == undo_ptr->insert_undo || undo == undo_ptr->update_undo);
-
- if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) || undo->empty) {
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
+ if (undo->empty()) {
return;
}
@@ -657,7 +540,8 @@ trx_resurrect_table_locks(
/* trx_rseg_mem_create() may have acquired an X-latch on this
page, so we cannot acquire an S-latch. */
undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->top_page_no), &mtr);
+ page_id_t(trx->rsegs.m_redo.rseg->space->id,
+ undo->top_page_no), &mtr);
undo_rec = undo_page + undo->top_offset;
@@ -700,192 +584,97 @@ trx_resurrect_table_locks(
}
if (trx->state == TRX_STATE_PREPARED) {
- trx->mod_tables.insert(table);
+ trx->mod_tables.insert(
+ trx_mod_tables_t::value_type(table,
+ 0));
}
lock_table_ix_resurrect(table, trx);
- DBUG_PRINT("ib_trx",
- ("resurrect" TRX_ID_FMT
- " table '%s' IX lock from %s undo",
- trx_get_id_for_print(trx),
- table->name.m_name,
- undo == undo_ptr->insert_undo
- ? "insert" : "update"));
+ DBUG_LOG("ib_trx",
+ "resurrect " << ib::hex(trx->id)
+ << " IX lock on " << table->name);
dict_table_close(table, FALSE, FALSE);
}
}
}
-/****************************************************************//**
-Resurrect the transactions that were doing inserts the time of the
-crash, they need to be undone.
-@return trx_t instance */
-static
-trx_t*
-trx_resurrect_insert(
-/*=================*/
- trx_undo_t* undo, /*!< in: entry to UNDO */
- trx_rseg_t* rseg) /*!< in: rollback segment */
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-
- trx->rsegs.m_redo.rseg = rseg;
- *trx->xid = undo->xid;
- trx->id = undo->trx_id;
- trx->rsegs.m_redo.insert_undo = undo;
- trx->is_recovered = true;
-
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in the prepared state
- waiting for a commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
-
- ib::info() << "Transaction "
- << trx_get_id_for_print(trx)
- << " was in the XA prepared state.";
-
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- } else {
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx no; this should have no
- relevance since purge is not interested in committed
- transaction numbers, unless they are in the history
- list, in which case it looks the number from the disk based
- undo log structure */
-
- trx->no = trx->id;
-
- } else {
- trx->state = TRX_STATE_ACTIVE;
-
- /* A running transaction always has the number
- field inited to TRX_ID_MAX */
-
- trx->no = TRX_ID_MAX;
- }
-
- /* trx_start_low() is not called with resurrect, so need to initialize
- start time here.*/
- if (trx->state != TRX_STATE_COMMITTED_IN_MEMORY) {
- trx->start_time = time(NULL);
- trx->start_time_micro = microsecond_interval_timer();
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty) {
- trx->undo_no = undo->top_undo_no + 1;
- trx->undo_rseg_space = undo->rseg->space;
- }
-
- return(trx);
-}
-
-/****************************************************************//**
-Prepared transactions are left in the prepared state waiting for a
-commit or abort decision from MySQL */
-static
-void
-trx_resurrect_update_in_prepared_state(
-/*===================================*/
- trx_t* trx, /*!< in,out: transaction */
- const trx_undo_t* undo) /*!< in: update UNDO record */
-{
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state == TRX_UNDO_PREPARED) {
- ib::info() << "Transaction " << trx_get_id_for_print(trx)
- << " was in the XA prepared state.";
-
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
- }
-
- trx->state = TRX_STATE_PREPARED;
- } else {
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- }
+/**
+ Resurrect the transactions that were doing inserts/updates at the time of the
+ crash; they need to be undone.
+*/
+
+static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
+ time_t start_time, ulonglong start_time_micro,
+ uint64_t *rows_to_undo,
+ bool is_old_insert)
+{
+ trx_state_t state;
+ /*
+ This is single-threaded startup code, we do not need the
+ protection of trx->mutex or trx_sys.mutex here.
+ */
+ switch (undo->state)
+ {
+ case TRX_UNDO_ACTIVE:
+ state= TRX_STATE_ACTIVE;
+ break;
+ case TRX_UNDO_PREPARED:
+ /*
+ Prepared transactions are left in the prepared state
+ waiting for a commit or abort decision from MySQL
+ */
+ ib::info() << "Transaction " << undo->trx_id
+ << " was in the XA prepared state.";
+
+ state= TRX_STATE_PREPARED;
+ break;
+ default:
+ if (is_old_insert && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO)
+ trx_undo_commit_cleanup(undo, false);
+ return;
+ }
+
+ trx_t *trx= trx_create();
+ trx->state= state;
+ ut_d(trx->start_file= __FILE__);
+ ut_d(trx->start_line= __LINE__);
+ ut_ad(trx->no == TRX_ID_MAX);
+
+ if (is_old_insert)
+ trx->rsegs.m_redo.old_insert= undo;
+ else
+ trx->rsegs.m_redo.undo= undo;
+
+ trx->undo_no= undo->top_undo_no + 1;
+ trx->rsegs.m_redo.rseg= rseg;
+ /*
+ Transactions with active data will not have rseg size = 1
+ or will not qualify for purge limit criteria, so it is safe to increment
+ this trx_ref_count without mutex protection.
+ */
+ ++trx->rsegs.m_redo.rseg->trx_ref_count;
+ *trx->xid= undo->xid;
+ trx->id= undo->trx_id;
+ trx->is_recovered= true;
+ trx->start_time= start_time;
+ trx->start_time_micro= start_time_micro;
+
+ if (undo->dict_operation)
+ {
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ if (!trx->table_id)
+ trx->table_id= undo->table_id;
+ }
+
+ trx_sys.rw_trx_hash.insert(trx);
+ trx_sys.rw_trx_hash.put_pins(trx);
+ trx_resurrect_table_locks(trx, undo);
+ if (trx_state_eq(trx, TRX_STATE_ACTIVE))
+ *rows_to_undo+= trx->undo_no;
}
-/****************************************************************//**
-Resurrect the transactions that were doing updates the time of the
-crash, they need to be undone. */
-static
-void
-trx_resurrect_update(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_undo_t* undo, /*!< in/out: update UNDO record */
- trx_rseg_t* rseg) /*!< in/out: rollback segment */
-{
- trx->rsegs.m_redo.rseg = rseg;
- *trx->xid = undo->xid;
- trx->id = undo->trx_id;
- trx->rsegs.m_redo.update_undo = undo;
- trx->is_recovered = true;
-
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state != TRX_UNDO_ACTIVE) {
- trx_resurrect_update_in_prepared_state(trx, undo);
-
- /* We give a dummy value for the trx number */
-
- trx->no = trx->id;
-
- } else {
- trx->state = TRX_STATE_ACTIVE;
-
- /* A running transaction always has the number field inited to
- TRX_ID_MAX */
-
- trx->no = TRX_ID_MAX;
- }
-
- /* trx_start_low() is not called with resurrect, so need to initialize
- start time here.*/
- if (trx->state == TRX_STATE_ACTIVE
- || trx->state == TRX_STATE_PREPARED) {
- trx->start_time = time(NULL);
- trx->start_time_micro = microsecond_interval_timer();
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- if (!trx->table_id) {
- trx->table_id = undo->table_id;
- }
- }
-
- if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
-
- trx->undo_no = undo->top_undo_no + 1;
- trx->undo_rseg_space = undo->rseg->space;
- }
-}
/** Initialize (resurrect) transactions at startup. */
void
@@ -893,22 +682,31 @@ trx_lists_init_at_db_start()
{
ut_a(srv_is_being_started);
ut_ad(!srv_was_started);
- ut_ad(!purge_sys);
- purge_sys = UT_NEW_NOKEY(purge_sys_t());
+ if (srv_operation == SRV_OPERATION_RESTORE) {
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ trx_rseg_array_init();
+ return;
+ }
if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
return;
}
+ purge_sys.create();
trx_rseg_array_init();
/* Look from the rollback segments if there exist undo logs for
transactions. */
+ const time_t start_time = time(NULL);
+ const ulonglong start_time_micro= microsecond_interval_timer();
+ uint64_t rows_to_undo = 0;
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
trx_undo_t* undo;
- trx_rseg_t* rseg = trx_sys->rseg_array[i];
+ trx_rseg_t* rseg = trx_sys.rseg_array[i];
/* Some rollback segment may be unavailable,
especially if the server was previously run with a
@@ -917,76 +715,59 @@ trx_lists_init_at_db_start()
continue;
}
- /* Resurrect transactions that were doing inserts. */
- for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
- undo != NULL;
- undo = UT_LIST_GET_NEXT(undo_list, undo)) {
-
- /* trx_purge() will not run before we return,
- so we can safely increment this without
- holding rseg->mutex. */
- ++rseg->trx_ref_count;
-
- trx_t* trx;
-
- trx = trx_resurrect_insert(undo, rseg);
-
- trx_sys_rw_trx_add(trx);
-
- trx_resurrect_table_locks(
- trx, &trx->rsegs.m_redo, undo);
+ /* Resurrect transactions that were doing inserts
+ using the old separate insert_undo log. */
+ undo = UT_LIST_GET_FIRST(rseg->old_insert_list);
+ while (undo) {
+ trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo);
+ trx_resurrect(undo, rseg, start_time, start_time_micro,
+ &rows_to_undo, true);
+ undo = next;
}
- /* Ressurrect transactions that were doing updates. */
- for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
+ /* Resurrect other transactions. */
+ for (undo = UT_LIST_GET_FIRST(rseg->undo_list);
undo != NULL;
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
-
- /* Check the trx_sys->rw_trx_set first. */
- trx_sys_mutex_enter();
-
- trx_t* trx = trx_get_rw_trx_by_id(undo->trx_id);
-
- trx_sys_mutex_exit();
-
- if (trx == NULL) {
- trx = trx_allocate_for_background();
- ++rseg->trx_ref_count;
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
+ trx_t *trx = trx_sys.find(0, undo->trx_id, false);
+ if (!trx) {
+ trx_resurrect(undo, rseg, start_time,
+ start_time_micro,
+ &rows_to_undo, false);
+ } else {
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(trx->start_time == start_time);
+ ut_ad(trx->is_recovered);
+ ut_ad(trx->rsegs.m_redo.rseg == rseg);
+ ut_ad(trx->rsegs.m_redo.rseg->trx_ref_count);
+
+ trx->rsegs.m_redo.undo = undo;
+ if (undo->top_undo_no >= trx->undo_no) {
+ if (trx_state_eq(trx,
+ TRX_STATE_ACTIVE)) {
+ rows_to_undo -= trx->undo_no;
+ rows_to_undo +=
+ undo->top_undo_no + 1;
+ }
+
+ trx->undo_no = undo->top_undo_no + 1;
+ }
+ trx_resurrect_table_locks(trx, undo);
}
-
- trx_resurrect_update(trx, undo, rseg);
-
- trx_sys_rw_trx_add(trx);
-
- trx_resurrect_table_locks(
- trx, &trx->rsegs.m_redo, undo);
}
}
- TrxIdSet::iterator end = trx_sys->rw_trx_set.end();
-
- for (TrxIdSet::iterator it = trx_sys->rw_trx_set.begin();
- it != end;
- ++it) {
-
- ut_ad(it->m_trx->in_rw_trx_list);
-#ifdef UNIV_DEBUG
- if (it->m_trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = it->m_trx->id;
- }
-#endif /* UNIV_DEBUG */
-
- if (it->m_trx->state == TRX_STATE_ACTIVE
- || it->m_trx->state == TRX_STATE_PREPARED) {
+ if (trx_sys.rw_trx_hash.size()) {
- trx_sys->rw_trx_ids.push_back(it->m_id);
- }
+ ib::info() << trx_sys.rw_trx_hash.size()
+ << " transaction(s) which must be rolled back or"
+ " cleaned up in total " << rows_to_undo
+ << " row operations to undo";
- UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, it->m_trx);
+ ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id();
}
+ trx_sys.clone_oldest_view();
}
/** Assign a persistent rollback segment in a round-robin fashion,
@@ -1001,7 +782,7 @@ static trx_rseg_t* trx_assign_rseg_low()
}
/* The first slot is always assigned to the system tablespace. */
- ut_ad(trx_sys->rseg_array[0]->space == TRX_SYS_SPACE);
+ ut_ad(trx_sys.rseg_array[0]->space == fil_system.sys_space);
/* Choose a rollback segment evenly distributed between 0 and
innodb_undo_logs-1 in a round-robin fashion, skipping those
@@ -1024,7 +805,7 @@ static trx_rseg_t* trx_assign_rseg_low()
do {
for (;;) {
- rseg = trx_sys->rseg_array[slot];
+ rseg = trx_sys.rseg_array[slot];
#ifdef UNIV_DEBUG
/* Ensure that we are not revisiting the same
@@ -1043,14 +824,14 @@ static trx_rseg_t* trx_assign_rseg_low()
ut_ad(rseg->is_persistent());
- if (rseg->space != TRX_SYS_SPACE) {
+ if (rseg->space != fil_system.sys_space) {
if (rseg->skip_allocation
|| !srv_undo_tablespaces) {
continue;
}
} else if (trx_rseg_t* next
- = trx_sys->rseg_array[slot]) {
- if (next->space != TRX_SYS_SPACE
+ = trx_sys.rseg_array[slot]) {
+ if (next->space != fil_system.sys_space
&& srv_undo_tablespaces > 0) {
/** If dedicated
innodb_undo_tablespaces have
@@ -1080,11 +861,11 @@ static trx_rseg_t* trx_assign_rseg_low()
}
/** Set the innodb_log_optimize_ddl page flush observer
-@param[in] space_id tablespace id
-@param[in,out] stage performance_schema accounting */
-void trx_t::set_flush_observer(ulint space_id, ut_stage_alter_t* stage)
+@param[in,out] space tablespace
+@param[in,out] stage performance_schema accounting */
+void trx_t::set_flush_observer(fil_space_t* space, ut_stage_alter_t* stage)
{
- flush_observer = UT_NEW_NOKEY(FlushObserver(space_id, this, stage));
+ flush_observer = UT_NEW_NOKEY(FlushObserver(space, this, stage));
}
/** Remove the flush observer */
@@ -1109,17 +890,13 @@ trx_t::assign_temp_rseg()
multiple transactions that start modifications concurrently
will write their undo log to the same rollback segment. */
static ulong rseg_slot;
- trx_rseg_t* rseg = trx_sys->temp_rsegs[
+ trx_rseg_t* rseg = trx_sys.temp_rsegs[
rseg_slot++ & (TRX_SYS_N_RSEGS - 1)];
ut_ad(!rseg->is_persistent());
rsegs.m_noredo.rseg = rseg;
if (id == 0) {
- mutex_enter(&trx_sys->mutex);
- id = trx_sys_get_new_trx_id();
- trx_sys->rw_trx_ids.push_back(id);
- trx_sys->rw_trx_set.insert(TrxTrack(id, this));
- mutex_exit(&trx_sys->mutex);
+ trx_sys.register_rw(this);
}
ut_ad(!rseg->is_persistent());
@@ -1171,17 +948,14 @@ trx_start_low(
ut_a(ib_vector_is_empty(trx->autoinc_locks));
ut_a(trx->lock.table_locks.empty());
- /* If this transaction came from trx_allocate_for_mysql(),
- trx->in_mysql_trx_list would hold. In that case, the trx->state
- change must be protected by the trx_sys->mutex, so that
- lock_print_info_all_transactions() will have a consistent view. */
-
- ut_ad(!trx->in_rw_trx_list);
+ /* No other thread can access this trx object through rw_trx_hash, thus
+ we don't need trx_sys.mutex protection for that purpose. Still this
+ trx can be found through trx_sys.trx_list, which means state
+ change must be protected by e.g. trx->mutex.
- /* We tend to over assert and that complicates the code somewhat.
- e.g., the transaction state can be set earlier but we are forced to
- set it under the protection of the trx_sys_t::mutex because some
- trx list assertions are triggered unnecessarily. */
+ For now we update it without mutex protection, because original code
+ did it this way. It has to be reviewed and fixed properly. */
+ trx->state = TRX_STATE_ACTIVE;
/* By default all transactions are in the read-only list unless they
are non-locking auto-commit read only transactions or background
@@ -1192,37 +966,14 @@ trx_start_low(
if (!trx->read_only
&& (trx->mysql_thd == 0 || read_write || trx->ddl)) {
- trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
-
/* Temporary rseg is assigned only if the transaction
updates a temporary table */
-
- trx_sys_mutex_enter();
-
- trx->id = trx_sys_get_new_trx_id();
-
- trx_sys->rw_trx_ids.push_back(trx->id);
-
- trx_sys_rw_trx_add(trx);
-
+ trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
ut_ad(trx->rsegs.m_redo.rseg != 0
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
- UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
-
- ut_d(trx->in_rw_trx_list = true);
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
- }
-#endif /* UNIV_DEBUG */
-
- trx->state = TRX_STATE_ACTIVE;
-
- ut_ad(trx_sys_validate_trx_list());
-
- trx_sys_mutex_exit();
+ trx_sys.register_rw(trx);
} else {
if (!trx_is_autocommit_non_locking(trx)) {
@@ -1231,26 +982,11 @@ trx_start_low(
to write to the temporary table. */
if (read_write) {
-
- trx_sys_mutex_enter();
-
ut_ad(!srv_read_only_mode);
-
- trx->id = trx_sys_get_new_trx_id();
-
- trx_sys->rw_trx_ids.push_back(trx->id);
-
- trx_sys->rw_trx_set.insert(
- TrxTrack(trx->id, trx));
-
- trx_sys_mutex_exit();
+ trx_sys.register_rw(trx);
}
-
- trx->state = TRX_STATE_ACTIVE;
-
} else {
ut_ad(!read_write);
- trx->state = TRX_STATE_ACTIVE;
}
}
@@ -1265,52 +1001,36 @@ trx_start_low(
}
/** Set the serialisation number for a persistent committed transaction.
-@param[in,out] trx committed transaction with persistent changes
-@param[in,out] rseg rollback segment for update_undo, or NULL */
+@param[in,out] trx committed transaction with persistent changes */
static
void
-trx_serialise(trx_t* trx, trx_rseg_t* rseg)
+trx_serialise(trx_t* trx)
{
- ut_ad(!rseg || rseg == trx->rsegs.m_redo.rseg);
+ trx_rseg_t *rseg = trx->rsegs.m_redo.rseg;
+ ut_ad(rseg);
+ ut_ad(mutex_own(&rseg->mutex)); /* the caller must already hold rseg->mutex */
- trx_sys_mutex_enter();
-
- trx->no = trx_sys_get_new_trx_id();
+ if (rseg->last_page_no == FIL_NULL) {
+ mutex_enter(&purge_sys.pq_mutex); /* held across the trx->no assignment and the purge_queue push below */
+ }
- /* Track the minimum serialisation number. */
- UT_LIST_ADD_LAST(trx_sys->serialisation_list, trx);
+ trx_sys.assign_new_trx_no(trx);
- /* If the rollack segment is not empty then the
+ /* If the rollback segment is not empty then the
new trx_t::no can't be less than any trx_t::no
already in the rollback segment. User threads only
produce events when a rollback segment is empty. */
- if (rseg && rseg->last_page_no == FIL_NULL) {
- TrxUndoRsegs elem(trx->no);
- elem.push_back(rseg);
-
- mutex_enter(&purge_sys->pq_mutex);
-
- /* This is to reduce the pressure on the trx_sys_t::mutex
- though in reality it should make very little (read no)
- difference because this code path is only taken when the
- rbs is empty. */
-
- trx_sys_mutex_exit();
-
- purge_sys->purge_queue.push(elem);
-
- mutex_exit(&purge_sys->pq_mutex);
- } else {
- trx_sys_mutex_exit();
+ if (rseg->last_page_no == FIL_NULL) {
+ purge_sys.purge_queue.push(TrxUndoRsegs(trx->no, *rseg));
+ mutex_exit(&purge_sys.pq_mutex); /* pairs with the mutex_enter() taken under the same condition above */
}
}
/****************************************************************//**
Assign the transaction its history serialisation number and write the
-update UNDO log record to the assigned rollback segment.
-@return true if a serialisation log was written */
+update UNDO log record to the assigned rollback segment. */
static
-bool
+void
trx_write_serialisation_history(
/*============================*/
trx_t* trx, /*!< in/out: transaction */
@@ -1341,70 +1061,43 @@ trx_write_serialisation_history(
temp_mtr.commit();
}
- if (!trx->rsegs.m_redo.rseg) {
- ut_ad(!trx->rsegs.m_redo.insert_undo);
- ut_ad(!trx->rsegs.m_redo.update_undo);
- return false;
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ if (!rseg) {
+ ut_ad(!trx->rsegs.m_redo.undo);
+ ut_ad(!trx->rsegs.m_redo.old_insert);
+ return;
}
- trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t*& undo = trx->rsegs.m_redo.undo;
+ trx_undo_t*& old_insert = trx->rsegs.m_redo.old_insert;
- if (!insert && !update) {
- return false;
+ if (!undo && !old_insert) {
+ return;
}
ut_ad(!trx->read_only);
- trx_rseg_t* update_rseg = update ? trx->rsegs.m_redo.rseg : NULL;
- mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
+ ut_ad(!undo || undo->rseg == rseg);
+ ut_ad(!old_insert || old_insert->rseg == rseg);
+ mutex_enter(&rseg->mutex);
/* Assign the transaction serialisation number and add any
- update_undo log to the purge queue. */
- trx_serialise(trx, update_rseg);
+ undo log to the purge queue. */
+ trx_serialise(trx);
- /* It is not necessary to acquire trx->undo_mutex here because
- only a single OS thread is allowed to commit this transaction. */
- if (insert) {
- trx_undo_set_state_at_finish(insert, mtr);
+ if (UNIV_LIKELY_NULL(old_insert)) {
+ UT_LIST_REMOVE(rseg->old_insert_list, old_insert);
+ trx_purge_add_undo_to_history(trx, old_insert, mtr);
}
- if (update) {
- /* The undo logs and possible delete-marked records
- for updates and deletes will be purged later. */
- page_t* undo_hdr_page = trx_undo_set_state_at_finish(
- update, mtr);
-
- trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
+ if (undo) {
+ UT_LIST_REMOVE(rseg->undo_list, undo);
+ trx_purge_add_undo_to_history(trx, undo, mtr);
}
- mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
+ mutex_exit(&rseg->mutex);
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
- trx_sysf_t* sys_header = trx_sysf_get(mtr);
-#ifdef WITH_WSREP
- /* Update latest MySQL wsrep XID in trx sys header. */
- if (wsrep_is_wsrep_xid(trx->xid)) {
- trx_sys_update_wsrep_checkpoint(trx->xid, sys_header, mtr);
- }
-#endif /* WITH_WSREP */
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name != NULL
- && trx->mysql_log_file_name[0] != '\0') {
-
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- sys_header,
- mtr);
-
- trx->mysql_log_file_name = NULL;
- }
-
- return(true);
+ trx->mysql_log_file_name = NULL;
}
/********************************************************************
@@ -1533,14 +1226,27 @@ trx_update_mod_tables_timestamp(
/*============================*/
trx_t* trx) /*!< in: transaction */
{
-
- ut_ad(trx->id != 0);
-
/* consider using trx->start_time if calling time() is too
expensive here */
const time_t now = time(NULL);
trx_mod_tables_t::const_iterator end = trx->mod_tables.end();
+#ifdef UNIV_DEBUG
+# if MYSQL_VERSION_ID >= 100405
+# define dict_sys_mutex dict_sys.mutex
+# else
+# define dict_sys_mutex dict_sys->mutex
+# endif
+
+ const bool preserve_tables = !innodb_evict_tables_on_commit_debug
+ || trx->is_recovered /* avoid trouble with XA recovery */
+# if 1 /* if dict_stats_exec_sql() were not playing dirty tricks */
+ || mutex_own(&dict_sys_mutex)
+# else /* this would be more proper way to do it */
+ || trx->dict_operation_lock_mode || trx->dict_operation
+# endif
+ ;
+#endif
for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin();
it != end;
@@ -1554,56 +1260,30 @@ trx_update_mod_tables_timestamp(
"garbage" in table->update_time is justified because
protecting it with a latch here would be too performance
intrusive. */
- (*it)->update_time = now;
- }
-
- trx->mod_tables.clear();
-}
-
-/**
-Erase the transaction from running transaction lists and serialization
-list. Active RW transaction list of a MVCC snapshot(ReadView::prepare)
-won't include this transaction after this call. All implicit locks are
-also released by this call as trx is removed from rw_trx_list.
-@param[in] trx Transaction to erase, must have an ID > 0
-@param[in] serialised true if serialisation log was written */
-static
-void
-trx_erase_lists(
- trx_t* trx,
- bool serialised)
-{
- ut_ad(trx->id > 0);
- trx_sys_mutex_enter();
-
- if (serialised) {
- UT_LIST_REMOVE(trx_sys->serialisation_list, trx);
- }
-
- trx_ids_t::iterator it = std::lower_bound(
- trx_sys->rw_trx_ids.begin(),
- trx_sys->rw_trx_ids.end(),
- trx->id);
- ut_ad(*it == trx->id);
- trx_sys->rw_trx_ids.erase(it);
-
- if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
-
- ut_ad(!trx->in_rw_trx_list);
- } else {
-
- UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = false);
- ut_ad(trx_sys_validate_trx_list());
-
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(trx->read_view, true);
+ dict_table_t* table = it->first;
+ table->update_time = now;
+#ifdef UNIV_DEBUG
+ if (preserve_tables || table->get_ref_count()) {
+ /* do not evict when committing DDL operations
+ or if some other transaction is holding the
+ table handle */
+ continue;
}
+ /* recheck while holding the mutex that blocks
+ table->acquire() */
+ mutex_enter(&dict_sys_mutex);
+ if (!table->get_ref_count()) {
+# if MYSQL_VERSION_ID >= 100405
+ dict_sys.remove(table, true);
+# else
+ dict_table_remove_from_cache_low(table, true);
+# endif
+ }
+ mutex_exit(&dict_sys_mutex);
+#endif
}
- trx_sys->rw_trx_set.erase(TrxTrack(trx->id));
-
- trx_sys_mutex_exit();
+ trx->mod_tables.clear();
}
/****************************************************************//**
@@ -1613,21 +1293,18 @@ void
trx_commit_in_memory(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
- const mtr_t* mtr, /*!< in: mini-transaction of
+ const mtr_t* mtr) /*!< in: mini-transaction of
trx_write_serialisation_history(), or NULL if
the transaction did not modify anything */
- bool serialised)
- /*!< in: true if serialisation log was
- written */
{
trx->must_flush_log_later = false;
+ trx->read_view.close();
if (trx_is_autocommit_non_locking(trx)) {
ut_ad(trx->id == 0);
ut_ad(trx->read_only);
ut_a(!trx->is_recovered);
ut_ad(trx->rsegs.m_redo.rseg == NULL);
- ut_ad(!trx->in_rw_trx_list);
/* Note: We are asserting without holding the lock mutex. But
that is OK because this transaction is not waiting and cannot
@@ -1640,15 +1317,11 @@ trx_commit_in_memory(
there is an inherent race here around state transition during
printouts. We ignore this race for the sake of efficiency.
However, the trx_sys_t::mutex will protect the trx_t instance
- and it cannot be removed from the mysql_trx_list and freed
+ and it cannot be removed from the trx_list and freed
without first acquiring the trx_sys_t::mutex. */
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(trx->read_view, false);
- }
-
MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
DBUG_LOG("trx", "Autocommit in memory: " << trx);
@@ -1656,9 +1329,9 @@ trx_commit_in_memory(
} else {
if (trx->id > 0) {
/* For consistent snapshot, we need to remove current
- transaction from running transaction id list for mvcc
- before doing commit and releasing locks. */
- trx_erase_lists(trx, serialised);
+ transaction from rw_trx_hash before doing commit and
+ releasing locks. */
+ trx_sys.deregister_rw(trx);
}
lock_trx_release_locks(trx);
@@ -1671,21 +1344,16 @@ trx_commit_in_memory(
DEBUG_SYNC_C("after_trx_committed_in_memory");
if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
-
MONITOR_INC(MONITOR_TRX_RO_COMMIT);
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(
- trx->read_view, false);
- }
-
} else {
+ trx_update_mod_tables_timestamp(trx);
MONITOR_INC(MONITOR_TRX_RW_COMMIT);
}
trx->id = 0;
}
- ut_ad(!trx->rsegs.m_redo.update_undo);
+ ut_ad(!trx->rsegs.m_redo.undo);
if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) {
mutex_enter(&rseg->mutex);
@@ -1693,14 +1361,14 @@ trx_commit_in_memory(
--rseg->trx_ref_count;
mutex_exit(&rseg->mutex);
- if (trx_undo_t*& insert = trx->rsegs.m_redo.insert_undo) {
+ if (trx_undo_t*& insert = trx->rsegs.m_redo.old_insert) {
ut_ad(insert->rseg == rseg);
trx_undo_commit_cleanup(insert, false);
insert = NULL;
}
}
- ut_ad(!trx->rsegs.m_redo.insert_undo);
+ ut_ad(!trx->rsegs.m_redo.old_insert);
if (mtr != NULL) {
if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
@@ -1782,9 +1450,6 @@ trx_commit_in_memory(
DBUG_LOG("trx", "Commit in memory: " << trx);
trx->state = TRX_STATE_NOT_STARTED;
- /* trx->in_mysql_trx_list would hold between
- trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
- hold for recovered transactions or system transactions. */
assert_trx_is_free(trx);
trx_init(trx);
@@ -1795,19 +1460,18 @@ trx_commit_in_memory(
srv_wake_purge_thread_if_not_active();
}
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-void
-trx_commit_low(
-/*===========*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
- or NULL if trx made no modifications */
+/** Commit a transaction and a mini-transaction.
+@param[in,out] trx transaction
+@param[in,out] mtr mini-transaction (NULL if no modifications) */
+void trx_commit_low(trx_t* trx, mtr_t* mtr)
{
assert_trx_nonlocking_or_in_list(trx);
ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
ut_ad(!mtr || mtr->is_active());
- ut_ad(!mtr == !trx->has_logged());
+ ut_d(bool aborted = trx->in_rollback
+ && trx->error_state == DB_DEADLOCK);
+ ut_ad(!mtr == (aborted || !trx->has_logged_or_recovered()));
+ ut_ad(!mtr || !aborted);
/* undo_no is non-zero if we're doing the final commit. */
if (trx->fts_trx != NULL && trx->undo_no != 0) {
@@ -1831,10 +1495,12 @@ trx_commit_low(
}
}
- bool serialised;
+#ifndef DBUG_OFF
+ const bool debug_sync = trx->mysql_thd && trx->has_logged_persistent();
+#endif
if (mtr != NULL) {
- serialised = trx_write_serialisation_history(trx, mtr);
+ trx_write_serialisation_history(trx, mtr);
/* The following call commits the mini-transaction, making the
whole transaction committed in the file-based world, at this
@@ -1863,9 +1529,6 @@ trx_commit_low(
DBUG_SUICIDE();
});
/*--------------*/
-
- } else {
- serialised = false;
}
#ifndef DBUG_OFF
/* In case this function is called from a stack executing
@@ -1876,12 +1539,12 @@ trx_commit_low(
thd->debug_sync_control defined any longer. However the stack
is possible only with a prepared trx not updating any data.
*/
- if (trx->mysql_thd != NULL && trx->has_logged_persistent()) {
+ if (debug_sync) {
DEBUG_SYNC_C("before_trx_state_committed_in_memory");
}
#endif
- trx_commit_in_memory(trx, mtr, serialised);
+ trx_commit_in_memory(trx, mtr);
}
/****************************************************************//**
@@ -1897,7 +1560,7 @@ trx_commit(
DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start",
DBUG_SUICIDE(););
- if (trx->has_logged()) {
+ if (trx->has_logged_or_recovered()) {
mtr = &local_mtr;
mtr->start();
} else {
@@ -1909,82 +1572,13 @@ trx_commit(
}
/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->is_recovered);
- ut_ad(!trx->rsegs.m_noredo.undo);
- ut_ad(!trx->rsegs.m_redo.update_undo);
-
- if (trx_undo_t*& undo = trx->rsegs.m_redo.insert_undo) {
- ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_commit_cleanup(undo, false);
- undo = NULL;
- }
-
- memset(&trx->rsegs, 0x0, sizeof(trx->rsegs));
- trx->undo_no = 0;
- trx->undo_rseg_space = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
-
- trx_sys_mutex_enter();
-
- ut_a(!trx->read_only);
-
- UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
-
- ut_d(trx->in_rw_trx_list = FALSE);
-
- trx_sys_mutex_exit();
-
- /* Change the transaction state without mutex protection, now
- that it no longer is in the trx_list. Recovered transactions
- are never placed in the mysql_trx_list. */
- ut_ad(trx->is_recovered);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
- DBUG_LOG("trx", "Cleanup at startup: " << trx);
- trx->id = 0;
- trx->state = TRX_STATE_NOT_STARTED;
-}
-
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-ReadView*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx) /*!< in/out: active transaction */
-{
- ut_ad(trx->state == TRX_STATE_ACTIVE);
-
- if (srv_read_only_mode) {
-
- ut_ad(trx->read_view == NULL);
- return(NULL);
-
- } else if (!MVCC::is_view_active(trx->read_view)) {
- trx_sys->mvcc->view_open(trx->read_view, trx);
- }
-
- return(trx->read_view);
-}
-
-/****************************************************************//**
Prepares a transaction for commit/rollback. */
void
trx_commit_or_rollback_prepare(
/*===========================*/
trx_t* trx) /*!< in/out: transaction */
{
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the commit or rollback should be invoked for a
running (or recovered prepared) transaction that is associated
with the current thread. */
@@ -2110,10 +1704,6 @@ trx_commit_for_mysql(
case TRX_STATE_PREPARED_RECOVERED:
trx->op_info = "committing";
- if (trx->id != 0) {
- trx_update_mod_tables_timestamp(trx);
- }
-
trx_commit(trx);
MONITOR_DEC(MONITOR_TRX_ACTIVE);
@@ -2162,7 +1752,6 @@ trx_mark_sql_stat_end(
break;
case TRX_STATE_NOT_STARTED:
trx->undo_no = 0;
- trx->undo_rseg_space = 0;
/* fall through */
case TRX_STATE_ACTIVE:
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
@@ -2178,8 +1767,7 @@ trx_mark_sql_stat_end(
}
/**********************************************************************//**
-Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
+Prints info about a transaction. */
void
trx_print_low(
/*==========*/
@@ -2200,12 +1788,10 @@ trx_print_low(
ibool newline;
const char* op_info;
- ut_ad(trx_sys_mutex_own());
-
fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
/* trx->state cannot change from or to NOT_STARTED while we
- are holding the trx_sys->mutex. It may change from ACTIVE to
+ are holding the trx_sys.mutex. It may change from ACTIVE to
PREPARED or COMMITTED. */
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
@@ -2299,7 +1885,7 @@ state_ok:
/**********************************************************************//**
Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
+The caller must hold lock_sys.mutex.
When possible, use trx_print() instead. */
void
trx_print_latched(
@@ -2310,7 +1896,6 @@ trx_print_latched(
or 0 to use the default max length */
{
ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
trx_print_low(f, trx, max_query_len,
lock_number_of_rows_locked(&trx->lock),
@@ -2318,116 +1903,9 @@ trx_print_latched(
mem_heap_get_size(trx->lock.lock_heap));
}
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
- FILE* f,
- /*!< in: output stream */
- const trx_t* trx,
- /*!< in: transaction */
- ulint max_query_len)
- /*!< in: max query length to print,
- or 0 to use the default max length */
-{
- ibool newline;
- const char* op_info;
-
- ut_ad(lock_mutex_own());
- ut_ad(trx->lock.trx_locks.count > 0);
-
- fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
-
- /* trx->state may change since trx_sys->mutex is not required */
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- fputs(", not started", f);
- goto state_ok;
- case TRX_STATE_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_PREPARED:
- case TRX_STATE_PREPARED_RECOVERED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- goto state_ok;
- }
- fprintf(f, ", state %lu", (ulong) trx->state);
- ut_ad(0);
-state_ok:
-
- /* prevent a race condition */
- op_info = trx->op_info;
-
- if (*op_info) {
- putc(' ', f);
- fputs(op_info, f);
- }
-
- if (trx->is_recovered) {
- fputs(" recovered trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- newline = TRUE;
-
- /* trx->lock.que_state of an ACTIVE transaction may change
- while we are not holding trx->mutex. We perform a dirty read
- for performance reasons. */
-
- switch (trx->lock.que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
- }
-
- if (trx->undo_no != 0) {
- newline = TRUE;
- fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(
- f, trx->mysql_thd, static_cast<uint>(max_query_len));
- }
-}
-#endif /* WITH_WSREP */
/**********************************************************************//**
Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
+Acquires and releases lock_sys.mutex. */
void
trx_print(
/*======*/
@@ -2446,53 +1924,10 @@ trx_print(
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
- mutex_enter(&trx_sys->mutex);
-
trx_print_low(f, trx, max_query_len,
n_rec_locks, n_trx_locks, heap_size);
-
- mutex_exit(&trx_sys->mutex);
}
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-ibool
-trx_assert_started(
-/*===============*/
- const trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx_sys_mutex_own());
-
- /* Non-locking autocommits should not hold any locks and this
- function is only called from the locking code. */
- check_trx_state(trx);
-
- /* trx->state can change from or to NOT_STARTED while we are holding
- trx_sys->mutex for non-locking autocommit selects but not for other
- types of transactions. It may change from ACTIVE to PREPARED. Unless
- we are holding lock_sys->mutex, it may also change to COMMITTED. */
-
- switch (trx->state) {
- case TRX_STATE_PREPARED:
- case TRX_STATE_PREPARED_RECOVERED:
- return(TRUE);
-
- case TRX_STATE_ACTIVE:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- return(TRUE);
-
- case TRX_STATE_NOT_STARTED:
- break;
- }
-
- ut_error;
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
/*******************************************************************//**
Compares the "weight" (or size) of two transactions. Transactions that
have edited non-transactional tables are considered heavier than ones
@@ -2535,11 +1970,10 @@ static
lsn_t
trx_prepare_low(trx_t* trx)
{
- mtr_t mtr;
+ ut_ad(!trx->rsegs.m_redo.old_insert);
+ ut_ad(!trx->is_recovered);
- /* It is not necessary to acquire trx->undo_mutex here because
- only the owning (connection) thread of the transaction is
- allowed to perform XA PREPARE. */
+ mtr_t mtr;
if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
@@ -2554,15 +1988,15 @@ trx_prepare_low(trx_t* trx)
mtr.commit();
}
- trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
- if (!insert && !update) {
+ if (!undo) {
/* There were no changes to persistent tables. */
return(0);
}
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ ut_ad(undo->rseg == rseg);
mtr.start();
@@ -2572,17 +2006,7 @@ trx_prepare_low(trx_t* trx)
world, at the serialization point of lsn. */
mutex_enter(&rseg->mutex);
-
- if (insert) {
- ut_ad(insert->rseg == rseg);
- trx_undo_set_state_at_prepare(trx, insert, false, &mtr);
- }
-
- if (update) {
- ut_ad(update->rseg == rseg);
- trx_undo_set_state_at_prepare(trx, update, false, &mtr);
- }
-
+ trx_undo_set_state_at_prepare(trx, undo, false, &mtr);
mutex_exit(&rseg->mutex);
/* Make the XA PREPARE durable. */
@@ -2607,13 +2031,10 @@ trx_prepare(
DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
- /*--------------------------------------*/
ut_a(trx->state == TRX_STATE_ACTIVE);
- trx_sys_mutex_enter();
+ trx_mutex_enter(trx);
trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys_mutex_exit();
- /*--------------------------------------*/
+ trx_mutex_exit(trx);
if (lsn) {
/* Depending on the my.cnf options, we may now write the log
@@ -2649,159 +2070,157 @@ void trx_prepare_for_mysql(trx_t* trx)
trx->op_info = "";
}
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-@return number of prepared transactions stored in xid_list */
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len) /*!< in: number of slots in xid_list */
-{
- trx_t* trx;
- ulint count = 0;
-
- ut_ad(xid_list);
- ut_ad(len);
-
- /* We should set those transactions which are in the prepared state
- to the xid_list */
-
- trx_sys_mutex_enter();
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_rw_list(trx);
- /* The state of a read-write transaction cannot change
- from or to NOT_STARTED while we are holding the
- trx_sys->mutex. It may change to PREPARED, but not if
- trx->is_recovered. It may also change to COMMITTED. */
- if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
- trx->state = TRX_STATE_PREPARED_RECOVERED;
- xid_list[count] = *trx->xid;
+struct trx_recover_for_mysql_callback_arg /* state shared by trx_recover_for_mysql() and its rw_trx_hash callback */
+{
+ XID *xid_list; /* caller's output array; the callback stores at most len entries */
+ uint len; /* number of slots in xid_list */
+ uint count; /* PREPARED transactions seen so far; keeps growing past len */
+};
- if (count == 0) {
- ib::info() << "Starting recovery for"
- " XA transactions...";
- }
- ib::info() << "Transaction "
- << trx_get_id_for_print(trx)
- << " in prepared state after recovery";
+/** rw_trx_hash walker for trx_recover_for_mysql(): record one recovered
+PREPARED transaction.
+Every PREPARED transaction is counted in arg->count, but only the first
+arg->len of them are copied to arg->xid_list and switched to
+TRX_STATE_PREPARED_RECOVERED.
+@param[in,out]	element	hash element; its mutex is held while trx is examined
+@param[in,out]	arg	output array, its capacity and the running count
+@return false always, so that all transactions get counted */
+static my_bool trx_recover_for_mysql_callback(rw_trx_hash_element_t *element,
+  trx_recover_for_mysql_callback_arg *arg)
+{
+  DBUG_ASSERT(arg->len > 0);
+  mutex_enter(&element->mutex);
+  if (trx_t *trx= element->trx)
+  {
+    /*
+      The state of a read-write transaction can only change from ACTIVE to
+      PREPARED while we are holding the element->mutex. But since it is
+      executed at startup no state change should occur.
+    */
+    if (trx_state_eq(trx, TRX_STATE_PREPARED))
+    {
+      ut_ad(trx->is_recovered);
+      ut_ad(trx->id);
+      if (arg->count == 0)
+        ib::info() << "Starting recovery for XA transactions...";
+      /* Index xid_list only after the bounds check: count keeps growing
+      past len, so forming a reference to xid_list[count] up front would
+      compute an out-of-bounds address. */
+      const uint slot= arg->count++;
+      if (slot < arg->len)
+      {
+        trx->state= TRX_STATE_PREPARED_RECOVERED;
+        ib::info() << "Transaction " << trx->id
+                   << " in prepared state after recovery";
+        ib::info() << "Transaction contains changes to " << trx->undo_no
+                   << " rows";
+        arg->xid_list[slot]= *trx->xid;
+      }
+    }
+  }
+  mutex_exit(&element->mutex);
+  /* Do not terminate upon reaching arg->len; count all transactions */
+  return false;
+}
+
+
+/** rw_trx_hash walker: revert a transaction that was marked
+TRX_STATE_PREPARED_RECOVERED back to TRX_STATE_PREPARED.
+@param[in,out]	element	hash element, examined under element->mutex
+@return false always */
+static my_bool trx_recover_reset_callback(rw_trx_hash_element_t *element,
+  void*)
+{
+  mutex_enter(&element->mutex);
+  trx_t *const recovered= element->trx;
+  if (recovered && trx_state_eq(recovered, TRX_STATE_PREPARED_RECOVERED))
+    recovered->state= TRX_STATE_PREPARED;
+  mutex_exit(&element->mutex);
+  return false;
+}
- ib::info() << "Transaction contains changes to "
- << trx->undo_no << " rows";
- count++;
+/**
+ Find prepared transaction objects for recovery.
- if (count == len) {
- goto partial;
- }
- }
- }
+ @param[out] xid_list prepared transactions
+ @param[in] len number of slots in xid_list
- /* After returning the full list, reset the state, because
- there will be a second call to recover the transactions. */
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- if (trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)) {
- trx->state = TRX_STATE_PREPARED;
- }
- }
+ @return number of prepared transactions stored in xid_list
+*/
-partial:
- trx_sys_mutex_exit();
+int trx_recover_for_mysql(XID *xid_list, uint len)
+{
+ trx_recover_for_mysql_callback_arg arg= { xid_list, len, 0 }; /* count starts at 0 */
- if (count > 0){
- ib::info() << count << " transactions in prepared state"
- " after recovery";
- }
+ ut_ad(xid_list);
+ ut_ad(len);
- return(int (count));
+ /* Fill xid_list with PREPARED transactions. */
+ trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
+ (trx_recover_for_mysql_callback), &arg);
+ if (arg.count)
+ {
+ ib::info() << arg.count
+ << " transactions in prepared state after recovery";
+ /* After returning the full list, reset the state, because
+ init_server_components() wants to recover the collection of
+ transactions twice, by first calling tc_log->open() and then
+ ha_recover() directly. */
+ if (arg.count <= len) /* every PREPARED transaction fit into xid_list */
+ trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+ (trx_recover_reset_callback), NULL);
+ }
+ return int(std::min(arg.count, len)); /* arg.count may exceed len; report only what was stored */
}
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-static MY_ATTRIBUTE((warn_unused_result))
-trx_t*
-trx_get_trx_by_xid_low(
-/*===================*/
- XID* xid) /*!< in: X/Open XA transaction
- identifier */
-{
- trx_t* trx;
-
- ut_ad(trx_sys_mutex_own());
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
+struct trx_get_trx_by_xid_callback_arg /* state shared by trx_get_trx_by_xid() and its rw_trx_hash callback */
+{
+ XID *xid; /* the XID being searched for */
+ trx_t *trx; /* out: the matching transaction; initialized to 0 by trx_get_trx_by_xid() */
+};
- /* Compare two X/Open XA transaction id's: their
- length should be the same and binary comparison
- of gtrid_length+bqual_length bytes should be
- the same */
- if (trx->is_recovered
- && (trx_state_eq(trx, TRX_STATE_PREPARED)
- || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED))
- && xid->eq(trx->xid)) {
+static my_bool trx_get_trx_by_xid_callback(rw_trx_hash_element_t *element,
+ trx_get_trx_by_xid_callback_arg *arg)
+{
+ my_bool found= 0; /* set to 1 on a match and returned; NOTE(review): presumably a nonzero return stops the hash walk - confirm against rw_trx_hash_t::iterate() */
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ if (trx->is_recovered &&
+ (trx_state_eq(trx, TRX_STATE_PREPARED) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)) &&
+ arg->xid->eq(reinterpret_cast<XID*>(trx->xid)))
+ {
#ifdef WITH_WSREP
- /* The commit of a prepared recovered Galera
- transaction needs a valid trx->xid for
- invoking trx_sys_update_wsrep_checkpoint(). */
- if (wsrep_is_wsrep_xid(trx->xid)) break;
+ /* The commit of a prepared recovered Galera
+ transaction needs a valid trx->xid for
+ invoking trx_sys_update_wsrep_checkpoint(). */
+ if (!wsrep_is_wsrep_xid(trx->xid)) /* guards only the trx->xid->null() statement that follows */
#endif
- /* Invalidate the XID, so that subsequent calls
- will not find it. */
- trx->xid->null();
- break;
- }
- }
-
- return(trx);
+ /* Invalidate the XID, so that subsequent calls will not find it. */
+ trx->xid->null();
+ arg->trx= trx; /* executed for every match, wsrep or not */
+ found= 1;
+ }
+ }
+ mutex_exit(&element->mutex);
+ return found;
}
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
- XID* xid) /*!< in: X/Open XA transaction identifier */
-{
- trx_t* trx;
- if (xid == NULL) {
+/**
+ Finds a recovered PREPARED XA transaction by xid.
- return(NULL);
- }
+ trx may have been committed, unless the caller is holding lock_sys.mutex.
- trx_sys_mutex_enter();
+ @param[in] xid X/Open XA transaction identifier
- /* Recovered/Resurrected transactions are always only on the
- trx_sys_t::rw_trx_list. */
- trx = trx_get_trx_by_xid_low((XID*)xid);
+ @return trx or NULL; on match, the trx->xid will be invalidated (except for a wsrep XID when built WITH_WSREP).
+*/
- trx_sys_mutex_exit();
+trx_t *trx_get_trx_by_xid(XID *xid)
+{
+ trx_get_trx_by_xid_callback_arg arg= { xid, 0 };
- return(trx);
+ if (xid)
+ trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+ (trx_get_trx_by_xid_callback), &arg);
+ return arg.trx;
}
+
/*************************************************************//**
Starts the transaction if it is not yet started. */
void
@@ -2820,7 +2239,7 @@ trx_start_if_not_started_xa_low(
/* If the transaction is tagged as read-only then
it can only write to temp tables and for such
transactions we don't want to move them to the
- trx_sys_t::rw_trx_list. */
+ trx_sys_t::rw_trx_hash. */
if (!trx->read_only) {
trx_set_rw_mode(trx);
}
@@ -2921,15 +2340,6 @@ trx_start_for_ddl_low(
return;
case TRX_STATE_ACTIVE:
-
- /* We have this start if not started idiom, therefore we
- can't add stronger checks here. */
- trx->ddl = true;
-
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- ut_ad(trx->will_lock > 0);
- return;
-
case TRX_STATE_PREPARED:
case TRX_STATE_PREPARED_RECOVERED:
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -2952,48 +2362,28 @@ trx_set_rw_mode(
trx_t* trx) /*!< in/out: transaction that is RW */
{
ut_ad(trx->rsegs.m_redo.rseg == 0);
- ut_ad(!trx->in_rw_trx_list);
ut_ad(!trx_is_autocommit_non_locking(trx));
ut_ad(!trx->read_only);
+ ut_ad(trx->id == 0);
if (high_level_read_only) {
return;
}
/* Function is promoting existing trx from ro mode to rw mode.
- In this process it has acquired trx_sys->mutex as it plan to
+ In this process it has acquired trx_sys.mutex as it plans to
move trx from ro list to rw list. If in future, some other thread
looks at this trx object while it is being promoted then ensure
that both threads are synced by acquring trx->mutex to avoid decision
based on in-consistent view formed during promotion. */
trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
-
ut_ad(trx->rsegs.m_redo.rseg != 0);
- mutex_enter(&trx_sys->mutex);
-
- ut_ad(trx->id == 0);
- trx->id = trx_sys_get_new_trx_id();
-
- trx_sys->rw_trx_ids.push_back(trx->id);
-
- trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
+ trx_sys.register_rw(trx);
/* So that we can see our own changes. */
- if (MVCC::is_view_active(trx->read_view)) {
- MVCC::set_view_creator_trx_id(trx->read_view, trx->id);
- }
-
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
+ if (trx->read_view.is_open()) {
+ trx->read_view.set_creator_trx_id(trx->id);
}
-#endif /* UNIV_DEBUG */
-
- UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
-
- ut_d(trx->in_rw_trx_list = true);
-
- mutex_exit(&trx_sys->mutex);
}
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 7ff6ae8fc3f..14f4e9b31fe 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -74,16 +74,19 @@ can still remove old versions from the bottom of the stack. */
-------------------------------------------------------------------
latches?
-------
-The contention of the trx_sys_t::mutex should be minimized. When a transaction
+The contention of the trx_sys.mutex should be minimized. When a transaction
does its first insert or modify in an index, an undo log is assigned for it.
Then we must have an x-latch to the rollback segment header.
- When the transaction does more modifys or rolls back, the undo log is
-protected with undo_mutex in the transaction.
- When the transaction commits, its insert undo log is either reset and
-cached for a fast reuse, or freed. In these cases we must have an x-latch on
-the rollback segment page. The update undo log is put to the history list. If
-it is not suitable for reuse, its slot in the rollback segment is reset. In
-both cases, an x-latch must be acquired on the rollback segment.
+ When the transaction performs modifications or rolls back, its
+undo log is protected by undo page latches.
+Only the thread that is associated with the transaction may hold multiple
+undo page latches at a time. Undo pages are always private to a single
+transaction. Other threads that are performing MVCC reads
+or checking for implicit locks will lock at most one undo page at a time
+in trx_undo_get_undo_rec_low().
+ When the transaction commits, its persistent undo log is added
+to the history list. If it is not suitable for reuse, its slot is reset.
+In both cases, an x-latch must be acquired on the rollback segment header page.
The purge operation steps through the history list without modifying
it until a truncate operation occurs, which can remove undo logs from the end
of the list and release undo log segments. In stepping through the list,
@@ -91,16 +94,6 @@ s-latches on the undo log pages are enough, but in a truncate, x-latches must
be obtained on the rollback segment and individual pages. */
/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr); /*!< in: mtr */
-
-/********************************************************************//**
Creates and initializes an undo log memory object.
@return own: the undo log memory object */
static
@@ -109,26 +102,58 @@ trx_undo_mem_create(
/*================*/
trx_rseg_t* rseg, /*!< in: rollback segment memory object */
ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
const XID* xid, /*!< in: X/Open XA transaction identification*/
ulint page_no,/*!< in: undo log header page number */
ulint offset);/*!< in: undo log header byte offset on page */
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
+
+/** Determine the start offset of undo log records of an undo log page.
+@param[in] undo_page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset
+@return start offset */
static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in: mtr */
+uint16_t
+trx_undo_page_get_start(const page_t* undo_page, ulint page_no, ulint offset)
+{
+ return page_no == page_get_page_no(undo_page) /* is undo_page the undo log header page? */
+ ? mach_read_from_2(offset + TRX_UNDO_LOG_START + undo_page) /* yes: read the start offset stored in the log header */
+ : TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE; /* no: records begin right after the undo page header */
+}
+
+/** Get the first undo log record on a page.
+@param[in] page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header page offset
+@return pointer to first record
+@retval NULL if none exists */
+static
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(page_t* page, ulint page_no, ulint offset)
+{
+ ulint start = trx_undo_page_get_start(page, page_no, offset);
+ return start == trx_undo_page_get_end(page, page_no, offset) /* start offset equals end offset: no record on this page */
+ ? NULL
+ : page + start; /* the first record lives at the start offset */
+}
+
+/** Get the last undo log record on a page.
+@param[in] page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header page offset
+@return pointer to last record
+@retval NULL if none exists */
+static
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(page_t* page, ulint page_no, ulint offset)
+{
+ ulint end = trx_undo_page_get_end(page, page_no, offset);
+
+ return trx_undo_page_get_start(page, page_no, offset) == end /* start offset equals end offset: no record on this page */
+ ? NULL
+ : page + mach_read_from_2(page + end - 2); /* the 2 bytes just before the end offset are read as the page offset of the last record */
+}
/***********************************************************************//**
Gets the previous record in an undo log from the previous page.
@@ -172,6 +197,31 @@ trx_undo_get_prev_rec_from_prev_page(
return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
}
+/** Get the previous undo log record.
+@param[in] rec undo log record
+@param[in] page_no undo log header page number
+@param[in] offset undo log header page offset
+@return pointer to record
+@retval NULL if none */
+static
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(trx_undo_rec_t* rec, ulint page_no, ulint offset)
+{
+ page_t* undo_page;
+ ulint start;
+
+ undo_page = (page_t*) ut_align_down(rec, srv_page_size); /* round rec down to a srv_page_size boundary to get the frame that contains it */
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+
+ if (start + undo_page == rec) { /* rec is the first record on this page: nothing precedes it here */
+
+ return(NULL);
+ }
+
+ return(undo_page + mach_read_from_2(rec - 2)); /* the 2 bytes just before rec are read as the page offset of the previous record */
+}
+
/***********************************************************************//**
Gets the previous record in an undo log.
@return undo log record, the page s-latched, NULL if none */
@@ -292,7 +342,7 @@ trx_undo_get_next_rec(
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_first_rec(
- ulint space,
+ fil_space_t* space,
ulint page_no,
ulint offset,
ulint mode,
@@ -301,7 +351,7 @@ trx_undo_get_first_rec(
page_t* undo_page;
trx_undo_rec_t* rec;
- const page_id_t page_id(space, page_no);
+ const page_id_t page_id(space->id, page_no);
if (mode == RW_S_LATCH) {
undo_page = trx_undo_page_get_s_latched(page_id, mtr);
@@ -315,176 +365,202 @@ trx_undo_get_first_rec(
return(rec);
}
- return(trx_undo_get_next_rec_from_next_page(space,
+ return(trx_undo_get_next_rec_from_next_page(space->id,
undo_page, page_no, offset,
mode, mtr));
}
/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
-/**********************************************************************//**
-Writes the mtr log entry of an undo log page initialization. */
-UNIV_INLINE
-void
-trx_undo_page_init_log(
-/*===================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint type, /*!< in: undo log type */
- mtr_t* mtr) /*!< in: mtr */
+/** Parse MLOG_UNDO_INIT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+trx_undo_parse_page_init(const byte* ptr, const byte* end_ptr, page_t* page)
{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
+ if (end_ptr <= ptr) {
+ return NULL;
+ }
- mlog_catenate_ulint_compressed(mtr, type);
+ const ulint type = *ptr++;
+
+ if (type > TRX_UNDO_UPDATE) {
+ recv_sys->found_corrupt_log = true;
+ } else if (page) {
+ /* Starting with MDEV-12288 in MariaDB 10.3.1, we use
+ type=0 for the combined insert/update undo log
+ pages. MariaDB 10.2 would use TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE. */
+ mach_write_to_2(FIL_PAGE_TYPE + page, FIL_PAGE_UNDO_LOG);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + page,
+ type);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + page,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + page,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ }
+
+ return(const_cast<byte*>(ptr));
}
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
+/** Parse MLOG_UNDO_HDR_REUSE for crash-upgrade from MariaDB 10.2.
+@param[in] ptr redo log record
+@param[in] end_ptr end of log buffer
+@param[in,out] page undo log page or NULL
@return end of log record or NULL */
byte*
-trx_undo_parse_page_init(
-/*=====================*/
- const byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
+trx_undo_parse_page_header_reuse(
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* undo_page)
{
- ulint type;
+ trx_id_t trx_id = mach_u64_parse_compressed(&ptr, end_ptr);
- type = mach_parse_compressed(&ptr, end_ptr);
+ if (!ptr || !undo_page) {
+ return(const_cast<byte*>(ptr));
+ }
- if (ptr == NULL) {
+ compile_time_assert(TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE
+ + TRX_UNDO_LOG_XA_HDR_SIZE
+ < UNIV_PAGE_SIZE_MIN - 100);
- return(NULL);
- }
+ const ulint new_free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE
+ + TRX_UNDO_LOG_OLD_HDR_SIZE;
- if (page) {
- trx_undo_page_init(page, type, mtr);
- }
+ /* Insert undo data is not needed after commit: we may free all
+ the space on the page */
- return(const_cast<byte*>(ptr));
-}
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + undo_page)
+ == TRX_UNDO_INSERT);
-/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
+ byte* page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+ mach_write_to_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + undo_page,
+ TRX_UNDO_ACTIVE);
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ byte* log_hdr = undo_page + TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type);
+ mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+ mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+ mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
+ mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
+
+ return(const_cast<byte*>(ptr));
+}
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
+/** Initialize the fields in an undo log segment page.
+@param[in,out] undo_block undo page
+@param[in,out] mtr mini-transaction */
+static void trx_undo_page_init(buf_block_t* undo_block, mtr_t* mtr)
+{
+ page_t* page = undo_block->frame;
+ mach_write_to_2(FIL_PAGE_TYPE + page, FIL_PAGE_UNDO_LOG);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + page, 0);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + page,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + page,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG);
+ mtr->set_modified();
+ switch (mtr->get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
+ return;
+ case MTR_LOG_SHORT_INSERTS:
+ ut_ad(0);
+ /* fall through */
+ case MTR_LOG_ALL:
+ break;
+ }
- trx_undo_page_init_log(undo_page, type, mtr);
+ byte* log_ptr = mtr->get_log()->open(11 + 1);
+ log_ptr = mlog_write_initial_log_record_low(
+ MLOG_UNDO_INIT,
+ undo_block->page.id.space(),
+ undo_block->page.id.page_no(),
+ log_ptr, mtr);
+ *log_ptr++ = 0;
+ mlog_close(mtr, log_ptr);
}
-/***************************************************************//**
-Creates a new undo log segment in file.
-@return DB_SUCCESS if page creation OK possible error codes are:
-DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-trx_undo_seg_create(
-/*================*/
- trx_rseg_t* rseg MY_ATTRIBUTE((unused)),/*!< in: rollback segment */
- trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page
- x-latched */
- ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint* id, /*!< out: slot index within rseg header */
- page_t** undo_page,
- /*!< out: segment header page x-latched, NULL
- if there was an error */
- mtr_t* mtr) /*!< in: mtr */
+/** Create an undo log segment.
+@param[in,out] space tablespace
+@param[in,out] rseg_hdr rollback segment header (x-latched)
+@param[out] id undo slot number
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return undo log block
+@retval NULL on failure */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+buf_block_t*
+trx_undo_seg_create(fil_space_t* space, trx_rsegf_t* rseg_hdr, ulint* id,
+ dberr_t* err, mtr_t* mtr)
{
ulint slot_no;
- ulint space;
buf_block_t* block;
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
ulint n_reserved;
bool success;
- dberr_t err = DB_SUCCESS;
- ut_ad(mtr != NULL);
- ut_ad(id != NULL);
- ut_ad(rseg_hdr != NULL);
- ut_ad(mutex_own(&(rseg->mutex)));
-
- /* fputs(type == TRX_UNDO_INSERT
- ? "Creating insert undo log segment\n"
- : "Creating update undo log segment\n", stderr); */
- slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
+ slot_no = trx_rsegf_undo_find_free(rseg_hdr);
if (slot_no == ULINT_UNDEFINED) {
ib::warn() << "Cannot find a free slot for an undo log. Do"
" you have too many active transactions running"
" concurrently?";
- return(DB_TOO_MANY_CONCURRENT_TRXS);
+ *err = DB_TOO_MANY_CONCURRENT_TRXS;
+ return NULL;
}
- space = page_get_space_id(page_align(rseg_hdr));
-
success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
mtr);
if (!success) {
-
- return(DB_OUT_OF_FILE_SPACE);
+ *err = DB_OUT_OF_FILE_SPACE;
+ return NULL;
}
/* Allocate a new file segment for the undo log */
- block = fseg_create_general(space, 0,
- TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
+ block = fseg_create(space, 0, TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
+ mtr, true);
- fil_space_release_free_extents(space, n_reserved);
+ space->release_free_extents(n_reserved);
if (block == NULL) {
- /* No space left */
-
- return(DB_OUT_OF_FILE_SPACE);
+ *err = DB_OUT_OF_FILE_SPACE;
+ return NULL;
}
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
- *undo_page = buf_block_get_frame(block);
-
- page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
-
- trx_undo_page_init(*undo_page, type, mtr);
+ trx_undo_page_init(block, mtr);
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
+ mlog_write_ulint(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + block->frame,
TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
MLOG_2BYTES, mtr);
- mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
+ mlog_write_ulint(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + block->frame,
+ 0, MLOG_2BYTES, mtr);
- flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
+ flst_init(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame, mtr);
- flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
- page_hdr + TRX_UNDO_PAGE_NODE, mtr);
+ flst_add_last(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + block->frame,
+ mtr);
- trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
- page_get_page_no(*undo_page), mtr);
*id = slot_no;
+ trx_rsegf_set_nth_undo(rseg_hdr, slot_no, block->page.id.page_no(),
+ mtr);
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
- return(err);
+ *err = DB_SUCCESS;
+ return block;
}
/**********************************************************************//**
@@ -537,7 +613,7 @@ trx_undo_header_create(
new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
+ ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < srv_page_size - 100);
mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
@@ -559,7 +635,7 @@ trx_undo_header_create(
log_hdr = undo_page + free;
- mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
+ mach_write_to_2(log_hdr + TRX_UNDO_NEEDS_PURGE, 1);
mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
@@ -607,10 +683,7 @@ trx_undo_write_xid(
Read X/Open XA Transaction Identification (XID) from undo log header */
static
void
-trx_undo_read_xid(
-/*==============*/
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- XID* xid) /*!< out: X/Open XA Transaction Identification */
+trx_undo_read_xid(const trx_ulogf_t* log_hdr, XID* xid)
{
xid->formatID=static_cast<long>(mach_read_from_4(
log_hdr + TRX_UNDO_XA_FORMAT));
@@ -662,23 +735,7 @@ trx_undo_header_add_space_for_xid(
MLOG_2BYTES, mtr);
}
-/**********************************************************************//**
-Writes the mtr log entry of an undo log header reuse. */
-UNIV_INLINE
-void
-trx_undo_insert_header_reuse_log(
-/*=============================*/
- const page_t* undo_page, /*!< in: undo log header page */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
-
- mlog_catenate_ull_compressed(mtr, trx_id);
-}
-
-/** Parse the redo log entry of an undo log page header create or reuse.
-@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+/** Parse the redo log entry of an undo log page header create.
@param[in] ptr redo log record
@param[in] end_ptr end of log buffer
@param[in,out] page page frame or NULL
@@ -686,7 +743,6 @@ trx_undo_insert_header_reuse_log(
@return end of log record or NULL */
byte*
trx_undo_parse_page_header(
- mlog_id_t type,
const byte* ptr,
const byte* end_ptr,
page_t* page,
@@ -695,93 +751,20 @@ trx_undo_parse_page_header(
trx_id_t trx_id = mach_u64_parse_compressed(&ptr, end_ptr);
if (ptr != NULL && page != NULL) {
- switch (type) {
- case MLOG_UNDO_HDR_CREATE:
- trx_undo_header_create(page, trx_id, mtr);
- return(const_cast<byte*>(ptr));
- case MLOG_UNDO_HDR_REUSE:
- trx_undo_insert_header_reuse(page, trx_id, mtr);
- return(const_cast<byte*>(ptr));
- default:
- break;
- }
- ut_ad(0);
+ trx_undo_header_create(page, trx_id, mtr);
+ return(const_cast<byte*>(ptr));
}
return(const_cast<byte*>(ptr));
}
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- log_hdr = undo_page + free;
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- /* Insert undo data is not needed after commit: we may free all
- the space on the page */
-
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- log_hdr = undo_page + free;
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- /* Write the log record MLOG_UNDO_HDR_REUSE */
- trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
/** Allocate an undo log page.
-@param[in,out] trx transaction
@param[in,out] undo undo log
@param[in,out] mtr mini-transaction that does not hold any page latch
@return X-latched block if success
@retval NULL on failure */
-buf_block_t*
-trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr)
{
- ut_ad(mutex_own(&trx->undo_mutex));
-
trx_rseg_t* rseg = undo->rseg;
buf_block_t* new_block = NULL;
ulint n_reserved;
@@ -792,14 +775,11 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
counterpart of the tree latch, which is the rseg mutex. */
mutex_enter(&rseg->mutex);
- if (rseg->curr_size == rseg->max_size) {
- goto func_exit;
- }
header_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr);
- if (!fsp_reserve_free_extents(&n_reserved, undo->space, 1,
+ if (!fsp_reserve_free_extents(&n_reserved, undo->rseg->space, 1,
FSP_UNDO, mtr)) {
goto func_exit;
}
@@ -809,7 +789,7 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+ header_page,
undo->top_page_no + 1, FSP_UP, TRUE, mtr, mtr);
- fil_space_release_free_extents(undo->space, n_reserved);
+ rseg->space->release_free_extents(n_reserved);
if (!new_block) {
goto func_exit;
@@ -819,7 +799,7 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE);
undo->last_page_no = new_block->page.id.page_no();
- trx_undo_page_init(new_block->frame, undo->type, mtr);
+ trx_undo_page_init(new_block, mtr);
flst_add_last(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST
+ header_page,
@@ -842,9 +822,8 @@ ulint
trx_undo_free_page(
/*===============*/
trx_rseg_t* rseg, /*!< in: rollback segment */
- ibool in_history, /*!< in: TRUE if the undo log is in the history
+ bool in_history, /*!< in: true if the undo log is in the history
list */
- ulint space, /*!< in: space */
ulint hdr_page_no, /*!< in: header page number */
ulint page_no, /*!< in: page number to free: must not be the
header page */
@@ -852,34 +831,30 @@ trx_undo_free_page(
undo log page; the caller must have reserved
the rollback segment mutex */
{
- page_t* header_page;
- page_t* undo_page;
- fil_addr_t last_addr;
- trx_rsegf_t* rseg_header;
- ulint hist_size;
+ const ulint space = rseg->space->id;
ut_a(hdr_page_no != page_no);
ut_ad(mutex_own(&(rseg->mutex)));
- undo_page = trx_undo_page_get(page_id_t(space, page_no), mtr);
-
- header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), mtr);
+ page_t* undo_page = trx_undo_page_get(page_id_t(space, page_no), mtr);
+ page_t* header_page = trx_undo_page_get(page_id_t(space, hdr_page_no),
+ mtr);
- flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
+ flst_remove(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_page,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + undo_page, mtr);
- fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
- space, page_no, false, mtr);
+ fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + header_page,
+ rseg->space, page_no, false, mtr);
- last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
- + TRX_UNDO_PAGE_LIST, mtr);
+ const fil_addr_t last_addr = flst_get_last(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_page, mtr);
rseg->curr_size--;
if (in_history) {
- rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
-
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
+ trx_rsegf_t* rseg_header = trx_rsegf_get(
+ rseg->space, rseg->page_no, mtr);
+ uint32_t hist_size = mach_read_from_4(
+ rseg_header + TRX_RSEG_HISTORY_SIZE);
ut_ad(hist_size > 0);
mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
hist_size - 1, MLOG_4BYTES, mtr);
@@ -899,40 +874,11 @@ trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr)
ut_ad(undo->size > 0);
undo->last_page_no = trx_undo_free_page(
- undo->rseg, FALSE, undo->space,
- undo->hdr_page_no, undo->last_page_no, mtr);
+ undo->rseg, false, undo->hdr_page_no, undo->last_page_no, mtr);
undo->size--;
}
-/** Empties an undo log header page of undo records for that undo log.
-Other undo logs may still have records on that page, if it is an update
-undo log.
-@param[in] space space
-@param[in] hdr_page_no header page number
-@param[in] hdr_offset header offset
-@param[in,out] mtr mini-transaction */
-static
-void
-trx_undo_empty_header_page(
- ulint space,
- ulint hdr_page_no,
- ulint hdr_offset,
- mtr_t* mtr)
-{
- page_t* header_page;
- trx_ulogf_t* log_hdr;
- ulint end;
-
- header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), mtr);
-
- log_hdr = header_page + hdr_offset;
-
- end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
-}
-
/** Truncate the tail of an undo log during rollback.
@param[in,out] undo undo log
@param[in] limit all undo logs after this limit will be discarded
@@ -952,7 +898,8 @@ trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp)
trx_undo_rec_t* trunc_here = NULL;
page_t* undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->last_page_no), &mtr);
+ page_id_t(undo->rseg->space->id, undo->last_page_no),
+ &mtr);
trx_undo_rec_t* rec = trx_undo_page_get_last_rec(
undo_page, undo->hdr_page_no, undo->hdr_offset);
while (rec) {
@@ -974,7 +921,7 @@ function_exit:
if (trunc_here) {
mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE,
- trunc_here - undo_page,
+ ulint(trunc_here - undo_page),
MLOG_2BYTES, &mtr);
}
@@ -1044,12 +991,18 @@ loop:
page_no = page_get_page_no(undo_page);
if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(rseg->space,
- hdr_page_no, hdr_offset,
- &mtr);
+ uint16_t end = mach_read_from_2(hdr_offset + TRX_UNDO_NEXT_LOG
+ + undo_page);
+ if (end == 0) {
+ end = mach_read_from_2(TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE
+ + undo_page);
+ }
+
+ mlog_write_ulint(undo_page + hdr_offset + TRX_UNDO_LOG_START,
+ end, MLOG_2BYTES, &mtr);
} else {
- trx_undo_free_page(rseg, TRUE, rseg->space, hdr_page_no,
- page_no, &mtr);
+ trx_undo_free_page(rseg, true, hdr_page_no, page_no, &mtr);
}
mtr_commit(&mtr);
@@ -1085,7 +1038,7 @@ trx_undo_seg_free(
mutex_enter(&(rseg->mutex));
- seg_header = trx_undo_page_get(page_id_t(undo->space,
+ seg_header = trx_undo_page_get(page_id_t(undo->rseg->space->id,
undo->hdr_page_no),
&mtr)
+ TRX_UNDO_SEG_HDR;
@@ -1111,181 +1064,108 @@ trx_undo_seg_free(
/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
-/********************************************************************//**
-Creates and initializes an undo log memory object according to the values
-in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create_at_db_start(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint page_no,/*!< in: undo log segment page number */
- mtr_t* mtr) /*!< in: mtr */
+/** Read an undo log when starting up the database.
+@param[in,out] rseg rollback segment
+@param[in] id rollback segment slot
+@param[in] page_no undo log segment page number
+@param[in,out] max_trx_id the largest observed transaction ID
+@return size of the undo log in pages */
+ulint
+trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no,
+ trx_id_t& max_trx_id)
{
- page_t* undo_page;
- trx_upagef_t* page_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_undo_t* undo;
- ulint type;
- ulint state;
- trx_id_t trx_id;
- ulint offset;
- fil_addr_t last_addr;
- page_t* last_page;
- trx_undo_rec_t* rec;
+ mtr_t mtr;
XID xid;
- ibool xid_exists = FALSE;
- ut_a(id < TRX_RSEG_N_SLOTS);
-
- undo_page = trx_undo_page_get(page_id_t(rseg->space, page_no), mtr);
-
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
- mtr);
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
+ ut_ad(id < TRX_RSEG_N_SLOTS);
- state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
-
- offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
-
- undo_header = undo_page + offset;
+ mtr.start();
+ const page_t* undo_page = trx_undo_page_get(
+ page_id_t(rseg->space->id, page_no), &mtr);
+ const ulint type = mach_read_from_2(
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + undo_page);
+ ut_ad(type == 0 || type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE);
- trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
+ uint state = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
+ + undo_page);
+ uint offset = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG
+ + undo_page);
- xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- MLOG_1BYTE, mtr);
+ const trx_ulogf_t* undo_header = undo_page + offset;
/* Read X/Open XA transaction identification if it exists, or
set it to NULL. */
- xid.null();
- if (xid_exists == TRUE) {
+ if (undo_header[TRX_UNDO_XID_EXISTS]) {
trx_undo_read_xid(undo_header, &xid);
+ } else {
+ xid.null();
}
- mutex_enter(&(rseg->mutex));
-
- undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid,
- page_no, offset);
- mutex_exit(&(rseg->mutex));
-
- undo->dict_operation = mtr_read_ulint(
- undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr);
-
- undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID);
- undo->state = state;
- undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST);
-
- /* If the log segment is being freed, the page list is inconsistent! */
- if (state == TRX_UNDO_TO_FREE) {
-
- goto add_to_list;
+ trx_id_t trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
+ if (trx_id > max_trx_id) {
+ max_trx_id = trx_id;
}
- last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- undo->last_page_no = last_addr.page;
- undo->top_page_no = last_addr.page;
-
- last_page = trx_undo_page_get(
- page_id_t(rseg->space, undo->last_page_no), mtr);
+ mutex_enter(&rseg->mutex);
+ trx_undo_t* undo = trx_undo_mem_create(
+ rseg, id, trx_id, &xid, page_no, offset);
+ mutex_exit(&rseg->mutex);
- rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
+ undo->dict_operation = undo_header[TRX_UNDO_DICT_TRANS];
+ undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID);
+ undo->size = flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST
+ + undo_page);
- if (rec == NULL) {
- undo->empty = TRUE;
+ if (UNIV_UNLIKELY(state == TRX_UNDO_TO_FREE)) {
+ /* This is an old-format insert_undo log segment that
+ is being freed. The page list is inconsistent. */
+ ut_ad(type == TRX_UNDO_INSERT);
+ state = TRX_UNDO_TO_PURGE;
} else {
- undo->empty = FALSE;
- undo->top_offset = rec - last_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
- }
-add_to_list:
- if (type == TRX_UNDO_INSERT) {
- if (state != TRX_UNDO_CACHED) {
+ if (state == TRX_UNDO_TO_PURGE
+ || state == TRX_UNDO_CACHED) {
+ trx_id_t id = mach_read_from_8(TRX_UNDO_TRX_NO
+ + undo_header);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
+ }
- UT_LIST_ADD_LAST(rseg->insert_undo_list, undo);
- } else {
+ fil_addr_t last_addr = flst_get_last(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + undo_page,
+ &mtr);
- UT_LIST_ADD_LAST(rseg->insert_undo_cached, undo);
+ undo->last_page_no = last_addr.page;
+ undo->top_page_no = last_addr.page;
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- }
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
- if (state != TRX_UNDO_CACHED) {
+ page_t* last_page = trx_undo_page_get(
+ page_id_t(rseg->space->id, undo->last_page_no), &mtr);
- UT_LIST_ADD_LAST(rseg->update_undo_list, undo);
+ if (const trx_undo_rec_t* rec = trx_undo_page_get_last_rec(
+ last_page, page_no, offset)) {
+ undo->top_offset = ulint(rec - last_page);
+ undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
+ ut_ad(!undo->empty());
} else {
-
- UT_LIST_ADD_LAST(rseg->update_undo_cached, undo);
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ undo->top_undo_no = IB_ID_MAX;
+ ut_ad(undo->empty());
}
}
- return(undo);
-}
-
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy. This
-function is only called when the database is started or a new rollback
-segment is created.
-@return the combined size of undo log segments in pages */
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg) /*!< in: rollback segment memory object */
-{
- ulint size = 0;
- trx_rsegf_t* rseg_header;
- ulint i;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
- ulint page_no;
-
- page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
-
- /* In forced recovery: try to avoid operations which look
- at database pages; undo logs are rapidly changing data, and
- the probability that they are in an inconsistent state is
- high */
-
- if (page_no != FIL_NULL
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
-
- trx_undo_t* undo;
-
- undo = trx_undo_mem_create_at_db_start(
- rseg, i, page_no, &mtr);
-
- size += undo->size;
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get(
- rseg->space, rseg->page_no, &mtr);
+ undo->state = state;
- /* Found a used slot */
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
- }
+ if (state != TRX_UNDO_CACHED) {
+ UT_LIST_ADD_LAST(type == TRX_UNDO_INSERT
+ ? rseg->old_insert_list
+ : rseg->undo_list, undo);
+ } else {
+ UT_LIST_ADD_LAST(rseg->undo_cached, undo);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
- mtr_commit(&mtr);
-
- return(size);
+ mtr.commit();
+ return undo->size;
}
/********************************************************************//**
@@ -1297,8 +1177,6 @@ trx_undo_mem_create(
/*================*/
trx_rseg_t* rseg, /*!< in: rollback segment memory object */
ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
const XID* xid, /*!< in: X/Open transaction identification */
@@ -1319,9 +1197,7 @@ trx_undo_mem_create(
}
undo->id = id;
- undo->type = type;
undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
undo->trx_id = trx_id;
undo->xid = *xid;
@@ -1329,16 +1205,16 @@ trx_undo_mem_create(
undo->rseg = rseg;
- undo->space = rseg->space;
undo->hdr_page_no = page_no;
undo->hdr_offset = offset;
undo->last_page_no = page_no;
undo->size = 1;
- undo->empty = TRUE;
+ undo->top_undo_no = IB_ID_MAX;
undo->top_page_no = page_no;
undo->guess_block = NULL;
undo->withdraw_clock = 0;
+ ut_ad(undo->empty());
return(undo);
}
@@ -1360,201 +1236,200 @@ trx_undo_mem_init_for_reuse(
ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
undo->trx_id = trx_id;
undo->xid = *xid;
undo->dict_operation = FALSE;
undo->hdr_offset = offset;
- undo->empty = TRUE;
-}
-
-/********************************************************************//**
-Frees an undo log memory copy. */
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: the undo object to be freed */
-{
- ut_a(undo->id < TRX_RSEG_N_SLOTS);
-
- ut_free(undo);
+ undo->top_undo_no = IB_ID_MAX;
+ ut_ad(undo->empty());
}
-/**********************************************************************//**
-Creates a new undo log.
-@return DB_SUCCESS if successful in creating the new undo lob object,
-possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
-DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
+/** Create an undo log.
+@param[in,out] trx transaction
+@param[in,out] rseg rollback segment
+@param[out] undo undo log object
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return undo log block
+@retval NULL on failure */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-trx_undo_create(
-/*============*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open transaction identification*/
- trx_undo_t** undo, /*!< out: the new undo log object, undefined
- * if did not succeed */
- mtr_t* mtr) /*!< in: mtr */
+buf_block_t*
+trx_undo_create(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+ dberr_t* err, mtr_t* mtr)
{
- trx_rsegf_t* rseg_header;
- ulint page_no;
- ulint offset;
ulint id;
- page_t* undo_page;
- dberr_t err;
ut_ad(mutex_own(&(rseg->mutex)));
- if (rseg->curr_size == rseg->max_size) {
+ buf_block_t* block = trx_undo_seg_create(
+ rseg->space,
+ trx_rsegf_get(rseg->space, rseg->page_no, mtr), &id, err, mtr);
- return(DB_OUT_OF_FILE_SPACE);
+ if (!block) {
+ return NULL;
}
rseg->curr_size++;
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
-
- err = trx_undo_seg_create(rseg, rseg_header, type, &id,
- &undo_page, mtr);
-
- if (err != DB_SUCCESS) {
- /* Did not succeed */
+ ulint offset = trx_undo_header_create(block->frame, trx->id, mtr);
- rseg->curr_size--;
+ trx_undo_header_add_space_for_xid(block->frame, block->frame + offset,
+ mtr);
- return(err);
- }
-
- page_no = page_get_page_no(undo_page);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
-
- *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
- page_no, offset);
+ *undo = trx_undo_mem_create(rseg, id, trx->id, trx->xid,
+ block->page.id.page_no(), offset);
if (*undo == NULL) {
+ *err = DB_OUT_OF_MEMORY;
+ /* FIXME: this will not free the undo block to the file */
+ return NULL;
+ } else if (rseg != trx->rsegs.m_redo.rseg) {
+ return block;
+ }
- err = DB_OUT_OF_MEMORY;
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* Do not discard the table on recovery. */
+ trx->table_id = 0;
+ /* fall through */
+ case TRX_DICT_OP_TABLE:
+ (*undo)->table_id = trx->table_id;
+ (*undo)->dict_operation = TRUE;
+ mlog_write_ulint(block->frame + offset + TRX_UNDO_DICT_TRANS,
+ TRUE, MLOG_1BYTE, mtr);
+ mlog_write_ull(block->frame + offset + TRX_UNDO_TABLE_ID,
+ trx->table_id, mtr);
}
- return(err);
+ *err = DB_SUCCESS;
+ return block;
}
/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
-/********************************************************************//**
-Reuses a cached undo log.
-@return the undo log memory object, NULL if none cached */
+/** Reuse a cached undo log block.
+@param[in,out] trx transaction
+@param[in,out] rseg rollback segment
+@param[out] pundo the undo log memory object
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL if none cached */
static
-trx_undo_t*
-trx_undo_reuse_cached(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is used */
- const XID* xid, /*!< in: X/Open XA transaction identification */
- mtr_t* mtr) /*!< in: mtr */
+buf_block_t*
+trx_undo_reuse_cached(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** pundo,
+ mtr_t* mtr)
{
- trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (type == TRX_UNDO_INSERT) {
-
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(rseg->update_undo_cached, undo);
+ ut_ad(mutex_own(&rseg->mutex));
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached);
+ if (!undo) {
+ return NULL;
}
ut_ad(undo->size == 1);
- ut_a(undo->id < TRX_RSEG_N_SLOTS);
+ ut_ad(undo->id < TRX_RSEG_N_SLOTS);
- undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ buf_block_t* block = buf_page_get(page_id_t(undo->rseg->space->id,
+ undo->hdr_page_no),
+ univ_page_size, RW_X_LATCH, mtr);
+ if (!block) {
+ return NULL;
+ }
- if (type == TRX_UNDO_INSERT) {
- offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- } else {
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
+ *pundo = undo;
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
+ ulint offset = trx_undo_header_create(block->frame, trx->id, mtr);
+ /* Reset the TRX_UNDO_PAGE_TYPE in case this page is being
+ repurposed after upgrading to MariaDB 10.3. */
+ if (ut_d(ulint type =) UNIV_UNLIKELY(
+ mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + block->frame))) {
+ ut_ad(type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE);
+ mlog_write_ulint(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + block->frame, 0, MLOG_2BYTES, mtr);
}
- trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
-
- return(undo);
-}
+ trx_undo_header_add_space_for_xid(block->frame, block->frame + offset,
+ mtr);
-/** Mark that an undo log header belongs to a data dictionary transaction.
-@param[in] trx dictionary transaction
-@param[in,out] undo undo log
-@param[in,out] mtr mini-transaction */
-void trx_undo_mark_as_dict(const trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
-{
- ut_ad(undo == trx->rsegs.m_redo.insert_undo
- || undo == trx->rsegs.m_redo.update_undo);
+ trx_undo_mem_init_for_reuse(undo, trx->id, trx->xid, offset);
- page_t* hdr_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ if (rseg != trx->rsegs.m_redo.rseg) {
+ return block;
+ }
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
- ut_error;
+ return block;
case TRX_DICT_OP_INDEX:
/* Do not discard the table on recovery. */
- undo->table_id = 0;
- break;
+ trx->table_id = 0;
+ /* fall through */
case TRX_DICT_OP_TABLE:
undo->table_id = trx->table_id;
- break;
+ undo->dict_operation = TRUE;
+ mlog_write_ulint(block->frame + offset + TRX_UNDO_DICT_TRANS,
+ TRUE, MLOG_1BYTE, mtr);
+ mlog_write_ull(block->frame + offset + TRX_UNDO_TABLE_ID,
+ trx->table_id, mtr);
}
- mlog_write_ulint(hdr_page + undo->hdr_offset
- + TRX_UNDO_DICT_TRANS,
- TRUE, MLOG_1BYTE, mtr);
+ return block;
+}
+
+/** Assign an undo log for a persistent transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out] trx transaction
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
+{
+ ut_ad(mtr->get_log_mode() == MTR_LOG_ALL);
+
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
- mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
- undo->table_id, mtr);
+ if (undo) {
+ return buf_page_get_gen(
+ page_id_t(undo->rseg->space->id, undo->last_page_no),
+ univ_page_size, RW_X_LATCH,
+ buf_pool_is_obsolete(undo->withdraw_clock)
+ ? NULL : undo->guess_block,
+ BUF_GET, __FILE__, __LINE__, mtr, err);
+ }
+
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
- undo->dict_operation = TRUE;
+ mutex_enter(&rseg->mutex);
+ buf_block_t* block = trx_undo_reuse_cached(
+ trx, rseg, &trx->rsegs.m_redo.undo, mtr);
+
+ if (!block) {
+ block = trx_undo_create(trx, rseg, &trx->rsegs.m_redo.undo,
+ err, mtr);
+ ut_ad(!block == (*err != DB_SUCCESS));
+ if (!block) {
+ goto func_exit;
+ }
+ } else {
+ *err = DB_SUCCESS;
+ }
+
+ UT_LIST_ADD_FIRST(rseg->undo_list, trx->rsegs.m_redo.undo);
+
+func_exit:
+ mutex_exit(&rseg->mutex);
+ return block;
}
/** Assign an undo log for a transaction.
@@ -1562,73 +1437,57 @@ A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
-@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE
-@retval DB_SUCCESS on success
-@retval DB_TOO_MANY_CONCURRENT_TRXS
-@retval DB_OUT_OF_FILE_SPACE
-@retval DB_READ_ONLY
-@retval DB_OUT_OF_MEMORY */
-dberr_t
-trx_undo_assign_undo(
- trx_t* trx,
- trx_rseg_t* rseg,
- trx_undo_t** undo,
- ulint type)
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+ dberr_t* err, mtr_t* mtr)
{
- const bool is_temp = rseg == trx->rsegs.m_noredo.rseg;
- mtr_t mtr;
- dberr_t err = DB_SUCCESS;
+ const bool is_temp __attribute__((unused)) = rseg == trx->rsegs.m_noredo.rseg;
- ut_ad(mutex_own(&trx->undo_mutex));
ut_ad(rseg == trx->rsegs.m_redo.rseg
|| rseg == trx->rsegs.m_noredo.rseg);
- ut_ad(type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE);
-
- mtr.start();
-
- if (is_temp) {
- mtr.set_log_mode(MTR_LOG_NO_REDO);
- ut_ad(undo == &trx->rsegs.m_noredo.undo);
- } else {
- ut_ad(undo == (type == TRX_UNDO_INSERT
- ? &trx->rsegs.m_redo.insert_undo
- : &trx->rsegs.m_redo.update_undo));
+ ut_ad(undo == (is_temp
+ ? &trx->rsegs.m_noredo.undo
+ : &trx->rsegs.m_redo.undo));
+ ut_ad(mtr->get_log_mode()
+ == (is_temp ? MTR_LOG_NO_REDO : MTR_LOG_ALL));
+
+ if (*undo) {
+ return buf_page_get_gen(
+ page_id_t(rseg->space->id, (*undo)->last_page_no),
+ univ_page_size, RW_X_LATCH,
+ buf_pool_is_obsolete((*undo)->withdraw_clock)
+ ? NULL : (*undo)->guess_block,
+ BUF_GET, __FILE__, __LINE__, mtr, err);
}
- mutex_enter(&rseg->mutex);
-
DBUG_EXECUTE_IF(
"ib_create_table_fail_too_many_trx",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- goto func_exit;
+ *err = DB_TOO_MANY_CONCURRENT_TRXS; return NULL;
);
- *undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, trx->xid,
- &mtr);
- if (*undo == NULL) {
- err = trx_undo_create(trx, rseg, type, trx->id, trx->xid,
- undo, &mtr);
- if (err != DB_SUCCESS) {
+ mutex_enter(&rseg->mutex);
+
+ buf_block_t* block = trx_undo_reuse_cached(trx, rseg, undo, mtr);
+
+ if (!block) {
+ block = trx_undo_create(trx, rseg, undo, err, mtr);
+ ut_ad(!block == (*err != DB_SUCCESS));
+ if (!block) {
goto func_exit;
}
- }
-
- if (is_temp) {
- UT_LIST_ADD_FIRST(rseg->insert_undo_list, *undo);
} else {
- UT_LIST_ADD_FIRST(type == TRX_UNDO_INSERT
- ? rseg->insert_undo_list
- : rseg->update_undo_list, *undo);
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_undo_mark_as_dict(trx, *undo, &mtr);
- }
+ *err = DB_SUCCESS;
}
+ UT_LIST_ADD_FIRST(rseg->undo_list, *undo);
+
func_exit:
mutex_exit(&rseg->mutex);
- mtr.commit();
-
- return(err);
+ return block;
}
/******************************************************************//**
@@ -1648,7 +1507,7 @@ trx_undo_set_state_at_finish(
ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1658,10 +1517,6 @@ trx_undo_set_state_at_finish(
< TRX_UNDO_PAGE_REUSE_LIMIT) {
state = TRX_UNDO_CACHED;
-
- } else if (undo->type == TRX_UNDO_INSERT) {
-
- state = TRX_UNDO_TO_FREE;
} else {
state = TRX_UNDO_TO_PURGE;
}
@@ -1675,7 +1530,7 @@ trx_undo_set_state_at_finish(
/** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK.
@param[in,out] trx transaction
-@param[in,out] undo insert_undo or update_undo log
+@param[in,out] undo undo log
@param[in] rollback false=XA PREPARE, true=XA ROLLBACK
@param[in,out] mtr mini-transaction
@return undo log segment header page, x-latched */
@@ -1696,7 +1551,7 @@ trx_undo_set_state_at_prepare(
ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
@@ -1727,43 +1582,7 @@ trx_undo_set_state_at_prepare(
return(undo_page);
}
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update
- undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_t* undo = trx->rsegs.m_redo.update_undo;
- trx_rseg_t* rseg = undo->rseg;
-
- ut_ad(mutex_own(&rseg->mutex));
-
- trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
-
- UT_LIST_REMOVE(rseg->update_undo_list, undo);
-
- trx->rsegs.m_redo.update_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(rseg->update_undo_cached, undo);
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE);
-
- trx_undo_mem_free(undo);
- }
-}
-
-/** Free an insert or temporary undo log after commit or rollback.
+/** Free an old insert or temporary undo log after commit or rollback.
The information is not needed after a commit or rollback, therefore
the data can be discarded.
@param[in,out] undo undo log
@@ -1773,44 +1592,39 @@ trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp)
{
trx_rseg_t* rseg = undo->rseg;
ut_ad(is_temp == !rseg->is_persistent());
+ ut_ad(!is_temp || 0 == UT_LIST_GET_LEN(rseg->old_insert_list));
mutex_enter(&rseg->mutex);
- UT_LIST_REMOVE(rseg->insert_undo_list, undo);
+ UT_LIST_REMOVE(is_temp ? rseg->undo_list : rseg->old_insert_list,
+ undo);
if (undo->state == TRX_UNDO_CACHED) {
- UT_LIST_ADD_FIRST(rseg->insert_undo_cached, undo);
+ UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
- ut_ad(undo->state == TRX_UNDO_TO_FREE);
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
/* Delete first the undo log segment in the file */
mutex_exit(&rseg->mutex);
- if (!srv_read_only_mode) {
- trx_undo_seg_free(undo, is_temp);
- }
+ trx_undo_seg_free(undo, true);
mutex_enter(&rseg->mutex);
ut_ad(rseg->curr_size > undo->size);
rseg->curr_size -= undo->size;
- trx_undo_mem_free(undo);
+ ut_free(undo);
}
mutex_exit(&rseg->mutex);
}
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
+/** At shutdown, frees the undo logs of a transaction. */
void
-trx_undo_free_prepared(
-/*===================*/
- trx_t* trx) /*!< in/out: PREPARED transaction */
+trx_undo_free_at_shutdown(trx_t *trx)
{
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
-
- if (trx->rsegs.m_redo.update_undo) {
- switch (trx->rsegs.m_redo.update_undo->state) {
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) {
+ switch (undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_CACHED:
@@ -1821,10 +1635,7 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false and
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
- also for transactions that we faked
- to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
ut_a(!srv_was_started
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
@@ -1834,15 +1645,13 @@ trx_undo_free_prepared(
ut_error;
}
- UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->update_undo_list,
- trx->rsegs.m_redo.update_undo);
- trx_undo_mem_free(trx->rsegs.m_redo.update_undo);
-
- trx->rsegs.m_redo.update_undo = NULL;
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list, undo);
+ ut_free(undo);
+ undo = NULL;
}
- if (trx->rsegs.m_redo.insert_undo) {
- switch (trx->rsegs.m_redo.insert_undo->state) {
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) {
+ switch (undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_CACHED:
@@ -1853,10 +1662,7 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false and
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
- also for transactions that we faked
- to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
ut_a(!srv_was_started
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
@@ -1866,19 +1672,16 @@ trx_undo_free_prepared(
ut_error;
}
- UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->insert_undo_list,
- trx->rsegs.m_redo.insert_undo);
- trx_undo_mem_free(trx->rsegs.m_redo.insert_undo);
-
- trx->rsegs.m_redo.insert_undo = NULL;
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->old_insert_list, undo);
+ ut_free(undo);
+ undo = NULL;
}
if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
ut_a(undo->state == TRX_UNDO_PREPARED);
- UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->insert_undo_list,
- undo);
- trx_undo_mem_free(undo);
+ UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list, undo);
+ ut_free(undo);
undo = NULL;
}
}