Diffstat (limited to 'storage/innobase/log')
-rw-r--r--  storage/innobase/log/log0crypt.cc |   9
-rw-r--r--  storage/innobase/log/log0log.cc   | 990
-rw-r--r--  storage/innobase/log/log0recv.cc  | 447
3 files changed, 564 insertions, 882 deletions
diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc
index 478f021cbe4..dff9661c6eb 100644
--- a/storage/innobase/log/log0crypt.cc
+++ b/storage/innobase/log/log0crypt.cc
@@ -24,6 +24,7 @@ Created 11/25/2013 Minli Zhu Google
Modified Jan Lindström jan.lindstrom@mariadb.com
MDEV-11782: Rewritten for MariaDB 10.2 by Marko Mäkelä, MariaDB Corporation.
*******************************************************/
+#include <my_global.h>
#include "m_string.h"
#include "log0crypt.h"
#include <mysql/service_my_crypt.h>
@@ -196,7 +197,7 @@ bool
log_crypt_init()
{
ut_ad(log_mutex_own());
- ut_ad(log_sys->is_encrypted());
+ ut_ad(log_sys.is_encrypted());
info.key_version = encryption_key_get_latest_version(
LOG_DEFAULT_ENCRYPTION_KEY);
@@ -392,9 +393,9 @@ log_tmp_block_encrypt(
aes_ctr_iv[1] = offs;
int rc = encryption_crypt(
- src, size, dst, &dst_len,
- const_cast<byte*>(info.crypt_key.bytes), sizeof info.crypt_key,
- reinterpret_cast<byte*>(aes_ctr_iv), sizeof aes_ctr_iv,
+ src, (uint)size, dst, &dst_len,
+ const_cast<byte*>(info.crypt_key.bytes), (uint)(sizeof info.crypt_key),
+ reinterpret_cast<byte*>(aes_ctr_iv), (uint)(sizeof aes_ctr_iv),
encrypt
? ENCRYPTION_FLAG_ENCRYPT|ENCRYPTION_FLAG_NOPAD
: ENCRYPTION_FLAG_DECRYPT|ENCRYPTION_FLAG_NOPAD,
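
The casts added above exist because encryption_crypt() in the my_crypt service takes unsigned int lengths, while sizeof and the local size are size_t; the explicit (uint) makes the narrowing intentional. A minimal standalone sketch of the same pattern, with a hypothetical crypt_api() standing in for the service function:

    #include <cstddef>
    #include <cstdio>

    // Hypothetical stand-in for a C API taking 32-bit lengths, like the
    // encryption_crypt() service function called above.
    static int crypt_api(const unsigned char* src, unsigned int slen,
                         unsigned char* dst, unsigned int* dlen)
    {
        for (unsigned int i = 0; i < slen; i++) {
            dst[i] = src[i] ^ 0xA5;     /* pretend encryption */
        }
        *dlen = slen;
        return 0;
    }

    int main()
    {
        unsigned char src[512], dst[512];
        for (size_t i = 0; i < sizeof src; i++) src[i] = (unsigned char) i;

        size_t size = sizeof src;       /* size_t is 64-bit on most builds */
        unsigned int dst_len;
        /* The explicit cast documents the intentional narrowing, as the
        patched log_tmp_block_encrypt() call does with (uint)size. */
        int rc = crypt_api(src, (unsigned int) size, dst, &dst_len);
        std::printf("rc=%d dst_len=%u\n", rc, dst_len);
        return rc;
    }
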
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 4a789382004..3119a110f74 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -69,7 +69,7 @@ c-function and its parameters are written to the log to
reduce the size of the log.
3a) You should not add parameters to these kind of functions
- (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
+ (e.g. trx_undo_header_create())
3b) You should not add such functionality which either change
working when compared with the old or are dependent on data
@@ -81,7 +81,7 @@ reduce the size of the log.
*/
/** Redo log system */
-log_t* log_sys = NULL;
+log_t log_sys;
/** Whether to generate and require checksums on the redo log pages */
my_bool innodb_log_checksums;
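
Below, log_sys changes from a heap-allocated pointer to a statically allocated object, and the rest of the patch rewrites log_sys-> accesses as log_sys. accesses; construction and destruction move into log_t::create() and log_t::close(), guarded by an is_initialised() flag. A minimal sketch of that lifetime pattern, with illustrative names rather than the real log_t members:

    #include <cassert>

    struct subsystem_t {
        bool m_initialised = false;

        bool is_initialised() const { return m_initialised; }

        void create() {             /* explicit init replaces ut_zalloc_nokey() */
            assert(!is_initialised());
            m_initialised = true;
            /* ... allocate buffers, create mutexes ... */
        }

        void close() {              /* safe even if create() never ran */
            if (!is_initialised()) return;
            m_initialised = false;
            /* ... free buffers, destroy mutexes ... */
        }
    };

    subsystem_t subsys;             /* like log_t log_sys: static object, no new/delete */

    int main() {
        subsys.create();
        assert(subsys.is_initialised());
        subsys.close();
    }
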
@@ -106,7 +106,8 @@ static time_t log_last_margine_warning_time;
/* Margins for free space in the log buffer after a log entry is catenated */
#define LOG_BUF_FLUSH_RATIO 2
-#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
+#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN \
+ + (4U << srv_page_size_shift))
/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
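
The rewritten margin macro is the first of many places where the compile-time UNIV_PAGE_SIZE constant is replaced by the runtime srv_page_size / srv_page_size_shift pair, with shifts and masks standing in for multiplication, division and modulo. Since the page size is a power of two the forms are equivalent; a small self-contained check (the two variables here are local stand-ins for the InnoDB globals):

    #include <cassert>

    int main() {
        unsigned srv_page_size_shift = 14;                  /* e.g. 16 KiB pages */
        unsigned long srv_page_size = 1UL << srv_page_size_shift;

        unsigned long offset = 123456789;

        /* The identities this patch relies on (power-of-two page size): */
        assert(4U << srv_page_size_shift == 4 * srv_page_size);
        assert(offset >> srv_page_size_shift == offset / srv_page_size);
        assert((offset & (srv_page_size - 1)) == offset % srv_page_size);
        return 0;
    }
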
@@ -133,15 +134,8 @@ extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(log_scrub_thread)(void*);
-/******************************************************//**
-Completes a checkpoint write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void);
-/*============================*/
-
/****************************************************************//**
-Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
+Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
exists.
@return LSN of oldest modification */
static
@@ -157,7 +151,7 @@ log_buf_pool_get_oldest_modification(void)
if (!lsn) {
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
}
return(lsn);
@@ -165,17 +159,13 @@ log_buf_pool_get_oldest_modification(void)
/** Extends the log buffer.
@param[in] len requested minimum size in bytes */
-void
-log_buffer_extend(
- ulint len)
+void log_buffer_extend(ulong len)
{
- ulint move_start;
- ulint move_end;
byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE];
log_mutex_enter_all();
- while (log_sys->is_extending) {
+ while (log_sys.is_extending) {
/* Another thread is trying to extend already.
Needs to wait for. */
log_mutex_exit_all();
@@ -184,28 +174,28 @@ log_buffer_extend(
log_mutex_enter_all();
- if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
+ if (srv_log_buffer_size > len) {
/* Already extended enough by the others */
log_mutex_exit_all();
return;
}
}
- if (len >= log_sys->buf_size / 2) {
+ if (len >= srv_log_buffer_size / 2) {
DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
DBUG_SUICIDE(););
/* log_buffer is too small. try to extend instead of crash. */
- ib::warn() << "The transaction log size is too large"
- " for innodb_log_buffer_size (" << len << " >= "
- << LOG_BUFFER_SIZE << " / 2). Trying to extend it.";
+ ib::warn() << "The redo log transaction size " << len <<
+ " exceeds innodb_log_buffer_size="
+ << srv_log_buffer_size << " / 2). Trying to extend it.";
}
- log_sys->is_extending = true;
+ log_sys.is_extending = true;
- while (ut_calc_align_down(log_sys->buf_free,
+ while (ut_calc_align_down(log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE)
- != ut_calc_align_down(log_sys->buf_next_to_write,
+ != ut_calc_align_down(log_sys.buf_next_to_write,
OS_FILE_LOG_BLOCK_SIZE)) {
/* Buffer might have >1 blocks to write still. */
log_mutex_exit_all();
@@ -215,46 +205,46 @@ log_buffer_extend(
log_mutex_enter_all();
}
- move_start = ut_calc_align_down(
- log_sys->buf_free,
+ ulong move_start = ut_calc_align_down(
+ log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE);
- move_end = log_sys->buf_free;
+ ulong move_end = log_sys.buf_free;
/* store the last log block in buffer */
- ut_memcpy(tmp_buf, log_sys->buf + move_start,
+ ut_memcpy(tmp_buf, log_sys.buf + move_start,
move_end - move_start);
- log_sys->buf_free -= move_start;
- log_sys->buf_next_to_write -= move_start;
+ log_sys.buf_free -= move_start;
+ log_sys.buf_next_to_write -= move_start;
- /* reallocate log buffer */
- srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
- ut_free(log_sys->buf_ptr);
+ /* free previous after getting the right address */
+ if (!log_sys.first_in_use) {
+ log_sys.buf -= srv_log_buffer_size;
+ }
+ ut_free_dodump(log_sys.buf, srv_log_buffer_size * 2);
- log_sys->buf_size = LOG_BUFFER_SIZE;
+ /* reallocate log buffer */
+ srv_log_buffer_size = len;
- log_sys->buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
- TRASH_ALLOC(log_sys->buf_ptr,
- log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
+ log_sys.buf = static_cast<byte*>(
+ ut_malloc_dontdump(srv_log_buffer_size * 2));
+ TRASH_ALLOC(log_sys.buf, srv_log_buffer_size * 2);
- log_sys->first_in_use = true;
+ log_sys.first_in_use = true;
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
+ log_sys.max_buf_free = srv_log_buffer_size / LOG_BUF_FLUSH_RATIO
- LOG_BUF_FLUSH_MARGIN;
/* restore the last log block */
- ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);
+ ut_memcpy(log_sys.buf, tmp_buf, move_end - move_start);
- ut_ad(log_sys->is_extending);
- log_sys->is_extending = false;
+ ut_ad(log_sys.is_extending);
+ log_sys.is_extending = false;
log_mutex_exit_all();
ib::info() << "innodb_log_buffer_size was extended to "
- << LOG_BUFFER_SIZE << ".";
+ << srv_log_buffer_size << ".";
}
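
log_buffer_extend() now assumes the layout that log_t::create() sets up later in this file: a single allocation of 2 * srv_log_buffer_size whose halves are used alternately, with first_in_use recording which half buf currently points into, which is why the code rewinds buf before freeing. A simplified sketch of that half-switching scheme (not the real log_t fields):

    #include <cstdlib>
    #include <cstddef>
    #include <cassert>

    struct dual_buf {
        unsigned char* buf;         /* points at the active half */
        size_t half_size;
        bool first_in_use;

        void create(size_t size) {
            half_size = size;
            buf = static_cast<unsigned char*>(std::calloc(2, size));
            first_in_use = true;
        }
        void switch_half() {        /* like log_buffer_switch() */
            if (first_in_use) buf += half_size;
            else              buf -= half_size;
            first_in_use = !first_in_use;
        }
        void close() {              /* rewind to the allocation start first */
            if (!first_in_use) buf -= half_size;
            std::free(buf);
            buf = nullptr;
        }
    };

    int main() {
        dual_buf b;
        b.create(4096);
        unsigned char* base = b.buf;
        b.switch_half();
        assert(b.buf == base + 4096);
        b.close();
    }
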
/** Calculate actual length in redo buffer and file including
@@ -273,7 +263,7 @@ log_calculate_actual_len(
- (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
/* actual data length in last block already written */
- ulint extra_len = (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE);
+ ulint extra_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE);
ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
extra_len -= LOG_BLOCK_HDR_SIZE;
@@ -298,7 +288,7 @@ log_margin_checkpoint_age(
ut_ad(log_mutex_own());
- if (margin > log_sys->log_group_capacity) {
+ if (margin > log_sys.log_group_capacity) {
/* return with warning output to avoid deadlock */
if (!log_has_printed_chkp_margine_warning
|| difftime(time(NULL),
@@ -310,7 +300,7 @@ log_margin_checkpoint_age(
" small for the single transaction log (size="
<< len << "). So, the last checkpoint age"
" might exceed the log group capacity "
- << log_sys->log_group_capacity << ".";
+ << log_sys.log_group_capacity << ".";
}
return;
@@ -319,20 +309,20 @@ log_margin_checkpoint_age(
/* Our margin check should ensure that we never reach this condition.
Try to do checkpoint once. We cannot keep waiting here as it might
result in hang in case the current mtr has latch on oldest lsn */
- if (log_sys->lsn - log_sys->last_checkpoint_lsn + margin
- > log_sys->log_group_capacity) {
+ if (log_sys.lsn - log_sys.last_checkpoint_lsn + margin
+ > log_sys.log_group_capacity) {
/* The log write of 'len' might overwrite the transaction log
after the last checkpoint. Makes checkpoint. */
bool flushed_enough = false;
- if (log_sys->lsn - log_buf_pool_get_oldest_modification()
+ if (log_sys.lsn - log_buf_pool_get_oldest_modification()
+ margin
- <= log_sys->log_group_capacity) {
+ <= log_sys.log_group_capacity) {
flushed_enough = true;
}
- log_sys->check_flush_or_checkpoint = true;
+ log_sys.check_flush_or_checkpoint = true;
log_mutex_exit();
DEBUG_SYNC_C("margin_checkpoint_age_rescue");
@@ -363,7 +353,7 @@ log_reserve_and_open(
loop:
ut_ad(log_mutex_own());
- if (log_sys->is_extending) {
+ if (log_sys.is_extending) {
log_mutex_exit();
/* Log buffer size is extending. Writing up to the next block
@@ -383,7 +373,7 @@ loop:
len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
+ (5 * len) / 4;
- if (log_sys->buf_free + len_upper_limit > log_sys->buf_size) {
+ if (log_sys.buf_free + len_upper_limit > srv_log_buffer_size) {
log_mutex_exit();
DEBUG_SYNC_C("log_buf_size_exceeded");
@@ -399,7 +389,7 @@ loop:
goto loop;
}
- return(log_sys->lsn);
+ return(log_sys.lsn);
}
/************************************************************//**
@@ -411,7 +401,6 @@ log_write_low(
const byte* str, /*!< in: string */
ulint str_len) /*!< in: string length */
{
- log_t* log = log_sys;
ulint len;
ulint data_len;
byte* log_block;
@@ -420,7 +409,7 @@ log_write_low(
part_loop:
/* Calculate a part length */
- data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
+ data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
@@ -431,18 +420,18 @@ part_loop:
data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
len = OS_FILE_LOG_BLOCK_SIZE
- - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
- LOG_BLOCK_TRL_SIZE;
}
- ut_memcpy(log->buf + log->buf_free, str, len);
+ memcpy(log_sys.buf + log_sys.buf_free, str, len);
str_len -= len;
str = str + len;
log_block = static_cast<byte*>(
- ut_align_down(
- log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
+ ut_align_down(log_sys.buf + log_sys.buf_free,
+ OS_FILE_LOG_BLOCK_SIZE));
log_block_set_data_len(log_block, data_len);
@@ -450,20 +439,21 @@ part_loop:
/* This block became full */
log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
log_block_set_checkpoint_no(log_block,
- log_sys->next_checkpoint_no);
+ log_sys.next_checkpoint_no);
len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
- log->lsn += len;
+ log_sys.lsn += len;
/* Initialize the next block header */
- log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
+ log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE,
+ log_sys.lsn);
} else {
- log->lsn += len;
+ log_sys.lsn += len;
}
- log->buf_free += len;
+ log_sys.buf_free += ulong(len);
- ut_ad(log->buf_free <= log->buf_size);
+ ut_ad(log_sys.buf_free <= srv_log_buffer_size);
if (str_len > 0) {
goto part_loop;
@@ -483,16 +473,15 @@ log_close(void)
ulint first_rec_group;
lsn_t oldest_lsn;
lsn_t lsn;
- log_t* log = log_sys;
lsn_t checkpoint_age;
ut_ad(log_mutex_own());
- lsn = log->lsn;
+ lsn = log_sys.lsn;
log_block = static_cast<byte*>(
- ut_align_down(
- log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
+ ut_align_down(log_sys.buf + log_sys.buf_free,
+ OS_FILE_LOG_BLOCK_SIZE));
first_rec_group = log_block_get_first_rec_group(log_block);
@@ -505,14 +494,13 @@ log_close(void)
log_block, log_block_get_data_len(log_block));
}
- if (log->buf_free > log->max_buf_free) {
-
- log->check_flush_or_checkpoint = true;
+ if (log_sys.buf_free > log_sys.max_buf_free) {
+ log_sys.check_flush_or_checkpoint = true;
}
- checkpoint_age = lsn - log->last_checkpoint_lsn;
+ checkpoint_age = lsn - log_sys.last_checkpoint_lsn;
- if (checkpoint_age >= log->log_group_capacity) {
+ if (checkpoint_age >= log_sys.log_group_capacity) {
DBUG_EXECUTE_IF(
"print_all_chkp_warnings",
log_has_printed_chkp_warning = false;);
@@ -525,131 +513,28 @@ log_close(void)
ib::error() << "The age of the last checkpoint is "
<< checkpoint_age << ", which exceeds the log"
- " group capacity " << log->log_group_capacity
+ " group capacity "
+ << log_sys.log_group_capacity
<< ".";
}
}
- if (checkpoint_age <= log->max_modified_age_sync) {
-
+ if (checkpoint_age <= log_sys.max_modified_age_sync) {
goto function_exit;
}
oldest_lsn = buf_pool_get_oldest_modification();
if (!oldest_lsn
- || lsn - oldest_lsn > log->max_modified_age_sync
- || checkpoint_age > log->max_checkpoint_age_async) {
-
- log->check_flush_or_checkpoint = true;
+ || lsn - oldest_lsn > log_sys.max_modified_age_sync
+ || checkpoint_age > log_sys.max_checkpoint_age_async) {
+ log_sys.check_flush_or_checkpoint = true;
}
function_exit:
return(lsn);
}
-/******************************************************//**
-Calculates the offset within a log group, when the log file headers are not
-included.
-@return size offset (<= offset) */
-UNIV_INLINE
-lsn_t
-log_group_calc_size_offset(
-/*=======================*/
- lsn_t offset, /*!< in: real offset within the
- log group */
- const log_group_t* group) /*!< in: log group */
-{
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
- ut_ad(log_mutex_own() || log_write_mutex_own());
-
- return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
-}
-
-/******************************************************//**
-Calculates the offset within a log group, when the log file headers are
-included.
-@return real offset (>= offset) */
-UNIV_INLINE
-lsn_t
-log_group_calc_real_offset(
-/*=======================*/
- lsn_t offset, /*!< in: size offset within the
- log group */
- const log_group_t* group) /*!< in: log group */
-{
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
- ut_ad(log_mutex_own() || log_write_mutex_own());
-
- return(offset + LOG_FILE_HDR_SIZE
- * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
-}
-
-/** Calculate the offset of an lsn within a log group.
-@param[in] lsn log sequence number
-@param[in] group log group
-@return offset within the log group */
-lsn_t
-log_group_calc_lsn_offset(
- lsn_t lsn,
- const log_group_t* group)
-{
- lsn_t gr_lsn;
- lsn_t gr_lsn_size_offset;
- lsn_t difference;
- lsn_t group_size;
- lsn_t offset;
-
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
- ut_ad(log_mutex_own() || log_write_mutex_own());
-
- gr_lsn = group->lsn;
-
- gr_lsn_size_offset = log_group_calc_size_offset(
- group->lsn_offset, group);
-
- group_size = group->capacity();
-
- if (lsn >= gr_lsn) {
-
- difference = lsn - gr_lsn;
- } else {
- difference = gr_lsn - lsn;
-
- difference = difference % group_size;
-
- difference = group_size - difference;
- }
-
- offset = (gr_lsn_size_offset + difference) % group_size;
-
- /* fprintf(stderr,
- "Offset is " LSN_PF " gr_lsn_offset is " LSN_PF
- " difference is " LSN_PF "\n",
- offset, gr_lsn_size_offset, difference);
- */
-
- return(log_group_calc_real_offset(offset, group));
-}
-
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- lsn_t lsn) /*!< in: lsn for which the values should be
- set */
-{
- group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
- group->lsn = lsn;
-}
-
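
For reference, the helpers deleted above map an LSN to a physical byte offset in the circular sequence of log files, skipping the LOG_FILE_HDR_SIZE header at the start of each file; the patch folds this arithmetic into log_t::files::calc_lsn_offset(), whose body is not shown in this diff. A self-contained transcription of the deleted arithmetic, with an illustrative anchor pair in main():

    #include <cstdint>
    #include <cstdio>

    typedef std::uint64_t lsn_t;

    static const lsn_t FILE_HDR_SIZE = 2048;    /* stand-in for LOG_FILE_HDR_SIZE */

    /* Offset with the per-file headers removed (<= offset). */
    static lsn_t size_offset(lsn_t offset, lsn_t file_size) {
        return offset - FILE_HDR_SIZE * (1 + offset / file_size);
    }

    /* Offset with the per-file headers added back (>= offset). */
    static lsn_t real_offset(lsn_t offset, lsn_t file_size) {
        return offset + FILE_HDR_SIZE * (1 + offset / (file_size - FILE_HDR_SIZE));
    }

    /* Map an lsn to a physical offset, anchored at a known
    (anchor_lsn, anchor_offset) pair such as the one kept in log_sys.log. */
    static lsn_t calc_lsn_offset(lsn_t lsn, lsn_t anchor_lsn, lsn_t anchor_offset,
                                 lsn_t file_size, lsn_t n_files) {
        const lsn_t capacity = n_files * (file_size - FILE_HDR_SIZE);
        lsn_t diff = lsn >= anchor_lsn
            ? lsn - anchor_lsn
            : capacity - (anchor_lsn - lsn) % capacity;
        lsn_t off = (size_offset(anchor_offset, file_size) + diff) % capacity;
        return real_offset(off, file_size);
    }

    int main() {
        /* Two 48 MiB files, anchored at lsn 8192 -> offset header + 8192. */
        lsn_t off = calc_lsn_offset(1 << 20, 8192, FILE_HDR_SIZE + 8192,
                                    48 << 20, 2);
        std::printf("offset = %llu\n", (unsigned long long) off);
    }
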
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@@ -689,186 +574,123 @@ log_set_capacity(ulonglong file_size)
log_mutex_enter();
- log_sys->log_group_capacity = smallest_capacity;
+ log_sys.log_group_capacity = smallest_capacity;
- log_sys->max_modified_age_async = margin
+ log_sys.max_modified_age_async = margin
- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
- log_sys->max_modified_age_sync = margin
+ log_sys.max_modified_age_sync = margin
- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
- log_sys->max_checkpoint_age_async = margin - margin
+ log_sys.max_checkpoint_age_async = margin - margin
/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
- log_sys->max_checkpoint_age = margin;
+ log_sys.max_checkpoint_age = margin;
log_mutex_exit();
return(true);
}
-/** Initializes the redo logging subsystem. */
-void
-log_sys_init()
+/** Initialize the redo log subsystem. */
+void log_t::create()
{
- log_sys = static_cast<log_t*>(ut_zalloc_nokey(sizeof(log_t)));
-
- mutex_create(LATCH_ID_LOG_SYS, &log_sys->mutex);
- mutex_create(LATCH_ID_LOG_WRITE, &log_sys->write_mutex);
-
- mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_sys->log_flush_order_mutex);
-
- /* Start the lsn from one log block from zero: this way every
- log record has a start lsn != zero, a fact which we will use */
-
- log_sys->lsn = LOG_START_LSN;
-
- ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
- ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
-
- log_sys->buf_size = LOG_BUFFER_SIZE;
-
- log_sys->buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
- TRASH_ALLOC(log_sys->buf_ptr,
- log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->first_in_use = true;
-
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- - LOG_BUF_FLUSH_MARGIN;
- log_sys->check_flush_or_checkpoint = true;
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
- /*----------------------------*/
-
- log_sys->write_lsn = log_sys->lsn;
-
- log_sys->flush_event = os_event_create(0);
-
- os_event_set(log_sys->flush_event);
-
- /*----------------------------*/
-
- log_sys->last_checkpoint_lsn = log_sys->lsn;
-
- rw_lock_create(
- checkpoint_lock_key, &log_sys->checkpoint_lock,
- SYNC_NO_ORDER_CHECK);
-
- log_sys->checkpoint_buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->checkpoint_buf = static_cast<byte*>(
- ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- /*----------------------------*/
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN?
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
- log_scrub_thread_active = !srv_read_only_mode && srv_scrub_log;
- if (log_scrub_thread_active) {
- log_scrub_event = os_event_create("log_scrub_event");
- os_thread_create(log_scrub_thread, NULL, NULL);
- }
+ ut_ad(this == &log_sys);
+ ut_ad(!is_initialised());
+ m_initialised= true;
+
+ mutex_create(LATCH_ID_LOG_SYS, &mutex);
+ mutex_create(LATCH_ID_LOG_WRITE, &write_mutex);
+ mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_flush_order_mutex);
+
+ /* Start the lsn from one log block from zero: this way every
+ log record has a non-zero start lsn, a fact which we will use */
+
+ lsn= LOG_START_LSN;
+
+ ut_ad(srv_log_buffer_size >= 16 * OS_FILE_LOG_BLOCK_SIZE);
+ ut_ad(srv_log_buffer_size >= 4U << srv_page_size_shift);
+
+ buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size * 2));
+ TRASH_ALLOC(buf, srv_log_buffer_size * 2);
+
+ first_in_use= true;
+
+ max_buf_free= srv_log_buffer_size / LOG_BUF_FLUSH_RATIO -
+ LOG_BUF_FLUSH_MARGIN;
+ check_flush_or_checkpoint= true;
+
+ n_log_ios_old= n_log_ios;
+ last_printout_time= time(NULL);
+
+ buf_next_to_write= 0;
+ is_extending= false;
+ write_lsn= lsn;
+ flushed_to_disk_lsn= 0;
+ n_pending_flushes= 0;
+ flush_event = os_event_create("log_flush_event");
+ os_event_set(flush_event);
+ n_log_ios= 0;
+ n_log_ios_old= 0;
+ log_group_capacity= 0;
+ max_modified_age_async= 0;
+ max_modified_age_sync= 0;
+ max_checkpoint_age_async= 0;
+ max_checkpoint_age= 0;
+ next_checkpoint_no= 0;
+ next_checkpoint_lsn= 0;
+ append_on_checkpoint= NULL;
+ n_pending_checkpoint_writes= 0;
+
+ last_checkpoint_lsn= lsn;
+ rw_lock_create(checkpoint_lock_key, &checkpoint_lock, SYNC_NO_ORDER_CHECK);
+
+ log_block_init(buf, lsn);
+ log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);
+
+ buf_free= LOG_BLOCK_HDR_SIZE;
+ lsn= LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
+
+ MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, lsn - last_checkpoint_lsn);
+
+ log_scrub_thread_active= !srv_read_only_mode && srv_scrub_log;
+ if (log_scrub_thread_active) {
+ log_scrub_event= os_event_create("log_scrub_event");
+ os_thread_create(log_scrub_thread, NULL, NULL);
+ }
}
/** Initialize the redo log.
@param[in] n_files number of files */
-void
-log_init(ulint n_files)
+void log_t::files::create(ulint n_files)
{
- ulint i;
- log_group_t* group = &log_sys->log;
-
- group->n_files = n_files;
- group->subformat = srv_safe_truncate;
- if (srv_safe_truncate) {
- group->format = srv_encrypt_log
- ? LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED
- : LOG_HEADER_FORMAT_10_3;
- } else {
- group->format = srv_encrypt_log
- ? LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED
- : LOG_HEADER_FORMAT_10_2;
- }
- group->file_size = srv_log_file_size;
- group->state = LOG_GROUP_OK;
- group->lsn = LOG_START_LSN;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
-
- group->file_header_bufs_ptr = static_cast<byte**>(
- ut_zalloc_nokey(sizeof(byte*) * n_files));
-
- group->file_header_bufs = static_cast<byte**>(
- ut_zalloc_nokey(sizeof(byte**) * n_files));
-
- for (i = 0; i < n_files; i++) {
- group->file_header_bufs_ptr[i] = static_cast<byte*>(
- ut_zalloc_nokey(LOG_FILE_HDR_SIZE
- + OS_FILE_LOG_BLOCK_SIZE));
-
- group->file_header_bufs[i] = static_cast<byte*>(
- ut_align(group->file_header_bufs_ptr[i],
- OS_FILE_LOG_BLOCK_SIZE));
- }
-
- group->checkpoint_buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
-
- group->checkpoint_buf = static_cast<byte*>(
- ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
-}
-
-/******************************************************//**
-Completes an i/o to a log file. */
-void
-log_io_complete(
-/*============*/
- log_group_t* group) /*!< in: log group or a dummy pointer */
-{
- if ((ulint) group & 0x1UL) {
- /* It was a checkpoint write */
- group = (log_group_t*)((ulint) group - 1);
-
- switch (srv_file_flush_method) {
- case SRV_O_DSYNC:
- case SRV_NOSYNC:
- break;
- case SRV_FSYNC:
- case SRV_LITTLESYNC:
- case SRV_O_DIRECT:
- case SRV_O_DIRECT_NO_FSYNC:
- case SRV_ALL_O_DIRECT_FSYNC:
- fil_flush(SRV_LOG_SPACE_FIRST_ID);
- }
-
-
- DBUG_PRINT("ib_log", ("checkpoint info written"));
- log_io_complete_checkpoint();
-
- return;
- }
-
- ut_error; /*!< We currently use synchronous writing of the
- logs and cannot end up here! */
+ ut_ad(n_files <= SRV_N_LOG_FILES_MAX);
+ ut_ad(this == &log_sys.log);
+ ut_ad(log_sys.is_initialised());
+
+ this->n_files= n_files;
+ format= srv_encrypt_log
+ ? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_CURRENT;
+ subformat= 2;
+ file_size= srv_log_file_size;
+ lsn= LOG_START_LSN;
+ lsn_offset= LOG_FILE_HDR_SIZE;
+
+ byte* ptr= static_cast<byte*>(ut_zalloc_nokey(LOG_FILE_HDR_SIZE * n_files
+ + OS_FILE_LOG_BLOCK_SIZE));
+ file_header_bufs_ptr= ptr;
+ ptr= static_cast<byte*>(ut_align(ptr, OS_FILE_LOG_BLOCK_SIZE));
+
+ memset(file_header_bufs, 0, sizeof file_header_bufs);
+
+ for (ulint i = 0; i < n_files; i++, ptr += LOG_FILE_HDR_SIZE)
+ file_header_bufs[i] = ptr;
}
/******************************************************//**
Writes a log file header to a log file space. */
static
void
-log_group_file_header_flush(
-/*========================*/
- log_group_t* group, /*!< in: log group */
+log_file_header_flush(
ulint nth_file, /*!< in: header to the nth file in the
log file space */
lsn_t start_lsn) /*!< in: log file data starts at this
@@ -879,17 +701,15 @@ log_group_file_header_flush(
ut_ad(log_write_mutex_own());
ut_ad(!recv_no_log_write);
- ut_a(nth_file < group->n_files);
- ut_ad((group->format & ~LOG_HEADER_FORMAT_ENCRYPTED)
- == (srv_safe_truncate
- ? LOG_HEADER_FORMAT_10_3
- : LOG_HEADER_FORMAT_10_2));
+ ut_a(nth_file < log_sys.log.n_files);
+ ut_ad((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ == LOG_HEADER_FORMAT_CURRENT);
- buf = *(group->file_header_bufs + nth_file);
+ buf = log_sys.log.file_header_bufs[nth_file];
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
- mach_write_to_4(buf + LOG_HEADER_FORMAT, group->format);
- mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, srv_safe_truncate);
+ mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
+ mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat);
mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
LOG_HEADER_CREATOR_CURRENT);
@@ -897,26 +717,25 @@ log_group_file_header_flush(
>= sizeof LOG_HEADER_CREATOR_CURRENT);
log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
- dest_offset = nth_file * group->file_size;
+ dest_offset = nth_file * log_sys.log.file_size;
DBUG_PRINT("ib_log", ("write " LSN_PF
" file " ULINTPF " header",
start_lsn, nth_file));
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
srv_stats.os_log_pending_writes.inc();
- const ulint page_no
- = (ulint) (dest_offset / univ_page_size.physical());
+ const ulint page_no = ulint(dest_offset >> srv_page_size_shift);
fil_io(IORequestLogWrite, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) (dest_offset % univ_page_size.physical()),
- OS_FILE_LOG_BLOCK_SIZE, buf, group);
+ ulint(dest_offset & (srv_page_size - 1)),
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
srv_stats.os_log_pending_writes.dec();
}
@@ -935,12 +754,10 @@ log_block_store_checksum(
}
/******************************************************//**
-Writes a buffer to a log file group. */
+Writes a buffer to a log file. */
static
void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
+log_write_buf(
byte* buf, /*!< in: buffer */
ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
@@ -971,28 +788,27 @@ loop:
return;
}
- next_offset = log_group_calc_lsn_offset(start_lsn, group);
+ next_offset = log_sys.log.calc_lsn_offset(start_lsn);
if (write_header
- && next_offset % group->file_size == LOG_FILE_HDR_SIZE) {
+ && next_offset % log_sys.log.file_size == LOG_FILE_HDR_SIZE) {
/* We start to write a new log file instance in the group */
- ut_a(next_offset / group->file_size <= ULINT_MAX);
+ ut_a(next_offset / log_sys.log.file_size <= ULINT_MAX);
- log_group_file_header_flush(group, (ulint)
- (next_offset / group->file_size),
- start_lsn);
+ log_file_header_flush(
+ ulint(next_offset / log_sys.log.file_size), start_lsn);
srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
srv_stats.log_writes.inc();
}
- if ((next_offset % group->file_size) + len > group->file_size) {
-
+ if ((next_offset % log_sys.log.file_size) + len
+ > log_sys.log.file_size) {
/* if the above condition holds, then the below expression
is < len which is ulint, so the typecast is ok */
- write_len = (ulint)
- (group->file_size - (next_offset % group->file_size));
+ write_len = ulint(log_sys.log.file_size
+ - (next_offset % log_sys.log.file_size));
} else {
write_len = len;
}
@@ -1024,22 +840,20 @@ loop:
log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
}
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
srv_stats.os_log_pending_writes.inc();
- ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
+ ut_a((next_offset >> srv_page_size_shift) <= ULINT_MAX);
- const ulint page_no
- = (ulint) (next_offset / univ_page_size.physical());
+ const ulint page_no = ulint(next_offset >> srv_page_size_shift);
fil_io(IORequestLogWrite, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group);
+ ulint(next_offset & (srv_page_size - 1)), write_len, buf, NULL);
srv_stats.os_log_pending_writes.dec();
@@ -1063,9 +877,9 @@ static
void
log_write_flush_to_disk_low()
{
- /* FIXME: This is not holding log_sys->mutex while
+ /* FIXME: This is not holding log_sys.mutex while
calling os_event_set()! */
- ut_a(log_sys->n_pending_flushes == 1); /* No other threads here */
+ ut_a(log_sys.n_pending_flushes == 1); /* No other threads here */
bool do_flush = srv_file_flush_method != SRV_O_DSYNC;
@@ -1077,12 +891,12 @@ log_write_flush_to_disk_low()
log_mutex_enter();
if (do_flush) {
- log_sys->flushed_to_disk_lsn = log_sys->current_flush_lsn;
+ log_sys.flushed_to_disk_lsn = log_sys.current_flush_lsn;
}
- log_sys->n_pending_flushes--;
+ log_sys.n_pending_flushes--;
- os_event_set(log_sys->flush_event);
+ os_event_set(log_sys.flush_event);
}
/** Switch the log buffer in use, and copy the content of last block
@@ -1095,29 +909,29 @@ log_buffer_switch()
ut_ad(log_mutex_own());
ut_ad(log_write_mutex_own());
- const byte* old_buf = log_sys->buf;
- ulint area_end = ut_calc_align(log_sys->buf_free,
+ const byte* old_buf = log_sys.buf;
+ ulint area_end = ut_calc_align(log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE);
- if (log_sys->first_in_use) {
- log_sys->first_in_use = false;
- ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
+ if (log_sys.first_in_use) {
+ log_sys.first_in_use = false;
+ ut_ad(log_sys.buf == ut_align(log_sys.buf,
OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf += log_sys->buf_size;
+ log_sys.buf += srv_log_buffer_size;
} else {
- log_sys->first_in_use = true;
- log_sys->buf -= log_sys->buf_size;
- ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
+ log_sys.first_in_use = true;
+ log_sys.buf -= srv_log_buffer_size;
+ ut_ad(log_sys.buf == ut_align(log_sys.buf,
OS_FILE_LOG_BLOCK_SIZE));
}
/* Copy the last block to new buf */
- ut_memcpy(log_sys->buf,
+ ut_memcpy(log_sys.buf,
old_buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf_free %= OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
+ log_sys.buf_free %= OS_FILE_LOG_BLOCK_SIZE;
+ log_sys.buf_next_to_write = log_sys.buf_free;
}
/** Ensure that the log has been written to the log file up to a given
@@ -1156,7 +970,7 @@ loop:
(flush_to_disk == true) case, because the log_mutex
contention also works as the arbitrator for write-IO
(fsync) bandwidth between log files and data files. */
- if (!flush_to_disk && log_sys->write_lsn >= lsn) {
+ if (!flush_to_disk && log_sys.write_lsn >= lsn) {
return;
}
#endif
@@ -1165,8 +979,8 @@ loop:
ut_ad(!recv_no_log_write);
lsn_t limit_lsn = flush_to_disk
- ? log_sys->flushed_to_disk_lsn
- : log_sys->write_lsn;
+ ? log_sys.flushed_to_disk_lsn
+ : log_sys.write_lsn;
if (limit_lsn >= lsn) {
log_write_mutex_exit();
@@ -1179,15 +993,15 @@ loop:
pending flush and based on that we wait for it to finish
before proceeding further. */
if (flush_to_disk
- && (log_sys->n_pending_flushes > 0
- || !os_event_is_set(log_sys->flush_event))) {
+ && (log_sys.n_pending_flushes > 0
+ || !os_event_is_set(log_sys.flush_event))) {
/* Figure out if the current flush will do the job
for us. */
- bool work_done = log_sys->current_flush_lsn >= lsn;
+ bool work_done = log_sys.current_flush_lsn >= lsn;
log_write_mutex_exit();
- os_event_wait(log_sys->flush_event);
+ os_event_wait(log_sys.flush_event);
if (work_done) {
return;
@@ -1198,7 +1012,7 @@ loop:
log_mutex_enter();
if (!flush_to_disk
- && log_sys->buf_free == log_sys->buf_next_to_write) {
+ && log_sys.buf_free == log_sys.buf_next_to_write) {
/* Nothing to write and no flush to disk requested */
log_mutex_exit_all();
return;
@@ -1212,15 +1026,15 @@ loop:
ulint pad_size;
DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
- log_sys->write_lsn,
- log_sys->lsn));
+ log_sys.write_lsn,
+ log_sys.lsn));
if (flush_to_disk) {
- log_sys->n_pending_flushes++;
- log_sys->current_flush_lsn = log_sys->lsn;
+ log_sys.n_pending_flushes++;
+ log_sys.current_flush_lsn = log_sys.lsn;
MONITOR_INC(MONITOR_PENDING_LOG_FLUSH);
- os_event_reset(log_sys->flush_event);
+ os_event_reset(log_sys.flush_event);
- if (log_sys->buf_free == log_sys->buf_next_to_write) {
+ if (log_sys.buf_free == log_sys.buf_next_to_write) {
/* Nothing to write, flush only */
log_mutex_exit_all();
log_write_flush_to_disk_low();
@@ -1229,25 +1043,25 @@ loop:
}
}
- start_offset = log_sys->buf_next_to_write;
- end_offset = log_sys->buf_free;
+ start_offset = log_sys.buf_next_to_write;
+ end_offset = log_sys.buf_free;
area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
ut_ad(area_end - area_start > 0);
- log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
+ log_block_set_flush_bit(log_sys.buf + area_start, TRUE);
log_block_set_checkpoint_no(
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- log_sys->next_checkpoint_no);
+ log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+ log_sys.next_checkpoint_no);
- write_lsn = log_sys->lsn;
- write_buf = log_sys->buf;
+ write_lsn = log_sys.lsn;
+ write_buf = log_sys.buf;
log_buffer_switch();
- log_group_set_fields(&log_sys->log, log_sys->write_lsn);
+ log_sys.log.set_fields(log_sys.write_lsn);
log_mutex_exit();
/* Erase the end of the last log block. */
@@ -1257,12 +1071,9 @@ loop:
/* Calculate pad_size if needed. */
pad_size = 0;
if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
- lsn_t end_offset;
ulint end_offset_in_unit;
- end_offset = log_group_calc_lsn_offset(
- ut_uint64_align_up(write_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- &log_sys->log);
+ lsn_t end_offset = log_sys.log.calc_lsn_offset(
+ ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
end_offset_in_unit = (ulint) (end_offset % write_ahead_size);
if (end_offset_in_unit > 0
@@ -1270,9 +1081,9 @@ loop:
/* The first block in the unit was initialized
after the last writing.
Needs to be written padded data once. */
- pad_size = std::min(
+ pad_size = std::min<ulint>(
ulint(write_ahead_size) - end_offset_in_unit,
- log_sys->buf_size - area_end);
+ srv_log_buffer_size - area_end);
::memset(write_buf + area_end, 0, pad_size);
}
}
@@ -1281,43 +1092,41 @@ loop:
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"InnoDB log write: "
LSN_PF "," LSN_PF,
- log_sys->write_lsn, lsn);
+ log_sys.write_lsn, lsn);
}
- if (log_sys->is_encrypted()) {
- log_crypt(write_buf + area_start, log_sys->write_lsn,
+ if (log_sys.is_encrypted()) {
+ log_crypt(write_buf + area_start, log_sys.write_lsn,
area_end - area_start);
}
/* Do the write to the log files */
- log_group_write_buf(
- &log_sys->log, write_buf + area_start,
- area_end - area_start + pad_size,
+ log_write_buf(
+ write_buf + area_start, area_end - area_start + pad_size,
#ifdef UNIV_DEBUG
pad_size,
#endif /* UNIV_DEBUG */
- ut_uint64_align_down(log_sys->write_lsn,
+ ut_uint64_align_down(log_sys.write_lsn,
OS_FILE_LOG_BLOCK_SIZE),
start_offset - area_start);
srv_stats.log_padded.add(pad_size);
- log_sys->write_lsn = write_lsn;
+ log_sys.write_lsn = write_lsn;
if (srv_file_flush_method == SRV_O_DSYNC) {
/* O_SYNC means the OS did not buffer the log file at all:
so we have also flushed to disk what we have written */
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
+ log_sys.flushed_to_disk_lsn = log_sys.write_lsn;
}
log_write_mutex_exit();
if (flush_to_disk) {
log_write_flush_to_disk_low();
- ib_uint64_t write_lsn = log_sys->write_lsn;
- ib_uint64_t flush_lsn = log_sys->flushed_to_disk_lsn;
+ ib_uint64_t flush_lsn = log_sys.flushed_to_disk_lsn;
log_mutex_exit();
- innobase_mysql_log_notify(write_lsn, flush_lsn);
+ innobase_mysql_log_notify(flush_lsn);
}
}
@@ -1346,11 +1155,11 @@ log_buffer_sync_in_background(
log_mutex_enter();
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
if (flush
- && log_sys->n_pending_flushes > 0
- && log_sys->current_flush_lsn >= lsn) {
+ && log_sys.n_pending_flushes > 0
+ && log_sys.current_flush_lsn >= lsn) {
/* The write + flush will write enough */
log_mutex_exit();
return;
@@ -1370,14 +1179,13 @@ void
log_flush_margin(void)
/*==================*/
{
- log_t* log = log_sys;
lsn_t lsn = 0;
log_mutex_enter();
- if (log->buf_free > log->max_buf_free) {
+ if (log_sys.buf_free > log_sys.max_buf_free) {
/* We can write during flush */
- lsn = log->lsn;
+ lsn = log_sys.lsn;
}
log_mutex_exit();
@@ -1457,36 +1265,33 @@ log_complete_checkpoint(void)
/*=========================*/
{
ut_ad(log_mutex_own());
- ut_ad(log_sys->n_pending_checkpoint_writes == 0);
+ ut_ad(log_sys.n_pending_checkpoint_writes == 0);
- log_sys->next_checkpoint_no++;
+ log_sys.next_checkpoint_no++;
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
+ log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
+ log_sys.lsn - log_sys.last_checkpoint_lsn);
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
", flushed to " LSN_PF,
- log_sys->last_checkpoint_lsn,
- log_sys->flushed_to_disk_lsn));
+ log_sys.last_checkpoint_lsn,
+ log_sys.flushed_to_disk_lsn));
- rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ rw_lock_x_unlock_gen(&(log_sys.checkpoint_lock), LOG_CHECKPOINT);
}
-/******************************************************//**
-Completes an asynchronous checkpoint info write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void)
-/*============================*/
+/** Complete an asynchronous checkpoint write. */
+void log_t::complete_checkpoint()
{
+ ut_ad(this == &log_sys);
MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
log_mutex_enter();
- ut_ad(log_sys->n_pending_checkpoint_writes > 0);
+ ut_ad(n_pending_checkpoint_writes > 0);
- if (--log_sys->n_pending_checkpoint_writes == 0) {
+ if (!--n_pending_checkpoint_writes) {
log_complete_checkpoint();
}
@@ -1500,91 +1305,78 @@ void
log_group_checkpoint(lsn_t end_lsn)
{
lsn_t lsn_offset;
- byte* buf;
ut_ad(!srv_read_only_mode);
ut_ad(log_mutex_own());
- ut_ad(end_lsn == 0 || end_lsn >= log_sys->next_checkpoint_lsn);
- ut_ad(end_lsn <= log_sys->lsn);
- ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys->lsn
+ ut_ad(end_lsn == 0 || end_lsn >= log_sys.next_checkpoint_lsn);
+ ut_ad(end_lsn <= log_sys.lsn);
+ ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys.lsn
|| srv_shutdown_state != SRV_SHUTDOWN_NONE);
DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
" written",
- log_sys->next_checkpoint_no,
- log_sys->next_checkpoint_lsn));
-
- log_group_t* group = &log_sys->log;
+ log_sys.next_checkpoint_no,
+ log_sys.next_checkpoint_lsn));
- buf = group->checkpoint_buf;
+ byte* buf = log_sys.checkpoint_buf;
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
+ mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);
- if (log_sys->is_encrypted()) {
+ if (log_sys.is_encrypted()) {
log_crypt_write_checkpoint_buf(buf);
}
- lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
- group);
+ lsn_offset = log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
- mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
+ srv_log_buffer_size);
mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);
log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
- ut_ad(LOG_CHECKPOINT_1 < univ_page_size.physical());
- ut_ad(LOG_CHECKPOINT_2 < univ_page_size.physical());
+ ut_ad(LOG_CHECKPOINT_1 < srv_page_size);
+ ut_ad(LOG_CHECKPOINT_2 < srv_page_size);
- if (log_sys->n_pending_checkpoint_writes++ == 0) {
- rw_lock_x_lock_gen(&log_sys->checkpoint_lock,
+ if (log_sys.n_pending_checkpoint_writes++ == 0) {
+ rw_lock_x_lock_gen(&log_sys.checkpoint_lock,
LOG_CHECKPOINT);
}
/* Note: We alternate the physical place of the checkpoint info.
See the (next_checkpoint_no & 1) below. */
- /* We send as the last parameter the group machine address
- added with 1, as we want to distinguish between a normal log
- file write and a checkpoint field write */
-
fil_io(IORequestLogWrite, false,
page_id_t(SRV_LOG_SPACE_FIRST_ID, 0),
univ_page_size,
- (log_sys->next_checkpoint_no & 1)
+ (log_sys.next_checkpoint_no & 1)
? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
OS_FILE_LOG_BLOCK_SIZE,
- buf, (byte*) group + 1);
-
- ut_ad(((ulint) group & 0x1UL) == 0);
+ buf, reinterpret_cast<void*>(1) /* checkpoint write */);
}
-/** Read a log group header page to log_sys->checkpoint_buf.
-@param[in] group log group
-@param[in] header 0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
-void
-log_group_header_read(
- const log_group_t* group,
- ulint header)
+/** Read a log group header page to log_sys.checkpoint_buf.
+@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 */
+void log_header_read(ulint header)
{
ut_ad(log_mutex_own());
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID,
- header / univ_page_size.physical()),
- univ_page_size, header % univ_page_size.physical(),
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
+ header >> srv_page_size_shift),
+ univ_page_size, header & (srv_page_size - 1),
+ OS_FILE_LOG_BLOCK_SIZE, log_sys.checkpoint_buf, NULL);
}
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@@ -1604,8 +1396,8 @@ log_write_checkpoint_info(bool sync, lsn_t end_lsn)
if (sync) {
/* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&log_sys->checkpoint_lock);
- rw_lock_s_unlock(&log_sys->checkpoint_lock);
+ rw_lock_s_lock(&log_sys.checkpoint_lock);
+ rw_lock_s_unlock(&log_sys.checkpoint_lock);
DBUG_EXECUTE_IF(
"crash_after_checkpoint",
@@ -1621,8 +1413,8 @@ log_append_on_checkpoint(
mtr_buf_t* buf)
{
log_mutex_enter();
- mtr_buf_t* old = log_sys->append_on_checkpoint;
- log_sys->append_on_checkpoint = buf;
+ mtr_buf_t* old = log_sys.append_on_checkpoint;
+ log_sys.append_on_checkpoint = buf;
log_mutex_exit();
return(old);
}
@@ -1661,7 +1453,9 @@ log_checkpoint(
case SRV_LITTLESYNC:
case SRV_O_DIRECT:
case SRV_O_DIRECT_NO_FSYNC:
+#ifdef _WIN32
case SRV_ALL_O_DIRECT_FSYNC:
+#endif
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
@@ -1671,24 +1465,24 @@ log_checkpoint(
oldest_lsn = log_buf_pool_get_oldest_modification();
/* Because log also contains headers and dummy log records,
- log_buf_pool_get_oldest_modification() will return log_sys->lsn
+ log_buf_pool_get_oldest_modification() will return log_sys.lsn
if the buffer pool contains no dirty buffers.
We must make sure that the log is flushed up to that lsn.
If there are dirty buffers in the buffer pool, then our
write-ahead-logging algorithm ensures that the log has been
flushed up to oldest_lsn. */
- ut_ad(oldest_lsn >= log_sys->last_checkpoint_lsn);
+ ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
if (!write_always
&& oldest_lsn
- <= log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
+ <= log_sys.last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
/* Do nothing, because nothing was logged (other than
a MLOG_CHECKPOINT marker) since the previous checkpoint. */
log_mutex_exit();
return(true);
}
/* Repeat the MLOG_FILE_NAME records after the checkpoint, in
- case some log records between the checkpoint and log_sys->lsn
+ case some log records between the checkpoint and log_sys.lsn
need them. Finally, write a MLOG_CHECKPOINT marker. Redo log
apply expects to see a MLOG_CHECKPOINT after the checkpoint,
except on clean shutdown, where the log will be empty after
@@ -1699,14 +1493,14 @@ log_checkpoint(
threads will be blocked, and no pages can be added to the
flush lists. */
lsn_t flush_lsn = oldest_lsn;
- const lsn_t end_lsn = log_sys->lsn;
+ const lsn_t end_lsn = log_sys.lsn;
const bool do_write
= srv_shutdown_state == SRV_SHUTDOWN_NONE
|| flush_lsn != end_lsn;
if (fil_names_clear(flush_lsn, do_write)) {
- ut_ad(log_sys->lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
- flush_lsn = log_sys->lsn;
+ ut_ad(log_sys.lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
+ flush_lsn = log_sys.lsn;
}
log_mutex_exit();
@@ -1729,28 +1523,28 @@ log_checkpoint(
log_mutex_enter();
- ut_ad(log_sys->flushed_to_disk_lsn >= flush_lsn);
+ ut_ad(log_sys.flushed_to_disk_lsn >= flush_lsn);
ut_ad(flush_lsn >= oldest_lsn);
- if (log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ if (log_sys.last_checkpoint_lsn >= oldest_lsn) {
log_mutex_exit();
return(true);
}
- if (log_sys->n_pending_checkpoint_writes > 0) {
+ if (log_sys.n_pending_checkpoint_writes > 0) {
/* A checkpoint write is running */
log_mutex_exit();
if (sync) {
/* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&log_sys->checkpoint_lock);
- rw_lock_s_unlock(&log_sys->checkpoint_lock);
+ rw_lock_s_lock(&log_sys.checkpoint_lock);
+ rw_lock_s_unlock(&log_sys.checkpoint_lock);
}
return(false);
}
- log_sys->next_checkpoint_lsn = oldest_lsn;
+ log_sys.next_checkpoint_lsn = oldest_lsn;
log_write_checkpoint_info(sync, end_lsn);
ut_ad(!log_mutex_own());
@@ -1788,7 +1582,6 @@ void
log_checkpoint_margin(void)
/*=======================*/
{
- log_t* log = log_sys;
lsn_t age;
lsn_t checkpoint_age;
ib_uint64_t advance;
@@ -1800,39 +1593,39 @@ loop:
log_mutex_enter();
ut_ad(!recv_no_log_write);
- if (!log->check_flush_or_checkpoint) {
+ if (!log_sys.check_flush_or_checkpoint) {
log_mutex_exit();
return;
}
oldest_lsn = log_buf_pool_get_oldest_modification();
- age = log->lsn - oldest_lsn;
+ age = log_sys.lsn - oldest_lsn;
- if (age > log->max_modified_age_sync) {
+ if (age > log_sys.max_modified_age_sync) {
/* A flush is urgent: we have to do a synchronous preflush */
- advance = age - log->max_modified_age_sync;
+ advance = age - log_sys.max_modified_age_sync;
}
- checkpoint_age = log->lsn - log->last_checkpoint_lsn;
+ checkpoint_age = log_sys.lsn - log_sys.last_checkpoint_lsn;
bool checkpoint_sync;
bool do_checkpoint;
- if (checkpoint_age > log->max_checkpoint_age) {
+ if (checkpoint_age > log_sys.max_checkpoint_age) {
/* A checkpoint is urgent: we do it synchronously */
checkpoint_sync = true;
do_checkpoint = true;
- } else if (checkpoint_age > log->max_checkpoint_age_async) {
+ } else if (checkpoint_age > log_sys.max_checkpoint_age_async) {
/* A checkpoint is not urgent: do it asynchronously */
do_checkpoint = true;
checkpoint_sync = false;
- log->check_flush_or_checkpoint = false;
+ log_sys.check_flush_or_checkpoint = false;
} else {
do_checkpoint = false;
checkpoint_sync = false;
- log->check_flush_or_checkpoint = false;
+ log_sys.check_flush_or_checkpoint = false;
}
log_mutex_exit();
@@ -1847,9 +1640,7 @@ loop:
thread doing a flush at the same time. */
if (!success) {
log_mutex_enter();
-
- log->check_flush_or_checkpoint = true;
-
+ log_sys.check_flush_or_checkpoint = true;
log_mutex_exit();
goto loop;
}
@@ -1880,7 +1671,7 @@ log_check_margins(void)
log_checkpoint_margin();
log_mutex_enter();
ut_ad(!recv_no_log_write);
- check = log_sys->check_flush_or_checkpoint;
+ check = log_sys.check_flush_or_checkpoint;
log_mutex_exit();
} while (check);
}
@@ -1904,17 +1695,17 @@ logs_empty_and_mark_files_at_shutdown(void)
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
loop:
- ut_ad(lock_sys || !srv_was_started);
- ut_ad(log_sys || !srv_was_started);
- ut_ad(fil_system || !srv_was_started);
+ ut_ad(lock_sys.is_initialised() || !srv_was_started);
+ ut_ad(log_sys.is_initialised() || !srv_was_started);
+ ut_ad(fil_system.is_initialised() || !srv_was_started);
os_event_set(srv_buf_resize_event);
if (!srv_read_only_mode) {
os_event_set(srv_error_event);
os_event_set(srv_monitor_event);
os_event_set(srv_buf_dump_event);
- if (lock_sys) {
- os_event_set(lock_sys->timeout_event);
+ if (lock_sys.timeout_thread_active) {
+ os_event_set(lock_sys.timeout_event);
}
if (dict_stats_event) {
os_event_set(dict_stats_event);
@@ -1941,7 +1732,7 @@ loop:
if (ulint total_trx = srv_was_started && !srv_read_only_mode
&& srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
- ? trx_sys_any_active_transactions() : 0) {
+ ? trx_sys.any_active_transactions() : 0) {
if (srv_print_verbose_log && count > COUNT_INTERVAL) {
service_manager_extend_timeout(
@@ -1969,14 +1760,14 @@ loop:
goto wait_suspend_loop;
} else if (srv_dict_stats_thread_active) {
thread_name = "dict_stats_thread";
- } else if (lock_sys && lock_sys->timeout_thread_active) {
+ } else if (lock_sys.timeout_thread_active) {
thread_name = "lock_wait_timeout_thread";
} else if (srv_buf_dump_thread_active) {
thread_name = "buf_dump_thread";
goto wait_suspend_loop;
} else if (btr_defragment_thread_active) {
thread_name = "btr_defragment_thread";
- } else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) {
+ } else if (srv_fast_shutdown != 2 && trx_rollback_is_active) {
thread_name = "rollback of recovered transactions";
} else {
thread_name = NULL;
@@ -2042,10 +1833,10 @@ wait_suspend_loop:
os_event_set(log_scrub_event);
}
- if (log_sys) {
+ if (log_sys.is_initialised()) {
log_mutex_enter();
- const ulint n_write = log_sys->n_pending_checkpoint_writes;
- const ulint n_flush = log_sys->n_pending_flushes;
+ const ulint n_write = log_sys.n_pending_checkpoint_writes;
+ const ulint n_flush = log_sys.n_pending_flushes;
log_mutex_exit();
if (log_scrub_thread_active || n_write || n_flush) {
@@ -2096,7 +1887,7 @@ wait_suspend_loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
- if (fil_system) {
+ if (fil_system.is_initialised()) {
fil_close_all_files();
}
return;
@@ -2109,10 +1900,10 @@ wait_suspend_loop:
log_mutex_enter();
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
- const bool lsn_changed = lsn != log_sys->last_checkpoint_lsn;
- ut_ad(lsn >= log_sys->last_checkpoint_lsn);
+ const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn;
+ ut_ad(lsn >= log_sys.last_checkpoint_lsn);
log_mutex_exit();
@@ -2136,7 +1927,7 @@ wait_suspend_loop:
"Free innodb buffer pool");
buf_all_freed();
- ut_a(lsn == log_sys->lsn
+ ut_a(lsn == log_sys.lsn
|| srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
if (lsn < srv_start_lsn) {
@@ -2160,7 +1951,7 @@ wait_suspend_loop:
/* Make some checks that the server really is quiet */
ut_a(srv_get_active_thread_type() == SRV_NONE);
- ut_a(lsn == log_sys->lsn
+ ut_a(lsn == log_sys.lsn
|| srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
}
@@ -2172,8 +1963,8 @@ log_peek_lsn(
/*=========*/
lsn_t* lsn) /*!< out: if returns TRUE, current lsn is here */
{
- if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
- *lsn = log_sys->lsn;
+ if (0 == mutex_enter_nowait(&(log_sys.mutex))) {
+ *lsn = log_sys.lsn;
log_mutex_exit();
@@ -2200,15 +1991,15 @@ log_print(
"Log flushed up to " LSN_PF "\n"
"Pages flushed up to " LSN_PF "\n"
"Last checkpoint at " LSN_PF "\n",
- log_sys->lsn,
- log_sys->flushed_to_disk_lsn,
+ log_sys.lsn,
+ log_sys.flushed_to_disk_lsn,
log_buf_pool_get_oldest_modification(),
- log_sys->last_checkpoint_lsn);
+ log_sys.last_checkpoint_lsn);
current_time = time(NULL);
time_elapsed = difftime(current_time,
- log_sys->last_printout_time);
+ log_sys.last_printout_time);
if (time_elapsed <= 0) {
time_elapsed = 1;
@@ -2218,15 +2009,15 @@ log_print(
ULINTPF " pending log flushes, "
ULINTPF " pending chkp writes\n"
ULINTPF " log i/o's done, %.2f log i/o's/second\n",
- log_sys->n_pending_flushes,
- log_sys->n_pending_checkpoint_writes,
- log_sys->n_log_ios,
+ log_sys.n_pending_flushes,
+ log_sys.n_pending_checkpoint_writes,
+ log_sys.n_log_ios,
static_cast<double>(
- log_sys->n_log_ios - log_sys->n_log_ios_old)
+ log_sys.n_log_ios - log_sys.n_log_ios_old)
/ time_elapsed);
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = current_time;
+ log_sys.n_log_ios_old = log_sys.n_log_ios;
+ log_sys.last_printout_time = current_time;
log_mutex_exit();
}
@@ -2237,68 +2028,39 @@ void
log_refresh_stats(void)
/*===================*/
{
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
-}
-
-/** Close a log group.
-@param[in,out] group log group to close */
-static
-void
-log_group_close(log_group_t* group)
-{
- ulint i;
-
- for (i = 0; i < group->n_files; i++) {
- ut_free(group->file_header_bufs_ptr[i]);
- }
-
- ut_free(group->file_header_bufs_ptr);
- ut_free(group->file_header_bufs);
- ut_free(group->checkpoint_buf_ptr);
- group->n_files = 0;
- group->file_header_bufs_ptr = NULL;
- group->file_header_bufs = NULL;
- group->checkpoint_buf_ptr = NULL;
-}
-
-/********************************************************//**
-Closes all log groups. */
-void
-log_group_close_all(void)
-/*=====================*/
-{
- log_group_close(&log_sys->log);
+ log_sys.n_log_ios_old = log_sys.n_log_ios;
+ log_sys.last_printout_time = time(NULL);
}
/** Shut down the redo log subsystem. */
-void
-log_shutdown()
+void log_t::close()
{
- log_group_close_all();
+ ut_ad(this == &log_sys);
+ if (!is_initialised()) return;
+ m_initialised = false;
+ log.close();
- ut_free(log_sys->buf_ptr);
- log_sys->buf_ptr = NULL;
- log_sys->buf = NULL;
- ut_free(log_sys->checkpoint_buf_ptr);
- log_sys->checkpoint_buf_ptr = NULL;
- log_sys->checkpoint_buf = NULL;
+ if (!first_in_use)
+ buf -= srv_log_buffer_size;
+ ut_free_dodump(buf, srv_log_buffer_size * 2);
+ buf = NULL;
- os_event_destroy(log_sys->flush_event);
+ os_event_destroy(flush_event);
- rw_lock_free(&log_sys->checkpoint_lock);
+ rw_lock_free(&checkpoint_lock);
+ /* rw_lock_free() already called checkpoint_lock.~rw_lock_t();
+ tame the debug assertions when the destructor will be called once more. */
+ ut_ad(checkpoint_lock.magic_n == 0);
+ ut_d(checkpoint_lock.magic_n = RW_LOCK_MAGIC_N);
- mutex_free(&log_sys->mutex);
- mutex_free(&log_sys->write_mutex);
- mutex_free(&log_sys->log_flush_order_mutex);
+ mutex_free(&mutex);
+ mutex_free(&write_mutex);
+ mutex_free(&log_flush_order_mutex);
- if (!srv_read_only_mode && srv_scrub_log) {
- os_event_destroy(log_scrub_event);
- }
+ if (!srv_read_only_mode && srv_scrub_log)
+ os_event_destroy(log_scrub_event);
- recv_sys_close();
- ut_free(log_sys);
- log_sys = NULL;
+ recv_sys_close();
}
/******************************************************//**
@@ -2319,7 +2081,7 @@ log_pad_current_log_block(void)
lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
pad_length = OS_FILE_LOG_BLOCK_SIZE
- - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
- LOG_BLOCK_TRL_SIZE;
if (pad_length
== (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
@@ -2336,7 +2098,7 @@ log_pad_current_log_block(void)
log_write_low(&b, 1);
}
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
log_close();
@@ -2352,14 +2114,14 @@ log_scrub()
/*=========*/
{
log_mutex_enter();
- ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn);
+ ulint cur_lbn = log_block_convert_lsn_to_no(log_sys.lsn);
if (next_lbn_to_pad == cur_lbn)
{
log_pad_current_log_block();
}
- next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn);
+ next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys.lsn);
log_mutex_exit();
}
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index f71067fddf2..afcd9079480 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -60,7 +60,7 @@ Created 9/20/1997 Heikki Tuuri
#include "row0merge.h"
/** Log records are stored in the hash table in chunks at most of this size;
-this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
+this must be less than srv_page_size as it is stored in the buffer pool */
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
/** Read-ahead area in applying log records to file pages */
@@ -77,7 +77,7 @@ volatile bool recv_recovery_on;
bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
+Protected by log_sys.mutex. */
bool recv_no_log_write = false;
#endif /* UNIV_DEBUG */
@@ -568,7 +568,9 @@ recv_sys_close()
os_event_destroy(recv_sys->flush_end);
}
- ut_free(recv_sys->buf);
+ if (recv_sys->buf != NULL) {
+ ut_free_dodump(recv_sys->buf, recv_sys->buf_size);
+ }
ut_ad(!recv_writer_thread_active);
mutex_free(&recv_sys->writer_mutex);
@@ -627,7 +629,7 @@ DECLARE_THREAD(recv_writer_thread)(
/* Wait till we get a signal to clean the LRU list.
Bounded by max wait time of 100ms. */
- ib_uint64_t sig_count = os_event_reset(buf_flush_event);
+ int64_t sig_count = os_event_reset(buf_flush_event);
os_event_wait_time_low(buf_flush_event, 100000, sig_count);
mutex_enter(&recv_sys->writer_mutex);
@@ -683,7 +685,8 @@ recv_sys_init()
}
recv_sys->buf = static_cast<byte*>(
- ut_malloc_nokey(RECV_PARSING_BUF_SIZE));
+ ut_malloc_dontdump(RECV_PARSING_BUF_SIZE));
+ recv_sys->buf_size = RECV_PARSING_BUF_SIZE;
recv_sys->addr_hash = hash_create(size / 512);
recv_sys->progress_time = ut_time();
@@ -717,8 +720,9 @@ recv_sys_debug_free(void)
hash_table_free(recv_sys->addr_hash);
mem_heap_free(recv_sys->heap);
- ut_free(recv_sys->buf);
+ ut_free_dodump(recv_sys->buf, recv_sys->buf_size);
+ recv_sys->buf_size = 0;
recv_sys->buf = NULL;
recv_sys->heap = NULL;
recv_sys->addr_hash = NULL;
@@ -734,57 +738,46 @@ recv_sys_debug_free(void)
mutex_exit(&(recv_sys->mutex));
}
-/** Read a log segment to a buffer.
-@param[out] buf buffer
-@param[in] group redo log files
-@param[in, out] start_lsn in : read area start, out: the last read valid lsn
+/** Read a log segment to log_sys.buf.
+@param[in,out] start_lsn in: read area start,
+out: the last read valid lsn
@param[in] end_lsn read area end
-@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
-@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
-bool
-log_group_read_log_seg(
- byte* buf,
- const log_group_t* group,
- lsn_t *start_lsn,
- lsn_t end_lsn)
+@return whether no invalid blocks (e.g. checksum mismatch) were found */
+bool log_t::files::read_log_seg(lsn_t* start_lsn, lsn_t end_lsn)
{
ulint len;
- lsn_t source_offset;
bool success = true;
- ut_ad(log_mutex_own());
+ ut_ad(log_sys.mutex.is_owned());
ut_ad(!(*start_lsn % OS_FILE_LOG_BLOCK_SIZE));
ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE));
-
+ byte* buf = log_sys.buf;
loop:
- source_offset = log_group_calc_lsn_offset(*start_lsn, group);
+ lsn_t source_offset = calc_lsn_offset(*start_lsn);
ut_a(end_lsn - *start_lsn <= ULINT_MAX);
len = (ulint) (end_lsn - *start_lsn);
ut_ad(len != 0);
- const bool at_eof = (source_offset % group->file_size) + len
- > group->file_size;
+ const bool at_eof = (source_offset % file_size) + len > file_size;
if (at_eof) {
/* If the above condition is true then len (which is ulint)
is > the expression below, so the typecast is ok */
- len = (ulint) (group->file_size -
- (source_offset % group->file_size));
+ len = ulint(file_size - (source_offset % file_size));
}
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
- ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
+ ut_a((source_offset >> srv_page_size_shift) <= ULINT_MAX);
- const ulint page_no
- = (ulint) (source_offset / univ_page_size.physical());
+ const ulint page_no = ulint(source_offset >> srv_page_size_shift);
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) (source_offset % univ_page_size.physical()),
+ ulint(source_offset & (srv_page_size - 1)),
len, buf, NULL);
for (ulint l = 0; l < len; l += OS_FILE_LOG_BLOCK_SIZE,
@@ -802,7 +795,7 @@ loop:
break;
}
- if (innodb_log_checksums || group->is_encrypted()) {
+ if (innodb_log_checksums || is_encrypted()) {
ulint crc = log_block_calc_checksum_crc32(buf);
ulint cksum = log_block_get_checksum(buf);
@@ -825,7 +818,7 @@ loop:
break;
}
- if (group->is_encrypted()) {
+ if (is_encrypted()) {
log_crypt(buf, *start_lsn,
OS_FILE_LOG_BLOCK_SIZE, true);
}
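The new read_log_seg() splits source_offset with a shift and a mask instead of dividing by the page size. A small sketch with the default 16KiB page size (srv_page_size_shift == 14) and a hypothetical offset; not part of the patch:

static void split_offset_example()
{
	const lsn_t source_offset = 0x12345;	/* hypothetical */
	ulint page_no = ulint(source_offset >> 14);		/* 4 */
	ulint in_page = ulint(source_offset & (16384 - 1));	/* 0x2345 */
	/* Equivalent to source_offset / srv_page_size and
	source_offset % srv_page_size, but without 64-bit division. */
	(void) page_no;
	(void) in_page;
}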
@@ -872,14 +865,10 @@ recv_synchronize_groups()
the block is always incomplete */
lsn_t start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- log_group_read_log_seg(log_sys->buf, &log_sys->log,
- &start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE);
-
- /* Update the fields in the group struct to correspond to
- recovered_lsn */
-
- log_group_set_fields(&log_sys->log, recovered_lsn);
+ OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.log.read_log_seg(&start_lsn,
+ start_lsn + OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.log.set_fields(recovered_lsn);
/* Copy the checkpoint info to the log; remember that we have
incremented checkpoint_no by one, and the info will not be written
@@ -905,19 +894,17 @@ recv_check_log_header_checksum(
}
/** Find the latest checkpoint in the format-0 log header.
-@param[out] max_group log group, or NULL
@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
-recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
+recv_find_max_checkpoint_0(ulint* max_field)
{
- log_group_t* group = &log_sys->log;
ib_uint64_t max_no = 0;
ib_uint64_t checkpoint_no;
- byte* buf = log_sys->checkpoint_buf;
+ byte* buf = log_sys.checkpoint_buf;
- ut_ad(group->format == 0);
+ ut_ad(log_sys.log.format == 0);
/** Offset of the first checkpoint checksum */
static const uint CHECKSUM_1 = 288;
@@ -928,11 +915,11 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
/** Least significant bits of the checkpoint offset */
static const uint OFFSET_LOW32 = 16;
- *max_group = NULL;
+ bool found = false;
for (ulint field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
- log_group_header_read(group, field);
+ log_header_read(field);
if (static_cast<uint32_t>(ut_fold_binary(buf, CHECKSUM_1))
!= mach_read_from_4(buf + CHECKSUM_1)
@@ -959,21 +946,19 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
mach_read_from_8(buf + LOG_CHECKPOINT_LSN)));
if (checkpoint_no >= max_no) {
- *max_group = group;
+ found = true;
*max_field = field;
max_no = checkpoint_no;
- group->state = LOG_GROUP_OK;
-
- group->lsn = mach_read_from_8(
+ log_sys.log.lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = static_cast<ib_uint64_t>(
+ log_sys.log.lsn_offset = static_cast<ib_uint64_t>(
mach_read_from_4(buf + OFFSET_HIGH32)) << 32
| mach_read_from_4(buf + OFFSET_LOW32);
}
}
- if (*max_group != NULL) {
+ if (found) {
return(DB_SUCCESS);
}
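recv_find_max_checkpoint_0() reassembles the 64-bit checkpoint offset from the two 32-bit header fields read above. The equivalent arithmetic with hypothetical field values; illustrative only:

static ib_uint64_t checkpoint_offset_example()
{
	const uint32_t high = 0x1;		/* mach_read_from_4(buf + OFFSET_HIGH32) */
	const uint32_t low = 0x00020000;	/* mach_read_from_4(buf + OFFSET_LOW32) */
	return (ib_uint64_t(high) << 32) | low;	/* 0x100020000 */
}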
@@ -994,34 +979,27 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
{
log_mutex_enter();
- log_group_t* group = &log_sys->log;
- const lsn_t source_offset
- = log_group_calc_lsn_offset(lsn, group);
+ const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn);
log_mutex_exit();
- const ulint page_no
- = (ulint) (source_offset / univ_page_size.physical());
- byte* buf = log_sys->buf;
+ const ulint page_no = ulint(source_offset >> srv_page_size_shift);
+ byte* buf = log_sys.buf;
static const char* NO_UPGRADE_RECOVERY_MSG =
"Upgrade after a crash is not supported."
" This redo log was created before MariaDB 10.2.2";
- static const char* NO_UPGRADE_RTFM_MSG =
- ". Please follow the instructions at "
- "https://mariadb.com/kb/en/library/upgrading/";
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
- % univ_page_size.physical()),
- OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
+ ulint((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
+ & (srv_page_size - 1)),
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
if (log_block_calc_checksum_format_0(buf)
!= log_block_get_checksum(buf)
&& !log_crypt_101_read_block(buf)) {
ib::error() << NO_UPGRADE_RECOVERY_MSG
- << ", and it appears corrupted"
- << NO_UPGRADE_RTFM_MSG;
+ << ", and it appears corrupted.";
return(DB_CORRUPTION);
}
@@ -1029,12 +1007,11 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
== (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) {
} else if (crypt) {
ib::error() << "Cannot decrypt log for upgrading."
- " The encrypted log was created before MariaDB 10.2.2"
- << NO_UPGRADE_RTFM_MSG;
+ " The encrypted log was created"
+ " before MariaDB 10.2.2.";
return DB_ERROR;
} else {
- ib::error() << NO_UPGRADE_RECOVERY_MSG
- << NO_UPGRADE_RTFM_MSG;
+ ib::error() << NO_UPGRADE_RECOVERY_MSG << ".";
return(DB_ERROR);
}
@@ -1043,29 +1020,29 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
recv_sys->parse_start_lsn = recv_sys->recovered_lsn
= recv_sys->scanned_lsn
= recv_sys->mlog_checkpoint_lsn = lsn;
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn
- = log_sys->lsn = log_sys->write_lsn
- = log_sys->current_flush_lsn = log_sys->flushed_to_disk_lsn
+ log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn
+ = log_sys.lsn = log_sys.write_lsn
+ = log_sys.current_flush_lsn = log_sys.flushed_to_disk_lsn
= lsn;
- log_sys->next_checkpoint_no = 0;
+ log_sys.next_checkpoint_no = 0;
return(DB_SUCCESS);
}
-/** Determine if a redo log from MariaDB 10.3 is clean.
+/** Determine if a redo log from MariaDB 10.4 is clean.
@return error code
@retval DB_SUCCESS if the redo log is clean
@retval DB_CORRUPTION if the redo log is corrupted
@retval DB_ERROR if the redo log is not empty */
-static
-dberr_t
-recv_log_recover_10_3()
+static dberr_t recv_log_recover_10_4()
{
- log_group_t* group = &log_sys->log;
- const lsn_t lsn = group->lsn;
- const lsn_t source_offset = log_group_calc_lsn_offset(lsn, group);
+ ut_ad(!log_sys.is_encrypted());
+ const lsn_t lsn = log_sys.log.lsn;
+ log_mutex_enter();
+ const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn);
+ log_mutex_exit();
const ulint page_no
= (ulint) (source_offset / univ_page_size.physical());
- byte* buf = log_sys->buf;
+ byte* buf = log_sys.buf;
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
@@ -1075,11 +1052,7 @@ recv_log_recover_10_3()
OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
if (log_block_calc_checksum(buf) != log_block_get_checksum(buf)) {
- return(DB_CORRUPTION);
- }
-
- if (group->is_encrypted()) {
- log_crypt(buf, lsn, OS_FILE_LOG_BLOCK_SIZE, true);
+ return DB_CORRUPTION;
}
/* On a clean shutdown, the redo log will be logically empty
@@ -1087,7 +1060,7 @@ recv_log_recover_10_3()
if (log_block_get_data_len(buf)
!= (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) {
- return(DB_ERROR);
+ return DB_ERROR;
}
/* Mark the redo log for downgrading. */
@@ -1095,12 +1068,12 @@ recv_log_recover_10_3()
recv_sys->parse_start_lsn = recv_sys->recovered_lsn
= recv_sys->scanned_lsn
= recv_sys->mlog_checkpoint_lsn = lsn;
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn
- = log_sys->lsn = log_sys->write_lsn
- = log_sys->current_flush_lsn = log_sys->flushed_to_disk_lsn
+ log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn
+ = log_sys.lsn = log_sys.write_lsn
+ = log_sys.current_flush_lsn = log_sys.flushed_to_disk_lsn
= lsn;
- log_sys->next_checkpoint_no = 0;
- return(DB_SUCCESS);
+ log_sys.next_checkpoint_no = 0;
+ return DB_SUCCESS;
}
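The core of recv_log_recover_10_4() is the "logically empty" test: on a clean shutdown the last block's stored data length must land exactly on the checkpoint LSN's offset within its block. A minimal restatement of that check, not part of the patch:

static bool redo_logically_empty(const byte* block, lsn_t source_offset)
{
	/* On a clean shutdown, the data length stored in the last block
	points exactly at the checkpoint LSN's offset within the block;
	anything larger means redo was generated after the checkpoint. */
	return log_block_get_data_len(block)
		== (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));
}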
/** Find the latest checkpoint in the log header.
@@ -1109,29 +1082,24 @@ recv_log_recover_10_3()
dberr_t
recv_find_max_checkpoint(ulint* max_field)
{
- log_group_t* group;
ib_uint64_t max_no;
ib_uint64_t checkpoint_no;
ulint field;
byte* buf;
- group = &log_sys->log;
-
max_no = 0;
*max_field = 0;
- buf = log_sys->checkpoint_buf;
-
- group->state = LOG_GROUP_CORRUPTED;
+ buf = log_sys.checkpoint_buf;
- log_group_header_read(group, 0);
+ log_header_read(0);
/* Check the header page checksum. There was no
checksum in the first redo log format (version 0). */
- group->format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
- group->subformat = group->format
+ log_sys.log.format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
+ log_sys.log.subformat = log_sys.log.format != LOG_HEADER_FORMAT_3_23
? mach_read_from_4(buf + LOG_HEADER_SUBFORMAT)
: 0;
- if (group->format != 0
+ if (log_sys.log.format != LOG_HEADER_FORMAT_3_23
&& !recv_check_log_header_checksum(buf)) {
ib::error() << "Invalid redo log header checksum.";
return(DB_CORRUPTION);
@@ -1143,35 +1111,27 @@ recv_find_max_checkpoint(ulint* max_field)
/* Ensure that the string is NUL-terminated. */
creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR] = 0;
- switch (group->format) {
- case 0:
- return(recv_find_max_checkpoint_0(&group, max_field));
+ switch (log_sys.log.format) {
+ case LOG_HEADER_FORMAT_3_23:
+ return(recv_find_max_checkpoint_0(max_field));
case LOG_HEADER_FORMAT_10_2:
case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
- case LOG_HEADER_FORMAT_10_3:
- case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
+ case LOG_HEADER_FORMAT_CURRENT:
+ case LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED:
case LOG_HEADER_FORMAT_10_4:
/* We can only parse the unencrypted LOG_HEADER_FORMAT_10_4.
The encrypted format uses a larger redo log block trailer. */
break;
default:
ib::error() << "Unsupported redo log format."
- " The redo log was created"
- " with " << creator <<
- ". Please follow the instructions at "
- REFMAN "upgrading-downgrading.html";
- /* Do not issue a message about a possibility
- to cleanly shut down the newer server version
- and to remove the redo logs, because the
- format of the system data structures may
- radically change after MySQL 5.7. */
+ " The redo log was created with " << creator << ".";
return(DB_ERROR);
}
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
- log_group_header_read(group, field);
+ log_header_read(field);
const ulint crc32 = log_block_calc_checksum_crc32(buf);
const ulint cksum = log_block_get_checksum(buf);
@@ -1186,7 +1146,7 @@ recv_find_max_checkpoint(ulint* max_field)
continue;
}
- if (group->is_encrypted()
+ if (log_sys.is_encrypted()
&& !log_crypt_read_checkpoint_buf(buf)) {
ib::error() << "Reading checkpoint"
" encryption info failed.";
@@ -1204,12 +1164,11 @@ recv_find_max_checkpoint(ulint* max_field)
if (checkpoint_no >= max_no) {
*max_field = field;
max_no = checkpoint_no;
- group->state = LOG_GROUP_OK;
- group->lsn = mach_read_from_8(
+ log_sys.log.lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = mach_read_from_8(
+ log_sys.log.lsn_offset = mach_read_from_8(
buf + LOG_CHECKPOINT_OFFSET);
- log_sys->next_checkpoint_no = checkpoint_no;
+ log_sys.next_checkpoint_no = checkpoint_no;
}
}
@@ -1226,22 +1185,8 @@ recv_find_max_checkpoint(ulint* max_field)
return(DB_ERROR);
}
- switch (group->format) {
- case LOG_HEADER_FORMAT_10_3:
- case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
- if (group->subformat == 1) {
- /* 10.2 with new crash-safe TRUNCATE */
- break;
- }
- /* fall through */
- case LOG_HEADER_FORMAT_10_4:
- if (srv_operation == SRV_OPERATION_BACKUP) {
- ib::error()
- << "Incompatible redo log format."
- " The redo log was created with " << creator;
- return DB_ERROR;
- }
- dberr_t err = recv_log_recover_10_3();
+ if (log_sys.log.format == LOG_HEADER_FORMAT_10_4) {
+ dberr_t err = recv_log_recover_10_4();
if (err != DB_SUCCESS) {
ib::error()
<< "Downgrade after a crash is not supported."
@@ -1249,10 +1194,10 @@ recv_find_max_checkpoint(ulint* max_field)
<< (err == DB_ERROR
? "." : ", and it appears corrupted.");
}
- return(err);
+ return err;
}
- return(DB_SUCCESS);
+ return DB_SUCCESS;
}
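The loop above keeps whichever of the two checkpoint slots carries the larger checkpoint_no; the slots are written alternately, so the larger number marks the more recent checkpoint. A hypothetical helper that states the same rule in isolation (not part of the patch):

static ulint newer_checkpoint_slot(ib_uint64_t no1, bool valid1,
				    ib_uint64_t no2, bool valid2)
{
	if (valid2 && (!valid1 || no2 >= no1)) {
		return LOG_CHECKPOINT_2;
	}
	if (valid1) {
		return LOG_CHECKPOINT_1;
	}
	return 0;	/* no valid checkpoint: the caller reports an error */
}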
/** Try to parse a single log record body and also applies it if
@@ -1584,18 +1529,22 @@ parse_log:
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
break;
case MLOG_UNDO_ERASE_END:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
+ if (page) {
+ ut_ad(page_type == FIL_PAGE_UNDO_LOG);
+ trx_undo_erase_page_end(page);
+ }
break;
case MLOG_UNDO_INIT:
/* Allow anything in page_type when creating a page. */
- ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
+ ptr = trx_undo_parse_page_init(ptr, end_ptr, page);
break;
- case MLOG_UNDO_HDR_CREATE:
case MLOG_UNDO_HDR_REUSE:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
- page, mtr);
+ ptr = trx_undo_parse_page_header_reuse(ptr, end_ptr, page);
+ break;
+ case MLOG_UNDO_HDR_CREATE:
+ ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
+ ptr = trx_undo_parse_page_header(ptr, end_ptr, page, mtr);
break;
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
ut_ad(!page || fil_page_type_is_index(page_type));
@@ -1663,9 +1612,15 @@ parse_log:
ptr, end_ptr, page, page_zip, index);
}
break;
+ case MLOG_ZIP_WRITE_TRX_ID:
+ /* This must be a clustered index leaf page. */
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ptr = page_zip_parse_write_trx_id(ptr, end_ptr,
+ page, page_zip);
+ break;
case MLOG_FILE_WRITE_CRYPT_DATA:
dberr_t err;
- ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, block, &err));
+ ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, &err));
if (err != DB_SUCCESS) {
recv_sys->found_corrupt_log = TRUE;
@@ -1775,13 +1730,13 @@ recv_add_to_hash_table(
ut_ad(type != MLOG_INDEX_LOAD);
ut_ad(type != MLOG_TRUNCATE);
- len = rec_end - body;
+ len = ulint(rec_end - body);
recv = static_cast<recv_t*>(
mem_heap_alloc(recv_sys->heap, sizeof(recv_t)));
recv->type = type;
- recv->len = rec_end - body;
+ recv->len = ulint(rec_end - body);
recv->start_lsn = start_lsn;
recv->end_lsn = end_lsn;
@@ -1810,13 +1765,13 @@ recv_add_to_hash_table(
prev_field = &(recv->data);
- /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
+ /* Store the log record body in chunks of less than srv_page_size:
recv_sys->heap grows into the buffer pool, and bigger chunks could not
be allocated */
while (rec_end > body) {
- len = rec_end - body;
+ len = ulint(rec_end - body);
if (len > RECV_DATA_BLOCK_SIZE) {
len = RECV_DATA_BLOCK_SIZE;
@@ -1920,9 +1875,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv_addr->space, recv_addr->page_no);
}
- DBUG_PRINT("ib_log",
- ("Applying log to page %u:%u",
- recv_addr->space, recv_addr->page_no));
+ DBUG_LOG("ib_log", "Applying log to page " << block->page.id);
recv_addr->state = RECV_BEING_PROCESSED;
@@ -1967,11 +1920,12 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
start_lsn = end_lsn = 0;
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
+ fil_space_t* space = fil_space_acquire(block->page.id.space());
while (recv) {
end_lsn = recv->end_lsn;
- ut_ad(end_lsn <= log_sys->log.scanned_lsn);
+ ut_ad(end_lsn <= log_sys.log.scanned_lsn);
if (recv->len > RECV_DATA_BLOCK_SIZE) {
/* We have to copy the record body to a separate
@@ -1993,13 +1947,6 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
with LSN less than recorded LSN is skipped.
Note: We can't skip complete recv_addr as same page may have
valid REDO records post truncate those needs to be applied. */
- bool skip_recv = false;
- if (srv_was_tablespace_truncated(fil_space_get(recv_addr->space))) {
- lsn_t init_lsn =
- truncate_t::get_truncated_tablespace_init_lsn(
- recv_addr->space);
- skip_recv = (recv->start_lsn < init_lsn);
- }
/* Ignore applying the redo logs for tablespace that is
truncated. Post recovery there is fixup action that will
@@ -2009,8 +1956,11 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
was re-inited and that would lead to an error while applying
such action. */
if (recv->start_lsn >= page_lsn
- && !srv_is_tablespace_truncated(recv_addr->space)
- && !skip_recv) {
+ && !srv_is_tablespace_truncated(space->id)
+ && !(srv_was_tablespace_truncated(space)
+ && recv->start_lsn
+ < truncate_t::get_truncated_tablespace_init_lsn(
+ space->id))) {
lsn_t end_lsn;
@@ -2027,22 +1977,20 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv_addr->space, recv_addr->page_no);
}
- DBUG_PRINT("ib_log",
- ("apply " LSN_PF ":"
- " %s len " ULINTPF " page %u:%u",
- recv->start_lsn,
- get_mlog_string(recv->type), recv->len,
- recv_addr->space,
- recv_addr->page_no));
+ DBUG_LOG("ib_log", "apply " << recv->start_lsn << ": "
+ << get_mlog_string(recv->type)
+ << " len " << recv->len
+ << " page " << block->page.id);
recv_parse_or_apply_log_rec_body(
recv->type, buf, buf + recv->len,
- recv_addr->space, recv_addr->page_no,
+ block->page.id.space(),
+ block->page.id.page_no(),
true, block, &mtr);
end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
- mach_write_to_8(UNIV_PAGE_SIZE
+ mach_write_to_8(srv_page_size
- FIL_PAGE_END_LSN_OLD_CHKSUM
+ page, end_lsn);
@@ -2059,6 +2007,8 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv = UT_LIST_GET_NEXT(rec_list, recv);
}
+ space->release();
+
#ifdef UNIV_ZIP_DEBUG
if (fil_page_index_page_check(page)) {
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
@@ -2401,7 +2351,7 @@ recv_parse_log_rec(
end_ptr));
}
- return(new_ptr - ptr);
+ return ulint(new_ptr - ptr);
}
/*******************************************************//**
@@ -2576,9 +2526,7 @@ loop:
/* Do nothing */
break;
case MLOG_CHECKPOINT:
-#if SIZE_OF_MLOG_CHECKPOINT != 1 + 8
-# error SIZE_OF_MLOG_CHECKPOINT != 1 + 8
-#endif
+ compile_time_assert(SIZE_OF_MLOG_CHECKPOINT == 1 + 8);
lsn = mach_read_from_8(ptr + 1);
if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) {
@@ -3110,7 +3058,6 @@ recv_scan_log_recs(
/** Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found.
-@param[in,out] group log group
@param[in] checkpoint_lsn latest checkpoint log sequence number
@param[in,out] contiguous_lsn log sequence number
until which all redo log has been scanned
@@ -3120,7 +3067,6 @@ can be applied to the tablespaces
static
bool
recv_group_scan_log_recs(
- log_group_t* group,
lsn_t checkpoint_lsn,
lsn_t* contiguous_lsn,
bool last_phase)
@@ -3149,12 +3095,12 @@ recv_group_scan_log_recs(
lsn_t end_lsn;
store_t store_to_hash = recv_sys->mlog_checkpoint_lsn == 0
? STORE_NO : (last_phase ? STORE_IF_EXISTS : STORE_YES);
- ulint available_mem = UNIV_PAGE_SIZE
+ ulint available_mem = srv_page_size
* (buf_pool_get_n_pages()
- (recv_n_pool_free_frames * srv_buf_pool_instances));
- group->scanned_lsn = end_lsn = *contiguous_lsn = ut_uint64_align_down(
- *contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.log.scanned_lsn = end_lsn = *contiguous_lsn =
+ ut_uint64_align_down(*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
do {
if (last_phase && store_to_hash == STORE_NO) {
@@ -3169,15 +3115,13 @@ recv_group_scan_log_recs(
start_lsn = ut_uint64_align_down(end_lsn,
OS_FILE_LOG_BLOCK_SIZE);
end_lsn = start_lsn;
- log_group_read_log_seg(
- log_sys->buf, group, &end_lsn,
- start_lsn + RECV_SCAN_SIZE);
+ log_sys.log.read_log_seg(&end_lsn, start_lsn + RECV_SCAN_SIZE);
} while (end_lsn != start_lsn
&& !recv_scan_log_recs(
- available_mem, &store_to_hash, log_sys->buf,
+ available_mem, &store_to_hash, log_sys.buf,
checkpoint_lsn,
start_lsn, end_lsn,
- contiguous_lsn, &group->scanned_lsn));
+ contiguous_lsn, &log_sys.log.scanned_lsn));
if (recv_sys->found_corrupt_log || recv_sys->found_corrupt_fs) {
DBUG_RETURN(false);
@@ -3185,7 +3129,7 @@ recv_group_scan_log_recs(
DBUG_PRINT("ib_log", ("%s " LSN_PF " completed",
last_phase ? "rescan" : "scan",
- group->scanned_lsn));
+ log_sys.log.scanned_lsn));
DBUG_RETURN(store_to_hash == STORE_NO);
}
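available_mem above bounds how far the recovery hash table may grow into the buffer pool. A worked example with hypothetical sizing (16KiB pages, an 8192-page buffer pool, one instance, recv_n_pool_free_frames = 256); illustrative only:

static ulint available_mem_example()
{
	/* Roughly: the log-record hash may grow into every buffer pool
	frame except those kept free for reading pages during recovery. */
	return 16384 * (8192 - 256 * 1);	/* = 130023424 bytes */
}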
@@ -3393,55 +3337,35 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
log_mutex_enter();
- /* Look for the latest checkpoint from any of the log groups */
-
err = recv_find_max_checkpoint(&max_cp_field);
if (err != DB_SUCCESS) {
-skip_apply:
+
+ srv_start_lsn = recv_sys->recovered_lsn = log_sys.lsn;
log_mutex_exit();
return(err);
}
- switch (log_sys->log.format) {
- case 0:
- break;
- case LOG_HEADER_FORMAT_10_2:
- case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
- break;
- case LOG_HEADER_FORMAT_10_3:
- case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
- if (log_sys->log.subformat == 1) {
- /* 10.2 with new crash-safe TRUNCATE */
- break;
- }
- /* fall through */
- default:
- /* This must be a clean log from a newer version. */
- goto skip_apply;
- }
-
- log_group_header_read(&log_sys->log, max_cp_field);
+ log_header_read(max_cp_field);
- buf = log_sys->checkpoint_buf;
+ buf = log_sys.checkpoint_buf;
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
- /* Start reading the log groups from the checkpoint lsn up. The
- variable contiguous_lsn contains an lsn up to which the log is
- known to be contiguously written to all log groups. */
-
+ /* Start reading the log from the checkpoint lsn. The variable
+ contiguous_lsn contains an lsn up to which the log is known to
+ be contiguously written. */
recv_sys->mlog_checkpoint_lsn = 0;
- ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
+ ut_ad(RECV_SCAN_SIZE <= srv_log_buffer_size);
const lsn_t end_lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_END_LSN);
ut_ad(recv_sys->n_addrs == 0);
contiguous_lsn = checkpoint_lsn;
- switch (log_sys->log.format) {
+ switch (log_sys.log.format) {
case 0:
log_mutex_exit();
return recv_log_format_0_recover(checkpoint_lsn,
@@ -3460,9 +3384,7 @@ skip_apply:
}
/* Look for MLOG_CHECKPOINT. */
- log_group_t* group = &log_sys->log;
- recv_group_scan_log_recs(group, checkpoint_lsn, &contiguous_lsn,
- false);
+ recv_group_scan_log_recs(checkpoint_lsn, &contiguous_lsn, false);
/* The first scan should not have stored or applied any records. */
ut_ad(recv_sys->n_addrs == 0);
ut_ad(!recv_sys->found_corrupt_fs);
@@ -3479,7 +3401,7 @@ skip_apply:
}
if (recv_sys->mlog_checkpoint_lsn == 0) {
- lsn_t scan_lsn = group->scanned_lsn;
+ lsn_t scan_lsn = log_sys.log.scanned_lsn;
if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) {
log_mutex_exit();
ib::error err;
@@ -3492,12 +3414,12 @@ skip_apply:
return(DB_ERROR);
}
- group->scanned_lsn = checkpoint_lsn;
+ log_sys.log.scanned_lsn = checkpoint_lsn;
rescan = false;
} else {
contiguous_lsn = checkpoint_lsn;
rescan = recv_group_scan_log_recs(
- group, checkpoint_lsn, &contiguous_lsn, false);
+ checkpoint_lsn, &contiguous_lsn, false);
if ((recv_sys->found_corrupt_log && !srv_force_recovery)
|| recv_sys->found_corrupt_fs) {
@@ -3543,7 +3465,7 @@ skip_apply:
}
}
- log_sys->lsn = recv_sys->recovered_lsn;
+ log_sys.lsn = recv_sys->recovered_lsn;
if (recv_needed_recovery) {
bool missing_tablespace = false;
@@ -3570,8 +3492,7 @@ skip_apply:
lsn_t recent_stored_lsn = recv_sys->last_stored_lsn;
rescan = recv_group_scan_log_recs(
- group, checkpoint_lsn,
- &recent_stored_lsn, false);
+ checkpoint_lsn, &recent_stored_lsn, false);
ut_ad(!recv_sys->found_corrupt_fs);
@@ -3604,8 +3525,8 @@ skip_apply:
if (rescan) {
contiguous_lsn = checkpoint_lsn;
- recv_group_scan_log_recs(group, checkpoint_lsn,
- &contiguous_lsn, true);
+ recv_group_scan_log_recs(
+ checkpoint_lsn, &contiguous_lsn, true);
if ((recv_sys->found_corrupt_log
&& !srv_force_recovery)
@@ -3618,12 +3539,11 @@ skip_apply:
ut_ad(!rescan || recv_sys->n_addrs == 0);
}
- /* We currently have only one log group */
-
- if (group->scanned_lsn < checkpoint_lsn
- || group->scanned_lsn < recv_max_page_lsn) {
+ if (log_sys.log.scanned_lsn < checkpoint_lsn
+ || log_sys.log.scanned_lsn < recv_max_page_lsn) {
- ib::error() << "We scanned the log up to " << group->scanned_lsn
+ ib::error() << "We scanned the log up to "
+ << log_sys.log.scanned_lsn
<< ". A checkpoint was at " << checkpoint_lsn << " and"
" the maximum LSN on a database page was "
<< recv_max_page_lsn << ". It is possible that the"
@@ -3639,11 +3559,8 @@ skip_apply:
return(DB_ERROR);
}
- /* Synchronize the uncorrupted log groups to the most up-to-date log
- group; we also copy checkpoint info to groups */
-
- log_sys->next_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = checkpoint_no + 1;
+ log_sys.next_checkpoint_lsn = checkpoint_lsn;
+ log_sys.next_checkpoint_no = checkpoint_no + 1;
recv_synchronize_groups();
@@ -3653,24 +3570,24 @@ skip_apply:
srv_start_lsn = recv_sys->recovered_lsn;
}
- log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
- log_sys->write_lsn = log_sys->lsn;
+ log_sys.buf_free = ulong(log_sys.lsn % OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.buf_next_to_write = log_sys.buf_free;
+ log_sys.write_lsn = log_sys.lsn;
- log_sys->last_checkpoint_lsn = checkpoint_lsn;
+ log_sys.last_checkpoint_lsn = checkpoint_lsn;
if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) {
/* Write a MLOG_CHECKPOINT marker as the first thing,
before generating any other redo log. This ensures
that subsequent crash recovery will be possible even
if the server were killed soon after this. */
- fil_names_clear(log_sys->last_checkpoint_lsn, true);
+ fil_names_clear(log_sys.last_checkpoint_lsn, true);
}
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
+ log_sys.lsn - log_sys.last_checkpoint_lsn);
- log_sys->next_checkpoint_no = ++checkpoint_no;
+ log_sys.next_checkpoint_no = ++checkpoint_no;
mutex_enter(&recv_sys->mutex);
@@ -3747,7 +3664,6 @@ recv_recovery_rollback_active(void)
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
/* Drop garbage tables. */
- if (srv_safe_truncate)
row_mysql_drop_garbage_tables();
/* Drop any auxiliary tables that were not dropped when the
@@ -3759,8 +3675,8 @@ recv_recovery_rollback_active(void)
/* Rollback the uncommitted transactions which have no user
session */
- trx_rollback_or_clean_is_active = true;
- os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
+ trx_rollback_is_active = true;
+ os_thread_create(trx_rollback_all_recovered, 0, 0);
}
}
@@ -3777,26 +3693,26 @@ recv_reset_logs(
{
ut_ad(log_mutex_own());
- log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
- log_sys->log.lsn = log_sys->lsn;
- log_sys->log.lsn_offset = LOG_FILE_HDR_SIZE;
+ log_sys.log.lsn = log_sys.lsn;
+ log_sys.log.lsn_offset = LOG_FILE_HDR_SIZE;
- log_sys->buf_next_to_write = 0;
- log_sys->write_lsn = log_sys->lsn;
+ log_sys.buf_next_to_write = 0;
+ log_sys.write_lsn = log_sys.lsn;
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = 0;
+ log_sys.next_checkpoint_no = 0;
+ log_sys.last_checkpoint_lsn = 0;
- memset(log_sys->buf, 0, log_sys->buf_size);
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
+ memset(log_sys.buf, 0, srv_log_buffer_size);
+ log_block_init(log_sys.buf, log_sys.lsn);
+ log_block_set_first_rec_group(log_sys.buf, LOG_BLOCK_HDR_SIZE);
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn += LOG_BLOCK_HDR_SIZE;
+ log_sys.buf_free = LOG_BLOCK_HDR_SIZE;
+ log_sys.lsn += LOG_BLOCK_HDR_SIZE;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- (log_sys->lsn - log_sys->last_checkpoint_lsn));
+ (log_sys.lsn - log_sys.last_checkpoint_lsn));
log_mutex_exit();
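After recv_reset_logs() the buffer holds one freshly initialised block, so both buf_free and the LSN advance by the block header size. A sketch of the resulting state, assuming LOG_BLOCK_HDR_SIZE == 12 and hypothetical values; not part of the patch:

static void reset_state_example()
{
	const lsn_t reset_lsn = 0x10000;	/* already block-aligned */
	ulong buf_free = 12;			/* LOG_BLOCK_HDR_SIZE */
	lsn_t lsn = reset_lsn + 12;		/* the header bytes count in the LSN */
	/* first_rec_group is set to LOG_BLOCK_HDR_SIZE as well, so the first
	record after the reset starts right behind the block header. */
	(void) buf_free;
	(void) lsn;
}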
@@ -3993,6 +3909,9 @@ static const char* get_mlog_string(mlog_id_t type)
case MLOG_ZIP_PAGE_REORGANIZE:
return("MLOG_ZIP_PAGE_REORGANIZE");
+ case MLOG_ZIP_WRITE_TRX_ID:
+ return("MLOG_ZIP_WRITE_TRX_ID");
+
case MLOG_FILE_RENAME2:
return("MLOG_FILE_RENAME2");