summaryrefslogtreecommitdiff
path: root/storage/innobase/buf/buf0dblwr.cc
diff options
context:
space:
mode:
authorJan Lindström <jplindst@mariadb.org>2014-03-03 14:27:56 +0200
committerJan Lindström <jplindst@mariadb.org>2014-03-03 14:27:56 +0200
commit96100d6652831d6423cc59bb3f065ab7f6f0cf85 (patch)
tree3a6c9864c9add695428241931c97899fa7c9a500 /storage/innobase/buf/buf0dblwr.cc
parentb67892cf59872867514709784c54526434784ea9 (diff)
parent4b3cf4aa26833206c650be2e40caebf6191d5302 (diff)
downloadmariadb-git-96100d6652831d6423cc59bb3f065ab7f6f0cf85.tar.gz
Merge: lp:maria/10.0 latest.
Diffstat (limited to 'storage/innobase/buf/buf0dblwr.cc')
-rw-r--r--storage/innobase/buf/buf0dblwr.cc198
1 files changed, 121 insertions, 77 deletions
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 2ae67d8a41e..305e2ebba94 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,11 +39,6 @@ Created 2011/12/19
#ifndef UNIV_HOTBACKUP
-/** Time in milliseconds that we sleep when unable to find a slot in
-the doublewrite buffer or when we have to wait for a running batch
-to end. */
-#define TRX_DOUBLEWRITE_BATCH_POLL_DELAY 10000
-
#ifdef UNIV_PFS_MUTEX
/* Key to register the mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key;
@@ -105,6 +100,25 @@ buf_dblwr_get(
return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
}
+/********************************************************************//**
+Flush a batch of writes to the datafiles that have already been
+written to the dblwr buffer on disk. */
+UNIV_INLINE
+void
+buf_dblwr_sync_datafiles()
+/*======================*/
+{
+ /* Wake possible simulated aio thread to actually post the
+ writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
+
+ /* Wait that all async writes to tablespaces have been posted to
+ the OS */
+ os_aio_wait_until_no_pending_writes();
+
+ /* Now we flush the data to disk (for example, with fsync) */
+ fil_flush_file_spaces(FIL_TABLESPACE);
+}
/****************************************************************//**
Creates or initialializes the doublewrite buffer at a database start. */
@@ -132,6 +146,8 @@ buf_dblwr_init(
mutex_create(buf_dblwr_mutex_key,
&buf_dblwr->mutex, SYNC_DOUBLEWRITE);
+ buf_dblwr->b_event = os_event_create();
+ buf_dblwr->s_event = os_event_create();
buf_dblwr->first_free = 0;
buf_dblwr->s_reserved = 0;
buf_dblwr->b_reserved = 0;
@@ -141,8 +157,8 @@ buf_dblwr_init(
buf_dblwr->block2 = mach_read_from_4(
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
- buf_dblwr->in_use = static_cast<ibool*>(
- mem_zalloc(buf_size * sizeof(ibool)));
+ buf_dblwr->in_use = static_cast<bool*>(
+ mem_zalloc(buf_size * sizeof(bool)));
buf_dblwr->write_buf_unaligned = static_cast<byte*>(
ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
@@ -368,6 +384,7 @@ buf_dblwr_init_or_restore_pages(
fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
UNIV_PAGE_SIZE, read_buf, NULL, 0);
+
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
@@ -401,9 +418,10 @@ buf_dblwr_init_or_restore_pages(
/* Read the pages from the doublewrite buffer to memory */
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
+ fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block1, 0,
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
buf, NULL, 0);
+
fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block2, 0,
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
@@ -474,7 +492,7 @@ buf_dblwr_init_or_restore_pages(
ulint zip_size = fil_space_get_zip_size(space_id);
/* Read in the actual page from the file */
- fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
+ fil_io(OS_FILE_READ, true, space_id, zip_size,
page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
read_buf, NULL, 0);
@@ -526,7 +544,7 @@ buf_dblwr_init_or_restore_pages(
doublewrite buffer to the intended
position */
- fil_io(OS_FILE_WRITE, TRUE, space_id,
+ fil_io(OS_FILE_WRITE, true, space_id,
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page, NULL, 0);
@@ -558,6 +576,8 @@ buf_dblwr_free(void)
ut_ad(buf_dblwr->s_reserved == 0);
ut_ad(buf_dblwr->b_reserved == 0);
+ os_event_free(buf_dblwr->b_event);
+ os_event_free(buf_dblwr->s_event);
ut_free(buf_dblwr->write_buf_unaligned);
buf_dblwr->write_buf_unaligned = NULL;
@@ -573,38 +593,68 @@ buf_dblwr_free(void)
}
/********************************************************************//**
-Updates the doublewrite buffer when an IO request that is part of an
-LRU or flush batch is completed. */
+Updates the doublewrite buffer when an IO request is completed. */
UNIV_INTERN
void
-buf_dblwr_update(void)
-/*==================*/
+buf_dblwr_update(
+/*=============*/
+ const buf_page_t* bpage, /*!< in: buffer block descriptor */
+ buf_flush_t flush_type)/*!< in: flush type */
{
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
return;
}
- mutex_enter(&buf_dblwr->mutex);
+ switch (flush_type) {
+ case BUF_FLUSH_LIST:
+ case BUF_FLUSH_LRU:
+ mutex_enter(&buf_dblwr->mutex);
- ut_ad(buf_dblwr->batch_running);
- ut_ad(buf_dblwr->b_reserved > 0);
- ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
+ ut_ad(buf_dblwr->batch_running);
+ ut_ad(buf_dblwr->b_reserved > 0);
+ ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
- buf_dblwr->b_reserved--;
- if (buf_dblwr->b_reserved == 0) {
+ buf_dblwr->b_reserved--;
+
+ if (buf_dblwr->b_reserved == 0) {
+ mutex_exit(&buf_dblwr->mutex);
+ /* This will finish the batch. Sync data files
+ to the disk. */
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ mutex_enter(&buf_dblwr->mutex);
+
+ /* We can now reuse the doublewrite memory buffer: */
+ buf_dblwr->first_free = 0;
+ buf_dblwr->batch_running = false;
+ os_event_set(buf_dblwr->b_event);
+ }
mutex_exit(&buf_dblwr->mutex);
- /* This will finish the batch. Sync data files
- to the disk. */
- fil_flush_file_spaces(FIL_TABLESPACE);
- mutex_enter(&buf_dblwr->mutex);
+ break;
+ case BUF_FLUSH_SINGLE_PAGE:
+ {
+ const ulint size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+ ulint i;
+ mutex_enter(&buf_dblwr->mutex);
+ for (i = srv_doublewrite_batch_size; i < size; ++i) {
+ if (buf_dblwr->buf_block_arr[i] == bpage) {
+ buf_dblwr->s_reserved--;
+ buf_dblwr->buf_block_arr[i] = NULL;
+ buf_dblwr->in_use[i] = false;
+ break;
+ }
+ }
- /* We can now reuse the doublewrite memory buffer: */
- buf_dblwr->first_free = 0;
- buf_dblwr->batch_running = FALSE;
+ /* The block we are looking for must exist as a
+ reserved block. */
+ ut_a(i < size);
+ }
+ os_event_set(buf_dblwr->s_event);
+ mutex_exit(&buf_dblwr->mutex);
+ break;
+ case BUF_FLUSH_N_TYPES:
+ ut_error;
}
-
- mutex_exit(&buf_dblwr->mutex);
}
/********************************************************************//**
@@ -699,18 +749,19 @@ static
void
buf_dblwr_write_block_to_datafile(
/*==============================*/
- const buf_page_t* bpage) /*!< in: page to write */
+ const buf_page_t* bpage, /*!< in: page to write */
+ bool sync) /*!< in: true if sync IO
+ is requested */
{
ut_a(bpage);
ut_a(buf_page_in_file(bpage));
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
+ const ulint flags = sync
+ ? OS_FILE_WRITE
+ : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER;
if (bpage->zip.data) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(bpage),
+ fil_io(flags, sync, buf_page_get_space(bpage),
buf_page_get_zip_size(bpage),
buf_page_get_page_no(bpage), 0,
buf_page_get_zip_size(bpage),
@@ -725,8 +776,7 @@ buf_dblwr_write_block_to_datafile(
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_dblwr_check_page_lsn(block->frame);
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_block_get_space(block), 0,
+ fil_io(flags, sync, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*) block->frame, (void*) block, (ulint *)&bpage->write_size);
}
@@ -748,12 +798,12 @@ buf_dblwr_flush_buffered_writes(void)
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
- buf_flush_sync_datafiles();
+ buf_dblwr_sync_datafiles();
return;
}
try_again:
- mutex_enter(&(buf_dblwr->mutex));
+ mutex_enter(&buf_dblwr->mutex);
/* Write first to doublewrite buffer blocks. We use synchronous
aio and thus know that file write has been completed when the
@@ -761,17 +811,18 @@ try_again:
if (buf_dblwr->first_free == 0) {
- mutex_exit(&(buf_dblwr->mutex));
+ mutex_exit(&buf_dblwr->mutex);
return;
}
if (buf_dblwr->batch_running) {
- mutex_exit(&buf_dblwr->mutex);
-
/* Another thread is running the batch right now. Wait
for it to finish. */
- os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+ ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
+ mutex_exit(&buf_dblwr->mutex);
+
+ os_event_wait_low(buf_dblwr->b_event, sig_count);
goto try_again;
}
@@ -780,7 +831,7 @@ try_again:
/* Disallow anyone else to post to doublewrite buffer or to
start another batch of flushing. */
- buf_dblwr->batch_running = TRUE;
+ buf_dblwr->batch_running = true;
first_free = buf_dblwr->first_free;
/* Now safe to release the mutex. Note that though no other
@@ -819,7 +870,7 @@ try_again:
len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
buf_dblwr->first_free) * UNIV_PAGE_SIZE;
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block1, 0, len,
(void*) write_buf, NULL, 0);
@@ -835,7 +886,7 @@ try_again:
write_buf = buf_dblwr->write_buf
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block2, 0, len,
(void*) write_buf, NULL, 0);
@@ -865,7 +916,7 @@ flush:
ut_ad(first_free == buf_dblwr->first_free);
for (ulint i = 0; i < first_free; i++) {
buf_dblwr_write_block_to_datafile(
- buf_dblwr->buf_block_arr[i]);
+ buf_dblwr->buf_block_arr[i], false);
}
/* Wake possible simulated aio thread to actually post the
@@ -890,12 +941,11 @@ buf_dblwr_add_to_batch(
ut_a(buf_page_in_file(bpage));
try_again:
- mutex_enter(&(buf_dblwr->mutex));
+ mutex_enter(&buf_dblwr->mutex);
ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size);
if (buf_dblwr->batch_running) {
- mutex_exit(&buf_dblwr->mutex);
/* This not nearly as bad as it looks. There is only
page_cleaner thread which does background flushing
@@ -903,7 +953,10 @@ try_again:
point. The only exception is when a user thread is
forced to do a flush batch because of a sync
checkpoint. */
- os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+ ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
+ mutex_exit(&buf_dblwr->mutex);
+
+ os_event_wait_low(buf_dblwr->b_event, sig_count);
goto try_again;
}
@@ -968,7 +1021,8 @@ UNIV_INTERN
void
buf_dblwr_write_single_page(
/*========================*/
- buf_page_t* bpage) /*!< in: buffer block to write */
+ buf_page_t* bpage, /*!< in: buffer block to write */
+ bool sync) /*!< in: true if sync IO requested */
{
ulint n_slots;
ulint size;
@@ -1005,11 +1059,12 @@ retry:
mutex_enter(&buf_dblwr->mutex);
if (buf_dblwr->s_reserved == n_slots) {
+ /* All slots are reserved. */
+ ib_int64_t sig_count =
+ os_event_reset(buf_dblwr->s_event);
mutex_exit(&buf_dblwr->mutex);
- /* All slots are reserved. Since it involves two IOs
- during the processing a sleep of 10ms should be
- enough. */
- os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+ os_event_wait_low(buf_dblwr->s_event, sig_count);
+
goto retry;
}
@@ -1022,9 +1077,14 @@ retry:
/* We are guaranteed to find a slot. */
ut_a(i < size);
- buf_dblwr->in_use[i] = TRUE;
+ buf_dblwr->in_use[i] = true;
buf_dblwr->s_reserved++;
buf_dblwr->buf_block_arr[i] = bpage;
+
+ /* increment the doublewrite flushed pages counter */
+ srv_stats.dblwr_pages_written.inc();
+ srv_stats.dblwr_writes.inc();
+
mutex_exit(&buf_dblwr->mutex);
/* Lets see if we are going to write in the first or second
@@ -1054,14 +1114,14 @@ retry:
memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
+ zip_size, 0, UNIV_PAGE_SIZE - zip_size);
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) (buf_dblwr->write_buf
+ UNIV_PAGE_SIZE * i), NULL, 0);
} else {
/* It is a regular page. Write it directly to the
doublewrite buffer */
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) ((buf_block_t*) bpage)->frame,
NULL, 0);
@@ -1073,22 +1133,6 @@ retry:
/* We know that the write has been flushed to disk now
and during recovery we will find it in the doublewrite buffer
blocks. Next do the write to the intended position. */
- buf_dblwr_write_block_to_datafile(bpage);
-
- /* Sync the writes to the disk. */
- buf_flush_sync_datafiles();
-
- mutex_enter(&buf_dblwr->mutex);
-
- buf_dblwr->s_reserved--;
- buf_dblwr->buf_block_arr[i] = NULL;
- buf_dblwr->in_use[i] = FALSE;
-
- /* increment the doublewrite flushed pages counter */
- srv_stats.dblwr_pages_written.inc();
- srv_stats.dblwr_writes.inc();
-
- mutex_exit(&(buf_dblwr->mutex));
-
+ buf_dblwr_write_block_to_datafile(bpage, sync);
}
#endif /* !UNIV_HOTBACKUP */