summaryrefslogtreecommitdiff
path: root/mysys
diff options
context:
space:
mode:
authorunknown <guilhem@gbichot3.local>2006-12-18 17:24:02 +0100
committerunknown <guilhem@gbichot3.local>2006-12-18 17:24:02 +0100
commit7199c905590391f64802913369aab7d288eff4c8 (patch)
treed38393d634c6dc8b3886863dbdab2d348526eb45 /mysys
parent71b404973c1f6343e9e63d3179c65f3642aade9a (diff)
downloadmariadb-git-7199c905590391f64802913369aab7d288eff4c8.tar.gz
WL#3071 Maria checkpoint
- cleanups, simplifications - moving the construction of the "dirty pages table" into the pagecache where it belongs (because it's the pagecache which knows dirty pages). TODO: do the same soon for the "transactions table". - fix for a small bug in the pagecache (decrementing of "blocks_changed") include/pagecache.h: prototype mysys/mf_pagecache.c: m_string.h moves up for LEX_STRING to be known in pagecache.h. In pagecache_delete_page(), we must decrement "blocks_changed" even if we just delete the page without flushing it. A new function pagecache_collect_changed_blocks_with_LSN() (used by the Checkpoint module), which stores information about the changed blocks (a.k.a. "the dirty pages table") into a LEX_STRING. This function is not tested now; it will be when there is a Checkpoint. storage/maria/ma_checkpoint.c: refining the checkpoint code: factoring functions, moving the construction of the "dirty pages table" into mf_pagecache.c (I'll do the same with the construction of the "transactions table" once Serg tells me what's the best way to do it). storage/maria/ma_least_recently_dirtied.c: Simplifying the thread which does background flushing of least-recently-dirtied pages: - in first version that thread will not flush, just do checkpoints - in 2nd version, flushing should re-use existing page cache functions like flush_pagecache_blocks(). unittest/mysys/test_file.h: m_string.h moves up for LEX_STRING to be known in pagecache.h
Diffstat (limited to 'mysys')
-rwxr-xr-xmysys/mf_pagecache.c180
1 files changed, 154 insertions, 26 deletions
diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c
index 807a3ea520a..96c855fda0a 100755
--- a/mysys/mf_pagecache.c
+++ b/mysys/mf_pagecache.c
@@ -40,9 +40,9 @@
*/
#include "mysys_priv.h"
+#include <m_string.h>
#include <pagecache.h>
#include "my_static.h"
-#include <m_string.h>
#include <my_bit.h>
#include <errno.h>
#include <stdarg.h>
@@ -295,7 +295,7 @@ struct st_pagecache_block_link
enum pagecache_page_type type; /* type of the block */
uint hits_left; /* number of hits left until promotion */
ulonglong last_hit_time; /* timestamp of the last hit */
- ulonglong rec_lsn; /* LSN when first became dirty */
+ LSN rec_lsn; /* LSN when first became dirty */
KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
};
@@ -2988,33 +2988,35 @@ restart:
goto restart;
}
- if (block->status & BLOCK_CHANGED && flush)
+ if (block->status & BLOCK_CHANGED)
{
- /* The block contains a dirty page - push it out of the cache */
-
- KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
-
- pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
- /*
- The call is thread safe because only the current
- thread might change the block->hash_link value
- */
- DBUG_ASSERT(block->pins == 1);
- error= pagecache_fwrite(pagecache,
- &block->hash_link->file,
- block->buffer,
- block->hash_link->pageno,
- block->type,
- MYF(MY_NABP | MY_WAIT_IF_FULL));
- pagecache_pthread_mutex_lock(&pagecache->cache_lock);
- pagecache->global_cache_write++;
-
- if (error)
+ if (flush)
{
- block->status|= BLOCK_ERROR;
- goto err;
+ /* The block contains a dirty page - push it out of the cache */
+
+ KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ The call is thread safe because only the current
+ thread might change the block->hash_link value
+ */
+ DBUG_ASSERT(block->pins == 1);
+ error= pagecache_fwrite(pagecache,
+ &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ MYF(MY_NABP | MY_WAIT_IF_FULL));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ pagecache->global_cache_write++;
+
+ if (error)
+ {
+ block->status|= BLOCK_ERROR;
+ goto err;
+ }
}
-
pagecache->blocks_changed--;
pagecache->global_blocks_changed--;
/*
@@ -3793,6 +3795,132 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache)
}
+/*
+  Allocates a buffer and stores in it some information about all dirty pages
+  of type PAGECACHE_LSN_PAGE.
+
+  SYNOPSIS
+    pagecache_collect_changed_blocks_with_LSN()
+    pagecache  pointer to the page cache
+    str        (OUT) pointer to a LEX_STRING where the allocated buffer, and
+               its size, will be put
+    max_lsn    (OUT) pointer to a LSN where the maximum rec_lsn of all
+               relevant dirty pages will be put (0 if there is none)
+
+  DESCRIPTION
+    Does the allocation because the caller cannot know the size itself.
+    Memory freeing is done by the caller.
+    Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they
+    are not interesting for a checkpoint record.
+    The caller has the intention of doing checkpoints.
+
+    Layout of the buffer:
+      8 bytes        number of entries which follow
+      per entry:
+        4 bytes      block->hash_link->file.file
+        4 bytes      block->hash_link->pageno
+        8 bytes      block->rec_lsn
+    Everything is written with int4store()/int8store().
+
+    The whole work is done while holding pagecache->cache_lock.
+
+    On error, str->str is NULL and str->length is 0, so the caller never
+    sees a dangling pointer or a length without a buffer.
+
+  RETURN
+    0 on success (including the case where no relevant dirty page exists)
+    1 on error (allocation failure, or a dirty page without rec_lsn)
+*/
+my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache,
+                                                  LEX_STRING *str,
+                                                  LSN *max_lsn)
+{
+  /*
+    Pessimistic default: cleared only on the success paths, so that no exit
+    path can return an indeterminate value.
+  */
+  my_bool error= 1;
+  ulong stored_LRD_size= 0;
+  uint file_hash;
+  char *ptr;
+  DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");
+
+  *max_lsn= 0;
+  /* Give the OUT string a defined, empty value for every error path */
+  str->str= NULL;
+  str->length= 0;
+  /*
+    We lock the entire cache but will be quick, just reading/writing a few MBs
+    of memory at most.
+    When we enter here, we must be sure that no "first_in_switch" situation
+    is happening or will happen (either we have to get rid of
+    first_in_switch in the code or, first_in_switch has to increment a
+    "danger" counter for this function to know it has to wait). TODO.
+  */
+  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+  /* Count how many dirty pages are interesting */
+  for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
+  {
+    PAGECACHE_BLOCK_LINK *block;
+    for (block= pagecache->changed_blocks[file_hash] ;
+         block;
+         block= block->next_changed)
+    {
+      /*
+        Q: is there something subtle with block->hash_link: can it be NULL?
+        does it have to be == hash_link->block... ?
+      */
+      DBUG_ASSERT(block->hash_link != NULL);
+      DBUG_ASSERT(block->status & BLOCK_CHANGED);
+      if (block->type != PAGECACHE_LSN_PAGE)
+        continue; /* no need to store it */
+      /*
+        In the current pagecache, rec_lsn is not set correctly:
+        1) it is set on pagecache_unlock(), too late (a page is dirty
+        (BLOCK_CHANGED) since the first pagecache_write()). So in this
+        scenario:
+        thread1:                        thread2:
+          write_REDO
+          pagecache_write()             checkpoint : reclsn not known
+          pagecache_unlock(sets rec_lsn)
+          commit
+          crash,
+        at recovery we will wrongly skip the REDO. It also affects the
+        low-water mark's computation.
+        2) sometimes the unlocking can be an implicit action of
+        pagecache_write(), without any call to pagecache_unlock(), then
+        rec_lsn is not set.
+        1) and 2) are critical problems.
+        TODO: fix this when Monty has explained how he writes BLOB pages.
+      */
+      if (0 == block->rec_lsn)
+      {
+        /* A dirty LSN page without rec_lsn cannot be described: give up */
+        DBUG_ASSERT(0);
+        goto end;
+      }
+      stored_LRD_size++;
+    }
+  }
+
+  /* 8 bytes for the entry count, then (4+4+8) bytes per entry */
+  str->length= 8+(4+4+8)*stored_LRD_size;
+  if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
+  {
+    /* Do not advertise a length for a buffer which does not exist */
+    str->length= 0;
+    goto end;
+  }
+  ptr= str->str;
+  int8store(ptr, stored_LRD_size);
+  ptr+= 8;
+  if (0 == stored_LRD_size)
+  {
+    /* Nothing to list: the buffer holds only the (zero) count. Success. */
+    error= 0;
+    goto end;
+  }
+  /* Second pass: same walk and same filter as the counting pass above */
+  for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
+  {
+    PAGECACHE_BLOCK_LINK *block;
+    for (block= pagecache->changed_blocks[file_hash] ;
+         block;
+         block= block->next_changed)
+    {
+      if (block->type != PAGECACHE_LSN_PAGE)
+        continue; /* no need to store it in the checkpoint record */
+      /* The record format reserves exactly 4 bytes for each of these */
+      DBUG_ASSERT((4 == sizeof(block->hash_link->file.file)) &&
+                  (4 == sizeof(block->hash_link->pageno)));
+      int4store(ptr, block->hash_link->file.file);
+      ptr+= 4;
+      int4store(ptr, block->hash_link->pageno);
+      ptr+= 4;
+      int8store(ptr, (ulonglong)block->rec_lsn);
+      ptr+= 8;
+      set_if_bigger(*max_lsn, block->rec_lsn);
+    }
+  }
+  error= 0;
+end:
+  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+  DBUG_RETURN(error);
+}
+
+
#ifndef DBUG_OFF
/*
Test if disk-cache is ok